{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 10000000,
  "global_step": 16312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00012260912211868564,
      "grad_norm": 15.908027862772077,
      "learning_rate": 6.1274509803921574e-09,
      "loss": 1.8403,
      "step": 1
    },
    {
      "epoch": 0.0002452182442373713,
      "grad_norm": 14.200876061475446,
      "learning_rate": 1.2254901960784315e-08,
      "loss": 1.7772,
      "step": 2
    },
    {
      "epoch": 0.0003678273663560569,
      "grad_norm": 15.203797962775749,
      "learning_rate": 1.8382352941176472e-08,
      "loss": 1.7714,
      "step": 3
    },
    {
      "epoch": 0.0004904364884747426,
      "grad_norm": 14.906821870572799,
      "learning_rate": 2.450980392156863e-08,
      "loss": 1.7764,
      "step": 4
    },
    {
      "epoch": 0.0006130456105934281,
      "grad_norm": 12.96957473023371,
      "learning_rate": 3.0637254901960784e-08,
      "loss": 1.6855,
      "step": 5
    },
    {
      "epoch": 0.0007356547327121138,
      "grad_norm": 13.874998797446901,
      "learning_rate": 3.6764705882352945e-08,
      "loss": 1.7638,
      "step": 6
    },
    {
      "epoch": 0.0008582638548307994,
      "grad_norm": 15.012145469286944,
      "learning_rate": 4.2892156862745105e-08,
      "loss": 1.7768,
      "step": 7
    },
    {
      "epoch": 0.000980872976949485,
      "grad_norm": 14.93297870816131,
      "learning_rate": 4.901960784313726e-08,
      "loss": 1.7808,
      "step": 8
    },
    {
      "epoch": 0.0011034820990681706,
      "grad_norm": 13.72437468882856,
      "learning_rate": 5.514705882352942e-08,
      "loss": 1.7157,
      "step": 9
    },
    {
      "epoch": 0.0012260912211868563,
      "grad_norm": 15.211452532401387,
      "learning_rate": 6.127450980392157e-08,
      "loss": 1.6962,
      "step": 10
    },
    {
      "epoch": 0.001348700343305542,
      "grad_norm": 14.382468615812941,
      "learning_rate": 6.740196078431373e-08,
      "loss": 1.7993,
      "step": 11
    },
    {
      "epoch": 0.0014713094654242277,
      "grad_norm": 14.605413794871856,
      "learning_rate": 7.352941176470589e-08,
      "loss": 1.7468,
      "step": 12
    },
    {
      "epoch": 0.0015939185875429131,
      "grad_norm": 14.793959901573825,
      "learning_rate": 7.965686274509804e-08,
      "loss": 1.7061,
      "step": 13
    },
    {
      "epoch": 0.0017165277096615988,
      "grad_norm": 13.32485913943112,
      "learning_rate": 8.578431372549021e-08,
      "loss": 1.6583,
      "step": 14
    },
    {
      "epoch": 0.0018391368317802845,
      "grad_norm": 14.370271159691884,
      "learning_rate": 9.191176470588236e-08,
      "loss": 1.7757,
      "step": 15
    },
    {
      "epoch": 0.00196174595389897,
      "grad_norm": 15.005027466798389,
      "learning_rate": 9.803921568627452e-08,
      "loss": 1.7094,
      "step": 16
    },
    {
      "epoch": 0.0020843550760176557,
      "grad_norm": 12.153528724721102,
      "learning_rate": 1.0416666666666667e-07,
      "loss": 1.5602,
      "step": 17
    },
    {
      "epoch": 0.002206964198136341,
      "grad_norm": 14.585935610614525,
      "learning_rate": 1.1029411764705884e-07,
      "loss": 1.7185,
      "step": 18
    },
    {
      "epoch": 0.002329573320255027,
      "grad_norm": 13.205288838560323,
      "learning_rate": 1.1642156862745099e-07,
      "loss": 1.7595,
      "step": 19
    },
    {
      "epoch": 0.0024521824423737125,
      "grad_norm": 12.801584786977616,
      "learning_rate": 1.2254901960784314e-07,
      "loss": 1.6708,
      "step": 20
    },
    {
      "epoch": 0.002574791564492398,
      "grad_norm": 14.96246125613865,
      "learning_rate": 1.286764705882353e-07,
      "loss": 1.8194,
      "step": 21
    },
    {
      "epoch": 0.002697400686611084,
      "grad_norm": 13.389227163741324,
      "learning_rate": 1.3480392156862746e-07,
      "loss": 1.6824,
      "step": 22
    },
    {
      "epoch": 0.0028200098087297694,
      "grad_norm": 14.575369613560092,
      "learning_rate": 1.4093137254901963e-07,
      "loss": 1.7585,
      "step": 23
    },
    {
      "epoch": 0.0029426189308484553,
      "grad_norm": 12.931400783936585,
      "learning_rate": 1.4705882352941178e-07,
      "loss": 1.6383,
      "step": 24
    },
    {
      "epoch": 0.003065228052967141,
      "grad_norm": 15.426134200983181,
      "learning_rate": 1.5318627450980393e-07,
      "loss": 1.7783,
      "step": 25
    },
    {
      "epoch": 0.0031878371750858263,
      "grad_norm": 13.583406320896136,
      "learning_rate": 1.5931372549019607e-07,
      "loss": 1.6642,
      "step": 26
    },
    {
      "epoch": 0.003310446297204512,
      "grad_norm": 12.650246080728518,
      "learning_rate": 1.6544117647058825e-07,
      "loss": 1.6928,
      "step": 27
    },
    {
      "epoch": 0.0034330554193231977,
      "grad_norm": 12.680056423320304,
      "learning_rate": 1.7156862745098042e-07,
      "loss": 1.6245,
      "step": 28
    },
    {
      "epoch": 0.003555664541441883,
      "grad_norm": 14.287026228434783,
      "learning_rate": 1.7769607843137257e-07,
      "loss": 1.7692,
      "step": 29
    },
    {
      "epoch": 0.003678273663560569,
      "grad_norm": 14.35586559710964,
      "learning_rate": 1.8382352941176472e-07,
      "loss": 1.7096,
      "step": 30
    },
    {
      "epoch": 0.0038008827856792545,
      "grad_norm": 14.953175239140228,
      "learning_rate": 1.8995098039215686e-07,
      "loss": 1.8307,
      "step": 31
    },
    {
      "epoch": 0.00392349190779794,
      "grad_norm": 13.59526874970394,
      "learning_rate": 1.9607843137254904e-07,
      "loss": 1.6963,
      "step": 32
    },
    {
      "epoch": 0.0040461010299166255,
      "grad_norm": 13.547071303345309,
      "learning_rate": 2.022058823529412e-07,
      "loss": 1.63,
      "step": 33
    },
    {
      "epoch": 0.004168710152035311,
      "grad_norm": 12.1631932383078,
      "learning_rate": 2.0833333333333333e-07,
      "loss": 1.645,
      "step": 34
    },
    {
      "epoch": 0.004291319274153997,
      "grad_norm": 13.610737647564681,
      "learning_rate": 2.144607843137255e-07,
      "loss": 1.7174,
      "step": 35
    },
    {
      "epoch": 0.004413928396272682,
      "grad_norm": 12.474321155135593,
      "learning_rate": 2.2058823529411768e-07,
      "loss": 1.63,
      "step": 36
    },
    {
      "epoch": 0.004536537518391368,
      "grad_norm": 14.128252139929488,
      "learning_rate": 2.2671568627450983e-07,
      "loss": 1.7187,
      "step": 37
    },
    {
      "epoch": 0.004659146640510054,
      "grad_norm": 14.098879605814819,
      "learning_rate": 2.3284313725490198e-07,
      "loss": 1.7538,
      "step": 38
    },
    {
      "epoch": 0.004781755762628739,
      "grad_norm": 12.173420850955022,
      "learning_rate": 2.389705882352941e-07,
      "loss": 1.5833,
      "step": 39
    },
    {
      "epoch": 0.004904364884747425,
      "grad_norm": 13.963201887144226,
      "learning_rate": 2.4509803921568627e-07,
      "loss": 1.7062,
      "step": 40
    },
    {
      "epoch": 0.005026974006866111,
      "grad_norm": 11.060124731484759,
      "learning_rate": 2.5122549019607844e-07,
      "loss": 1.5747,
      "step": 41
    },
    {
      "epoch": 0.005149583128984796,
      "grad_norm": 12.265144164203802,
      "learning_rate": 2.573529411764706e-07,
      "loss": 1.6584,
      "step": 42
    },
    {
      "epoch": 0.005272192251103482,
      "grad_norm": 12.5179542002746,
      "learning_rate": 2.634803921568628e-07,
      "loss": 1.6881,
      "step": 43
    },
    {
      "epoch": 0.005394801373222168,
      "grad_norm": 12.72047873267031,
      "learning_rate": 2.696078431372549e-07,
      "loss": 1.675,
      "step": 44
    },
    {
      "epoch": 0.005517410495340854,
      "grad_norm": 10.724006585118751,
      "learning_rate": 2.757352941176471e-07,
      "loss": 1.5774,
      "step": 45
    },
    {
      "epoch": 0.005640019617459539,
      "grad_norm": 11.265801332213291,
      "learning_rate": 2.8186274509803926e-07,
      "loss": 1.5436,
      "step": 46
    },
    {
      "epoch": 0.005762628739578225,
      "grad_norm": 11.88533557438284,
      "learning_rate": 2.879901960784314e-07,
      "loss": 1.6551,
      "step": 47
    },
    {
      "epoch": 0.005885237861696911,
      "grad_norm": 11.373442975315482,
      "learning_rate": 2.9411764705882356e-07,
      "loss": 1.6374,
      "step": 48
    },
    {
      "epoch": 0.006007846983815596,
      "grad_norm": 11.641799440152976,
      "learning_rate": 3.0024509803921573e-07,
      "loss": 1.647,
      "step": 49
    },
    {
      "epoch": 0.006130456105934282,
      "grad_norm": 11.215584677749169,
      "learning_rate": 3.0637254901960785e-07,
      "loss": 1.5548,
      "step": 50
    },
    {
      "epoch": 0.0062530652280529675,
      "grad_norm": 12.328049429195588,
      "learning_rate": 3.125e-07,
      "loss": 1.6302,
      "step": 51
    },
    {
      "epoch": 0.0063756743501716525,
      "grad_norm": 10.082338945731614,
      "learning_rate": 3.1862745098039215e-07,
      "loss": 1.5169,
      "step": 52
    },
    {
      "epoch": 0.0064982834722903384,
      "grad_norm": 11.594259742328118,
      "learning_rate": 3.2475490196078437e-07,
      "loss": 1.5316,
      "step": 53
    },
    {
      "epoch": 0.006620892594409024,
      "grad_norm": 10.077844243447528,
      "learning_rate": 3.308823529411765e-07,
      "loss": 1.5332,
      "step": 54
    },
    {
      "epoch": 0.006743501716527709,
      "grad_norm": 10.299841425890532,
      "learning_rate": 3.370098039215686e-07,
      "loss": 1.5773,
      "step": 55
    },
    {
      "epoch": 0.006866110838646395,
      "grad_norm": 9.595901446555757,
      "learning_rate": 3.4313725490196084e-07,
      "loss": 1.4469,
      "step": 56
    },
    {
      "epoch": 0.006988719960765081,
      "grad_norm": 9.740997540082496,
      "learning_rate": 3.4926470588235296e-07,
      "loss": 1.422,
      "step": 57
    },
    {
      "epoch": 0.007111329082883766,
      "grad_norm": 9.724384231631202,
      "learning_rate": 3.5539215686274514e-07,
      "loss": 1.4537,
      "step": 58
    },
    {
      "epoch": 0.007233938205002452,
      "grad_norm": 8.881643085859123,
      "learning_rate": 3.615196078431373e-07,
      "loss": 1.4082,
      "step": 59
    },
    {
      "epoch": 0.007356547327121138,
      "grad_norm": 10.374478205738235,
      "learning_rate": 3.6764705882352943e-07,
      "loss": 1.4516,
      "step": 60
    },
    {
      "epoch": 0.007479156449239823,
      "grad_norm": 9.022227290466015,
      "learning_rate": 3.737745098039216e-07,
      "loss": 1.3798,
      "step": 61
    },
    {
      "epoch": 0.007601765571358509,
      "grad_norm": 8.356324185239687,
      "learning_rate": 3.7990196078431373e-07,
      "loss": 1.339,
      "step": 62
    },
    {
      "epoch": 0.007724374693477195,
      "grad_norm": 8.259416428105437,
      "learning_rate": 3.8602941176470595e-07,
      "loss": 1.3688,
      "step": 63
    },
    {
      "epoch": 0.00784698381559588,
      "grad_norm": 8.596876667956566,
      "learning_rate": 3.921568627450981e-07,
      "loss": 1.3907,
      "step": 64
    },
    {
      "epoch": 0.007969592937714567,
      "grad_norm": 8.252257992611947,
      "learning_rate": 3.982843137254902e-07,
      "loss": 1.3019,
      "step": 65
    },
    {
      "epoch": 0.008092202059833251,
      "grad_norm": 9.305679331050133,
      "learning_rate": 4.044117647058824e-07,
      "loss": 1.3872,
      "step": 66
    },
    {
      "epoch": 0.008214811181951937,
      "grad_norm": 8.849747085336784,
      "learning_rate": 4.1053921568627454e-07,
      "loss": 1.3717,
      "step": 67
    },
    {
      "epoch": 0.008337420304070623,
      "grad_norm": 9.409394374029954,
      "learning_rate": 4.1666666666666667e-07,
      "loss": 1.4662,
      "step": 68
    },
    {
      "epoch": 0.008460029426189309,
      "grad_norm": 6.678452938397923,
      "learning_rate": 4.227941176470589e-07,
      "loss": 1.2768,
      "step": 69
    },
    {
      "epoch": 0.008582638548307995,
      "grad_norm": 7.90601849713494,
      "learning_rate": 4.28921568627451e-07,
      "loss": 1.2638,
      "step": 70
    },
    {
      "epoch": 0.00870524767042668,
      "grad_norm": 7.0622500669170725,
      "learning_rate": 4.3504901960784313e-07,
      "loss": 1.2782,
      "step": 71
    },
    {
      "epoch": 0.008827856792545365,
      "grad_norm": 6.721634495340414,
      "learning_rate": 4.4117647058823536e-07,
      "loss": 1.2894,
      "step": 72
    },
    {
      "epoch": 0.00895046591466405,
      "grad_norm": 6.364931536166396,
      "learning_rate": 4.473039215686275e-07,
      "loss": 1.2109,
      "step": 73
    },
    {
      "epoch": 0.009073075036782736,
      "grad_norm": 7.720043442486401,
      "learning_rate": 4.5343137254901966e-07,
      "loss": 1.2925,
      "step": 74
    },
    {
      "epoch": 0.009195684158901422,
      "grad_norm": 6.2704202768301345,
      "learning_rate": 4.5955882352941183e-07,
      "loss": 1.1633,
      "step": 75
    },
    {
      "epoch": 0.009318293281020108,
      "grad_norm": 5.507264661865023,
      "learning_rate": 4.6568627450980395e-07,
      "loss": 1.1199,
      "step": 76
    },
    {
      "epoch": 0.009440902403138794,
      "grad_norm": 6.386243996407514,
      "learning_rate": 4.718137254901961e-07,
      "loss": 1.2062,
      "step": 77
    },
    {
      "epoch": 0.009563511525257478,
      "grad_norm": 6.550019736355448,
      "learning_rate": 4.779411764705882e-07,
      "loss": 1.1768,
      "step": 78
    },
    {
      "epoch": 0.009686120647376164,
      "grad_norm": 6.175287535947205,
      "learning_rate": 4.840686274509804e-07,
      "loss": 1.1846,
      "step": 79
    },
    {
      "epoch": 0.00980872976949485,
      "grad_norm": 5.098756673764183,
      "learning_rate": 4.901960784313725e-07,
      "loss": 1.1053,
      "step": 80
    },
    {
      "epoch": 0.009931338891613536,
      "grad_norm": 5.033192824026827,
      "learning_rate": 4.963235294117648e-07,
      "loss": 1.1115,
      "step": 81
    },
    {
      "epoch": 0.010053948013732222,
      "grad_norm": 5.811552053187737,
      "learning_rate": 5.024509803921569e-07,
      "loss": 1.1144,
      "step": 82
    },
    {
      "epoch": 0.010176557135850908,
      "grad_norm": 4.608029213341682,
      "learning_rate": 5.08578431372549e-07,
      "loss": 1.0097,
      "step": 83
    },
    {
      "epoch": 0.010299166257969592,
      "grad_norm": 5.242037381972679,
      "learning_rate": 5.147058823529412e-07,
      "loss": 1.0336,
      "step": 84
    },
    {
      "epoch": 0.010421775380088278,
      "grad_norm": 4.861869558019663,
      "learning_rate": 5.208333333333334e-07,
      "loss": 1.0322,
      "step": 85
    },
    {
      "epoch": 0.010544384502206964,
      "grad_norm": 4.869689531101706,
      "learning_rate": 5.269607843137256e-07,
      "loss": 1.0012,
      "step": 86
    },
    {
      "epoch": 0.01066699362432565,
      "grad_norm": 5.162426560796787,
      "learning_rate": 5.330882352941177e-07,
      "loss": 1.1314,
      "step": 87
    },
    {
      "epoch": 0.010789602746444336,
      "grad_norm": 4.702392463436129,
      "learning_rate": 5.392156862745098e-07,
      "loss": 1.1039,
      "step": 88
    },
    {
      "epoch": 0.010912211868563022,
      "grad_norm": 4.651295847034721,
      "learning_rate": 5.453431372549021e-07,
      "loss": 1.1133,
      "step": 89
    },
    {
      "epoch": 0.011034820990681708,
      "grad_norm": 4.307145253071472,
      "learning_rate": 5.514705882352942e-07,
      "loss": 1.0418,
      "step": 90
    },
    {
      "epoch": 0.011157430112800392,
      "grad_norm": 4.402310438180788,
      "learning_rate": 5.575980392156864e-07,
      "loss": 0.9503,
      "step": 91
    },
    {
      "epoch": 0.011280039234919078,
      "grad_norm": 4.20801687845572,
      "learning_rate": 5.637254901960785e-07,
      "loss": 1.0917,
      "step": 92
    },
    {
      "epoch": 0.011402648357037764,
      "grad_norm": 4.112509261225217,
      "learning_rate": 5.698529411764706e-07,
      "loss": 1.0944,
      "step": 93
    },
    {
      "epoch": 0.01152525747915645,
      "grad_norm": 3.8512385618821288,
      "learning_rate": 5.759803921568628e-07,
      "loss": 0.9853,
      "step": 94
    },
    {
      "epoch": 0.011647866601275135,
      "grad_norm": 3.7278204507328834,
      "learning_rate": 5.82107843137255e-07,
      "loss": 1.0034,
      "step": 95
    },
    {
      "epoch": 0.011770475723393821,
      "grad_norm": 3.8433949103769502,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.9779,
      "step": 96
    },
    {
      "epoch": 0.011893084845512505,
      "grad_norm": 4.010240225156376,
      "learning_rate": 5.943627450980392e-07,
      "loss": 1.0444,
      "step": 97
    },
    {
      "epoch": 0.012015693967631191,
      "grad_norm": 3.722980089551218,
      "learning_rate": 6.004901960784315e-07,
      "loss": 0.9577,
      "step": 98
    },
    {
      "epoch": 0.012138303089749877,
      "grad_norm": 3.6948417280628134,
      "learning_rate": 6.066176470588236e-07,
      "loss": 0.9425,
      "step": 99
    },
    {
      "epoch": 0.012260912211868563,
      "grad_norm": 3.5405790700916095,
      "learning_rate": 6.127450980392157e-07,
      "loss": 0.8907,
      "step": 100
    },
    {
      "epoch": 0.012383521333987249,
      "grad_norm": 3.2922430844734847,
      "learning_rate": 6.188725490196079e-07,
      "loss": 0.9583,
      "step": 101
    },
    {
      "epoch": 0.012506130456105935,
      "grad_norm": 3.5017612299246257,
      "learning_rate": 6.25e-07,
      "loss": 0.9836,
      "step": 102
    },
    {
      "epoch": 0.01262873957822462,
      "grad_norm": 3.4447867124669305,
      "learning_rate": 6.311274509803922e-07,
      "loss": 0.9181,
      "step": 103
    },
    {
      "epoch": 0.012751348700343305,
      "grad_norm": 3.49831085005959,
      "learning_rate": 6.372549019607843e-07,
      "loss": 0.921,
      "step": 104
    },
    {
      "epoch": 0.012873957822461991,
      "grad_norm": 3.6093370636807967,
      "learning_rate": 6.433823529411764e-07,
      "loss": 1.0205,
      "step": 105
    },
    {
      "epoch": 0.012996566944580677,
      "grad_norm": 3.145975065091122,
      "learning_rate": 6.495098039215687e-07,
      "loss": 0.927,
      "step": 106
    },
    {
      "epoch": 0.013119176066699363,
      "grad_norm": 3.3852000244858336,
      "learning_rate": 6.556372549019609e-07,
      "loss": 0.9241,
      "step": 107
    },
    {
      "epoch": 0.013241785188818049,
      "grad_norm": 3.2341259354495806,
      "learning_rate": 6.61764705882353e-07,
      "loss": 0.9042,
      "step": 108
    },
    {
      "epoch": 0.013364394310936733,
      "grad_norm": 3.1392262142476617,
      "learning_rate": 6.678921568627451e-07,
      "loss": 0.9101,
      "step": 109
    },
    {
      "epoch": 0.013487003433055419,
      "grad_norm": 3.433468145640304,
      "learning_rate": 6.740196078431372e-07,
      "loss": 0.943,
      "step": 110
    },
    {
      "epoch": 0.013609612555174105,
      "grad_norm": 3.1550147410396017,
      "learning_rate": 6.801470588235295e-07,
      "loss": 0.8475,
      "step": 111
    },
    {
      "epoch": 0.01373222167729279,
      "grad_norm": 3.2173285319648603,
      "learning_rate": 6.862745098039217e-07,
      "loss": 0.9182,
      "step": 112
    },
    {
      "epoch": 0.013854830799411477,
      "grad_norm": 3.142975718393582,
      "learning_rate": 6.924019607843138e-07,
      "loss": 0.901,
      "step": 113
    },
    {
      "epoch": 0.013977439921530162,
      "grad_norm": 3.08751135975829,
      "learning_rate": 6.985294117647059e-07,
      "loss": 0.9355,
      "step": 114
    },
    {
      "epoch": 0.014100049043648848,
      "grad_norm": 3.3964404358460913,
      "learning_rate": 7.04656862745098e-07,
      "loss": 0.9269,
      "step": 115
    },
    {
      "epoch": 0.014222658165767533,
      "grad_norm": 3.131007479957211,
      "learning_rate": 7.107843137254903e-07,
      "loss": 0.8678,
      "step": 116
    },
    {
      "epoch": 0.014345267287886218,
      "grad_norm": 2.9402197131801384,
      "learning_rate": 7.169117647058824e-07,
      "loss": 0.855,
      "step": 117
    },
    {
      "epoch": 0.014467876410004904,
      "grad_norm": 3.0963580944975084,
      "learning_rate": 7.230392156862746e-07,
      "loss": 0.778,
      "step": 118
    },
    {
      "epoch": 0.01459048553212359,
      "grad_norm": 3.492771944197993,
      "learning_rate": 7.291666666666667e-07,
      "loss": 0.8889,
      "step": 119
    },
    {
      "epoch": 0.014713094654242276,
      "grad_norm": 2.976941626881151,
      "learning_rate": 7.352941176470589e-07,
      "loss": 0.8692,
      "step": 120
    },
    {
      "epoch": 0.014835703776360962,
      "grad_norm": 3.109104131926475,
      "learning_rate": 7.414215686274511e-07,
      "loss": 0.8684,
      "step": 121
    },
    {
      "epoch": 0.014958312898479646,
      "grad_norm": 3.252468279180868,
      "learning_rate": 7.475490196078432e-07,
      "loss": 0.8744,
      "step": 122
    },
    {
      "epoch": 0.015080922020598332,
      "grad_norm": 3.3270352722322754,
      "learning_rate": 7.536764705882353e-07,
      "loss": 0.8713,
      "step": 123
    },
    {
      "epoch": 0.015203531142717018,
      "grad_norm": 3.306359903291898,
      "learning_rate": 7.598039215686275e-07,
      "loss": 0.8652,
      "step": 124
    },
    {
      "epoch": 0.015326140264835704,
      "grad_norm": 2.9752255291766936,
      "learning_rate": 7.659313725490197e-07,
      "loss": 0.8245,
      "step": 125
    },
    {
      "epoch": 0.01544874938695439,
      "grad_norm": 2.907226997962801,
      "learning_rate": 7.720588235294119e-07,
      "loss": 0.8,
      "step": 126
    },
    {
      "epoch": 0.015571358509073076,
      "grad_norm": 3.2645311743422276,
      "learning_rate": 7.78186274509804e-07,
      "loss": 0.7828,
      "step": 127
    },
    {
      "epoch": 0.01569396763119176,
      "grad_norm": 3.1448797132716484,
      "learning_rate": 7.843137254901962e-07,
      "loss": 0.9071,
      "step": 128
    },
    {
      "epoch": 0.015816576753310448,
      "grad_norm": 3.1792103887605845,
      "learning_rate": 7.904411764705883e-07,
      "loss": 0.8712,
      "step": 129
    },
    {
      "epoch": 0.015939185875429134,
      "grad_norm": 3.0635719614817263,
      "learning_rate": 7.965686274509804e-07,
      "loss": 0.8767,
      "step": 130
    },
    {
      "epoch": 0.016061794997547816,
      "grad_norm": 3.1659741330485343,
      "learning_rate": 8.026960784313727e-07,
      "loss": 0.8458,
      "step": 131
    },
    {
      "epoch": 0.016184404119666502,
      "grad_norm": 2.8437397539736193,
      "learning_rate": 8.088235294117648e-07,
      "loss": 0.8984,
      "step": 132
    },
    {
      "epoch": 0.016307013241785188,
      "grad_norm": 3.0372119192226124,
      "learning_rate": 8.14950980392157e-07,
      "loss": 0.8525,
      "step": 133
    },
    {
      "epoch": 0.016429622363903874,
      "grad_norm": 3.090196003309593,
      "learning_rate": 8.210784313725491e-07,
      "loss": 0.8181,
      "step": 134
    },
    {
      "epoch": 0.01655223148602256,
      "grad_norm": 2.8147281176097434,
      "learning_rate": 8.272058823529412e-07,
      "loss": 0.8122,
      "step": 135
    },
    {
      "epoch": 0.016674840608141245,
      "grad_norm": 2.8848672883684583,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.832,
      "step": 136
    },
    {
      "epoch": 0.01679744973025993,
      "grad_norm": 3.2354818569231747,
      "learning_rate": 8.394607843137255e-07,
      "loss": 0.8978,
      "step": 137
    },
    {
      "epoch": 0.016920058852378617,
      "grad_norm": 3.208927074160181,
      "learning_rate": 8.455882352941178e-07,
      "loss": 0.7598,
      "step": 138
    },
    {
      "epoch": 0.017042667974497303,
      "grad_norm": 2.8312489110556687,
      "learning_rate": 8.517156862745099e-07,
      "loss": 0.7915,
      "step": 139
    },
    {
      "epoch": 0.01716527709661599,
      "grad_norm": 3.1009990538012646,
      "learning_rate": 8.57843137254902e-07,
      "loss": 0.8301,
      "step": 140
    },
    {
      "epoch": 0.017287886218734675,
      "grad_norm": 2.772019261273683,
      "learning_rate": 8.639705882352941e-07,
      "loss": 0.765,
      "step": 141
    },
    {
      "epoch": 0.01741049534085336,
      "grad_norm": 2.9403711740549223,
      "learning_rate": 8.700980392156863e-07,
      "loss": 0.8439,
      "step": 142
    },
    {
      "epoch": 0.017533104462972043,
      "grad_norm": 3.4457074189233214,
      "learning_rate": 8.762254901960785e-07,
      "loss": 0.816,
      "step": 143
    },
    {
      "epoch": 0.01765571358509073,
      "grad_norm": 3.0412998490638277,
      "learning_rate": 8.823529411764707e-07,
      "loss": 0.7798,
      "step": 144
    },
    {
      "epoch": 0.017778322707209415,
      "grad_norm": 2.952945100686405,
      "learning_rate": 8.884803921568628e-07,
      "loss": 0.8084,
      "step": 145
    },
    {
      "epoch": 0.0179009318293281,
      "grad_norm": 3.4561866750732406,
      "learning_rate": 8.94607843137255e-07,
      "loss": 0.7574,
      "step": 146
    },
    {
      "epoch": 0.018023540951446787,
      "grad_norm": 2.612957515809956,
      "learning_rate": 9.007352941176471e-07,
      "loss": 0.7492,
      "step": 147
    },
    {
      "epoch": 0.018146150073565473,
      "grad_norm": 3.3686942954830057,
      "learning_rate": 9.068627450980393e-07,
      "loss": 0.8336,
      "step": 148
    },
    {
      "epoch": 0.01826875919568416,
      "grad_norm": 2.717458301316654,
      "learning_rate": 9.129901960784314e-07,
      "loss": 0.8179,
      "step": 149
    },
    {
      "epoch": 0.018391368317802845,
      "grad_norm": 3.1522214108814346,
      "learning_rate": 9.191176470588237e-07,
      "loss": 0.7595,
      "step": 150
    },
    {
      "epoch": 0.01851397743992153,
      "grad_norm": 3.2720123597961197,
      "learning_rate": 9.252450980392158e-07,
      "loss": 0.8033,
      "step": 151
    },
    {
      "epoch": 0.018636586562040217,
      "grad_norm": 3.0153764166274812,
      "learning_rate": 9.313725490196079e-07,
      "loss": 0.7509,
      "step": 152
    },
    {
      "epoch": 0.018759195684158902,
      "grad_norm": 3.168997891814854,
      "learning_rate": 9.375000000000001e-07,
      "loss": 0.7546,
      "step": 153
    },
    {
      "epoch": 0.01888180480627759,
      "grad_norm": 3.209088437912217,
      "learning_rate": 9.436274509803923e-07,
      "loss": 0.7318,
      "step": 154
    },
    {
      "epoch": 0.019004413928396274,
      "grad_norm": 2.9821742128880406,
      "learning_rate": 9.497549019607844e-07,
      "loss": 0.7433,
      "step": 155
    },
    {
      "epoch": 0.019127023050514957,
      "grad_norm": 2.9051600675257907,
      "learning_rate": 9.558823529411764e-07,
      "loss": 0.7584,
      "step": 156
    },
    {
      "epoch": 0.019249632172633643,
      "grad_norm": 3.0240290376869186,
      "learning_rate": 9.620098039215688e-07,
      "loss": 0.8307,
      "step": 157
    },
    {
      "epoch": 0.01937224129475233,
      "grad_norm": 2.9607232285915193,
      "learning_rate": 9.681372549019608e-07,
      "loss": 0.7908,
      "step": 158
    },
    {
      "epoch": 0.019494850416871014,
      "grad_norm": 3.0773691477609173,
      "learning_rate": 9.74264705882353e-07,
      "loss": 0.7502,
      "step": 159
    },
    {
      "epoch": 0.0196174595389897,
      "grad_norm": 3.1901113436781614,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.8171,
      "step": 160
    },
    {
      "epoch": 0.019740068661108386,
      "grad_norm": 2.555349193309099,
      "learning_rate": 9.865196078431373e-07,
      "loss": 0.7876,
      "step": 161
    },
    {
      "epoch": 0.019862677783227072,
      "grad_norm": 3.091542951983368,
      "learning_rate": 9.926470588235295e-07,
      "loss": 0.7574,
      "step": 162
    },
    {
      "epoch": 0.019985286905345758,
      "grad_norm": 3.062047123944315,
      "learning_rate": 9.987745098039218e-07,
      "loss": 0.7479,
      "step": 163
    },
    {
      "epoch": 0.020107896027464444,
      "grad_norm": 3.041313247093804,
      "learning_rate": 1.0049019607843138e-06,
      "loss": 0.7655,
      "step": 164
    },
    {
      "epoch": 0.02023050514958313,
      "grad_norm": 2.7892750210621737,
      "learning_rate": 1.011029411764706e-06,
      "loss": 0.764,
      "step": 165
    },
    {
      "epoch": 0.020353114271701816,
      "grad_norm": 2.909700980007263,
      "learning_rate": 1.017156862745098e-06,
      "loss": 0.7397,
      "step": 166
    },
    {
      "epoch": 0.020475723393820502,
      "grad_norm": 2.9945169944123258,
      "learning_rate": 1.0232843137254902e-06,
      "loss": 0.8168,
      "step": 167
    },
    {
      "epoch": 0.020598332515939184,
      "grad_norm": 3.136945316877904,
      "learning_rate": 1.0294117647058825e-06,
      "loss": 0.8289,
      "step": 168
    },
    {
      "epoch": 0.02072094163805787,
      "grad_norm": 3.154913792897732,
      "learning_rate": 1.0355392156862747e-06,
      "loss": 0.7709,
      "step": 169
    },
    {
      "epoch": 0.020843550760176556,
      "grad_norm": 3.0346563015703145,
      "learning_rate": 1.0416666666666667e-06,
      "loss": 0.8011,
      "step": 170
    },
    {
      "epoch": 0.020966159882295242,
      "grad_norm": 3.0199109868112908,
      "learning_rate": 1.047794117647059e-06,
      "loss": 0.7795,
      "step": 171
    },
    {
      "epoch": 0.021088769004413928,
      "grad_norm": 2.907626675788384,
      "learning_rate": 1.0539215686274512e-06,
      "loss": 0.7227,
      "step": 172
    },
    {
      "epoch": 0.021211378126532614,
      "grad_norm": 2.6979155601577576,
      "learning_rate": 1.0600490196078432e-06,
      "loss": 0.7663,
      "step": 173
    },
    {
      "epoch": 0.0213339872486513,
      "grad_norm": 2.784681295844694,
      "learning_rate": 1.0661764705882354e-06,
      "loss": 0.7593,
      "step": 174
    },
    {
      "epoch": 0.021456596370769986,
      "grad_norm": 2.9325983940537466,
      "learning_rate": 1.0723039215686274e-06,
      "loss": 0.7933,
      "step": 175
    },
    {
      "epoch": 0.02157920549288867,
      "grad_norm": 2.826455016924836,
      "learning_rate": 1.0784313725490197e-06,
      "loss": 0.7401,
      "step": 176
    },
    {
      "epoch": 0.021701814615007357,
      "grad_norm": 2.743092816356206,
      "learning_rate": 1.0845588235294119e-06,
      "loss": 0.6966,
      "step": 177
    },
    {
      "epoch": 0.021824423737126043,
      "grad_norm": 2.987604637244347,
      "learning_rate": 1.0906862745098041e-06,
      "loss": 0.7254,
      "step": 178
    },
    {
      "epoch": 0.02194703285924473,
      "grad_norm": 3.059104182148251,
      "learning_rate": 1.0968137254901961e-06,
      "loss": 0.7818,
      "step": 179
    },
    {
      "epoch": 0.022069641981363415,
      "grad_norm": 2.6766087127044793,
      "learning_rate": 1.1029411764705884e-06,
      "loss": 0.7088,
      "step": 180
    },
    {
      "epoch": 0.022192251103482098,
      "grad_norm": 2.8613313867639762,
      "learning_rate": 1.1090686274509804e-06,
      "loss": 0.7463,
      "step": 181
    },
    {
      "epoch": 0.022314860225600783,
      "grad_norm": 2.8749674339113076,
      "learning_rate": 1.1151960784313728e-06,
      "loss": 0.7234,
      "step": 182
    },
    {
      "epoch": 0.02243746934771947,
      "grad_norm": 3.3370077107660814,
      "learning_rate": 1.1213235294117648e-06,
      "loss": 0.7601,
      "step": 183
    },
    {
      "epoch": 0.022560078469838155,
      "grad_norm": 2.7780972871736918,
      "learning_rate": 1.127450980392157e-06,
      "loss": 0.7265,
      "step": 184
    },
    {
      "epoch": 0.02268268759195684,
      "grad_norm": 2.7189851352300747,
      "learning_rate": 1.133578431372549e-06,
      "loss": 0.7685,
      "step": 185
    },
    {
      "epoch": 0.022805296714075527,
      "grad_norm": 3.0006544672617075,
      "learning_rate": 1.1397058823529413e-06,
      "loss": 0.7509,
      "step": 186
    },
    {
      "epoch": 0.022927905836194213,
      "grad_norm": 3.0256867171923068,
      "learning_rate": 1.1458333333333333e-06,
      "loss": 0.7909,
      "step": 187
    },
    {
      "epoch": 0.0230505149583129,
      "grad_norm": 2.754201957789674,
      "learning_rate": 1.1519607843137255e-06,
      "loss": 0.7387,
      "step": 188
    },
    {
      "epoch": 0.023173124080431585,
      "grad_norm": 2.6688113167735787,
      "learning_rate": 1.1580882352941178e-06,
      "loss": 0.7424,
      "step": 189
    },
    {
      "epoch": 0.02329573320255027,
      "grad_norm": 2.8790407237910194,
      "learning_rate": 1.16421568627451e-06,
      "loss": 0.8074,
      "step": 190
    },
    {
      "epoch": 0.023418342324668957,
      "grad_norm": 3.1451689660530167,
      "learning_rate": 1.170343137254902e-06,
      "loss": 0.7829,
      "step": 191
    },
    {
      "epoch": 0.023540951446787643,
      "grad_norm": 2.9633589447433617,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 0.7789,
      "step": 192
    },
    {
      "epoch": 0.023663560568906325,
      "grad_norm": 2.776399345439902,
      "learning_rate": 1.1825980392156862e-06,
      "loss": 0.7723,
      "step": 193
    },
    {
      "epoch": 0.02378616969102501,
      "grad_norm": 3.0598037272042107,
      "learning_rate": 1.1887254901960785e-06,
      "loss": 0.8028,
      "step": 194
    },
    {
      "epoch": 0.023908778813143697,
      "grad_norm": 2.878549115622639,
      "learning_rate": 1.1948529411764707e-06,
      "loss": 0.7669,
      "step": 195
    },
    {
      "epoch": 0.024031387935262383,
      "grad_norm": 2.876701157459896,
      "learning_rate": 1.200980392156863e-06,
      "loss": 0.7448,
      "step": 196
    },
    {
      "epoch": 0.02415399705738107,
      "grad_norm": 2.809641378623048,
      "learning_rate": 1.207107843137255e-06,
      "loss": 0.6673,
      "step": 197
    },
    {
      "epoch": 0.024276606179499755,
      "grad_norm": 2.754064213232191,
      "learning_rate": 1.2132352941176472e-06,
      "loss": 0.8107,
      "step": 198
    },
    {
      "epoch": 0.02439921530161844,
      "grad_norm": 2.4992670571524376,
      "learning_rate": 1.2193627450980394e-06,
      "loss": 0.6788,
      "step": 199
    },
    {
      "epoch": 0.024521824423737126,
      "grad_norm": 2.9031825551327737,
      "learning_rate": 1.2254901960784314e-06,
      "loss": 0.7052,
      "step": 200
    },
    {
      "epoch": 0.024644433545855812,
      "grad_norm": 3.1039884795438493,
      "learning_rate": 1.2316176470588236e-06,
      "loss": 0.7768,
      "step": 201
    },
    {
      "epoch": 0.024767042667974498,
      "grad_norm": 2.834972140248317,
      "learning_rate": 1.2377450980392159e-06,
      "loss": 0.6974,
      "step": 202
    },
    {
      "epoch": 0.024889651790093184,
      "grad_norm": 2.6221305171723994,
      "learning_rate": 1.2438725490196079e-06,
      "loss": 0.7091,
      "step": 203
    },
    {
      "epoch": 0.02501226091221187,
      "grad_norm": 3.2431672918304795,
      "learning_rate": 1.25e-06,
      "loss": 0.7985,
      "step": 204
    },
    {
      "epoch": 0.025134870034330556,
      "grad_norm": 3.360841875404792,
      "learning_rate": 1.2561274509803923e-06,
      "loss": 0.7525,
      "step": 205
    },
    {
      "epoch": 0.02525747915644924,
      "grad_norm": 2.5600663578933935,
      "learning_rate": 1.2622549019607843e-06,
      "loss": 0.7025,
      "step": 206
    },
    {
      "epoch": 0.025380088278567924,
      "grad_norm": 2.6720217825010653,
      "learning_rate": 1.2683823529411766e-06,
      "loss": 0.7717,
      "step": 207
    },
    {
      "epoch": 0.02550269740068661,
      "grad_norm": 2.841159489445417,
      "learning_rate": 1.2745098039215686e-06,
      "loss": 0.6814,
      "step": 208
    },
    {
      "epoch": 0.025625306522805296,
      "grad_norm": 3.074731691613389,
      "learning_rate": 1.2806372549019608e-06,
      "loss": 0.7288,
      "step": 209
    },
    {
      "epoch": 0.025747915644923982,
      "grad_norm": 3.3360115017460252,
      "learning_rate": 1.2867647058823528e-06,
      "loss": 0.7418,
      "step": 210
    },
    {
      "epoch": 0.025870524767042668,
      "grad_norm": 2.975146132444564,
      "learning_rate": 1.2928921568627453e-06,
      "loss": 0.746,
      "step": 211
    },
    {
      "epoch": 0.025993133889161354,
      "grad_norm": 2.545086732507768,
      "learning_rate": 1.2990196078431375e-06,
      "loss": 0.7641,
      "step": 212
    },
    {
      "epoch": 0.02611574301128004,
      "grad_norm": 2.9778789467798443,
      "learning_rate": 1.3051470588235295e-06,
      "loss": 0.7376,
      "step": 213
    },
    {
      "epoch": 0.026238352133398726,
      "grad_norm": 2.889032174661955,
      "learning_rate": 1.3112745098039217e-06,
      "loss": 0.7201,
      "step": 214
    },
    {
      "epoch": 0.02636096125551741,
      "grad_norm": 2.857298532636212,
      "learning_rate": 1.317401960784314e-06,
      "loss": 0.7307,
      "step": 215
    },
    {
      "epoch": 0.026483570377636097,
      "grad_norm": 2.736302300252761,
      "learning_rate": 1.323529411764706e-06,
      "loss": 0.7847,
      "step": 216
    },
    {
      "epoch": 0.026606179499754783,
      "grad_norm": 2.903723755965568,
      "learning_rate": 1.3296568627450982e-06,
      "loss": 0.7434,
      "step": 217
    },
    {
      "epoch": 0.026728788621873466,
      "grad_norm": 2.8395334880778065,
      "learning_rate": 1.3357843137254902e-06,
      "loss": 0.6965,
      "step": 218
    },
    {
      "epoch": 0.02685139774399215,
      "grad_norm": 2.6437129803752,
      "learning_rate": 1.3419117647058824e-06,
      "loss": 0.7253,
      "step": 219
    },
    {
      "epoch": 0.026974006866110838,
      "grad_norm": 2.6872355539091357,
      "learning_rate": 1.3480392156862745e-06,
      "loss": 0.7353,
      "step": 220
    },
    {
      "epoch": 0.027096615988229523,
      "grad_norm": 2.6750200472734393,
      "learning_rate": 1.3541666666666667e-06,
      "loss": 0.6812,
      "step": 221
    },
    {
      "epoch": 0.02721922511034821,
      "grad_norm": 2.823430525243088,
      "learning_rate": 1.360294117647059e-06,
      "loss": 0.7693,
      "step": 222
    },
    {
      "epoch": 0.027341834232466895,
      "grad_norm": 2.69171675396457,
      "learning_rate": 1.366421568627451e-06,
      "loss": 0.7282,
      "step": 223
    },
    {
      "epoch": 0.02746444335458558,
      "grad_norm": 2.79528946733066,
      "learning_rate": 1.3725490196078434e-06,
      "loss": 0.6617,
      "step": 224
    },
    {
      "epoch": 0.027587052476704267,
      "grad_norm": 2.8265779840549072,
      "learning_rate": 1.3786764705882356e-06,
      "loss": 0.6564,
      "step": 225
    },
    {
      "epoch": 0.027709661598822953,
      "grad_norm": 2.535133405774168,
      "learning_rate": 1.3848039215686276e-06,
      "loss": 0.7047,
      "step": 226
    },
    {
      "epoch": 0.02783227072094164,
      "grad_norm": 2.9790641779148275,
      "learning_rate": 1.3909313725490198e-06,
      "loss": 0.7672,
      "step": 227
    },
    {
      "epoch": 0.027954879843060325,
      "grad_norm": 2.757560243816323,
      "learning_rate": 1.3970588235294119e-06,
      "loss": 0.7253,
      "step": 228
    },
    {
      "epoch": 0.02807748896517901,
      "grad_norm": 2.9292144259878556,
      "learning_rate": 1.403186274509804e-06,
      "loss": 0.7198,
      "step": 229
    },
    {
      "epoch": 0.028200098087297697,
      "grad_norm": 2.8059288500730437,
      "learning_rate": 1.409313725490196e-06,
      "loss": 0.7479,
      "step": 230
    },
    {
      "epoch": 0.02832270720941638,
      "grad_norm": 2.844475203684346,
      "learning_rate": 1.4154411764705883e-06,
      "loss": 0.723,
      "step": 231
    },
    {
      "epoch": 0.028445316331535065,
      "grad_norm": 3.0911138571167616,
      "learning_rate": 1.4215686274509805e-06,
      "loss": 0.7609,
      "step": 232
    },
    {
      "epoch": 0.02856792545365375,
      "grad_norm": 2.9722918117318375,
      "learning_rate": 1.4276960784313726e-06,
      "loss": 0.6906,
      "step": 233
    },
    {
      "epoch": 0.028690534575772437,
      "grad_norm": 2.781683459123979,
      "learning_rate": 1.4338235294117648e-06,
      "loss": 0.7359,
      "step": 234
    },
    {
      "epoch": 0.028813143697891123,
      "grad_norm": 2.560423383393698,
      "learning_rate": 1.4399509803921568e-06,
      "loss": 0.7301,
      "step": 235
    },
    {
      "epoch": 0.02893575282000981,
      "grad_norm": 2.8054808954036266,
      "learning_rate": 1.4460784313725492e-06,
      "loss": 0.6505,
      "step": 236
    },
    {
      "epoch": 0.029058361942128495,
      "grad_norm": 2.7748149288625403,
      "learning_rate": 1.4522058823529415e-06,
      "loss": 0.7426,
      "step": 237
    },
    {
      "epoch": 0.02918097106424718,
      "grad_norm": 2.796785230872665,
      "learning_rate": 1.4583333333333335e-06,
      "loss": 0.7598,
      "step": 238
    },
    {
      "epoch": 0.029303580186365866,
      "grad_norm": 2.906719455979947,
      "learning_rate": 1.4644607843137257e-06,
      "loss": 0.7471,
      "step": 239
    },
    {
      "epoch": 0.029426189308484552,
      "grad_norm": 3.06797571693449,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 0.7431,
      "step": 240
    },
    {
      "epoch": 0.029548798430603238,
      "grad_norm": 2.6846872791037155,
      "learning_rate": 1.47671568627451e-06,
      "loss": 0.6788,
      "step": 241
    },
    {
      "epoch": 0.029671407552721924,
      "grad_norm": 3.3131370055662317,
      "learning_rate": 1.4828431372549022e-06,
      "loss": 0.7556,
      "step": 242
    },
    {
      "epoch": 0.029794016674840607,
      "grad_norm": 2.946659258384323,
      "learning_rate": 1.4889705882352942e-06,
      "loss": 0.6407,
      "step": 243
    },
    {
      "epoch": 0.029916625796959292,
      "grad_norm": 2.5496078358930503,
      "learning_rate": 1.4950980392156864e-06,
      "loss": 0.6548,
      "step": 244
    },
    {
      "epoch": 0.03003923491907798,
      "grad_norm": 2.720860059509374,
      "learning_rate": 1.5012254901960784e-06,
      "loss": 0.7072,
      "step": 245
    },
    {
      "epoch": 0.030161844041196664,
      "grad_norm": 2.7721599249413247,
      "learning_rate": 1.5073529411764707e-06,
      "loss": 0.768,
      "step": 246
    },
    {
      "epoch": 0.03028445316331535,
      "grad_norm": 2.894657740938729,
      "learning_rate": 1.5134803921568627e-06,
      "loss": 0.6497,
      "step": 247
    },
    {
      "epoch": 0.030407062285434036,
      "grad_norm": 2.8815939584408086,
      "learning_rate": 1.519607843137255e-06,
      "loss": 0.7311,
      "step": 248
    },
    {
      "epoch": 0.030529671407552722,
      "grad_norm": 2.855858583292397,
      "learning_rate": 1.5257352941176473e-06,
      "loss": 0.6649,
      "step": 249
    },
    {
      "epoch": 0.030652280529671408,
      "grad_norm": 2.758292440502194,
      "learning_rate": 1.5318627450980394e-06,
      "loss": 0.6861,
      "step": 250
    },
    {
      "epoch": 0.030774889651790094,
      "grad_norm": 2.8521597153479052,
      "learning_rate": 1.5379901960784316e-06,
      "loss": 0.7073,
      "step": 251
    },
    {
      "epoch": 0.03089749877390878,
      "grad_norm": 2.614235879272852,
      "learning_rate": 1.5441176470588238e-06,
      "loss": 0.7049,
      "step": 252
    },
    {
      "epoch": 0.031020107896027466,
      "grad_norm": 2.8977910729751692,
      "learning_rate": 1.5502450980392158e-06,
      "loss": 0.7392,
      "step": 253
    },
    {
      "epoch": 0.03114271701814615,
      "grad_norm": 3.0306155743591607,
      "learning_rate": 1.556372549019608e-06,
      "loss": 0.6861,
      "step": 254
    },
    {
      "epoch": 0.03126532614026484,
      "grad_norm": 2.5785825621399585,
      "learning_rate": 1.5625e-06,
      "loss": 0.7055,
      "step": 255
    },
    {
      "epoch": 0.03138793526238352,
      "grad_norm": 2.3299898361269373,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.6594,
      "step": 256
    },
    {
      "epoch": 0.03151054438450221,
      "grad_norm": 2.4966225619226066,
      "learning_rate": 1.5747549019607843e-06,
      "loss": 0.6592,
      "step": 257
    },
    {
      "epoch": 0.031633153506620895,
      "grad_norm": 2.9634560237021472,
      "learning_rate": 1.5808823529411765e-06,
      "loss": 0.7244,
      "step": 258
    },
    {
      "epoch": 0.03175576262873958,
      "grad_norm": 2.945101061964975,
      "learning_rate": 1.5870098039215688e-06,
      "loss": 0.7021,
      "step": 259
    },
    {
      "epoch": 0.03187837175085827,
      "grad_norm": 2.4578545996290257,
      "learning_rate": 1.5931372549019608e-06,
      "loss": 0.6749,
      "step": 260
    },
    {
      "epoch": 0.03200098087297695,
      "grad_norm": 2.712229677315807,
      "learning_rate": 1.599264705882353e-06,
      "loss": 0.7855,
      "step": 261
    },
    {
      "epoch": 0.03212358999509563,
      "grad_norm": 3.1904793302783574,
      "learning_rate": 1.6053921568627455e-06,
      "loss": 0.6919,
      "step": 262
    },
    {
      "epoch": 0.03224619911721432,
      "grad_norm": 2.9242753916655913,
      "learning_rate": 1.6115196078431375e-06,
      "loss": 0.6786,
      "step": 263
    },
    {
      "epoch": 0.032368808239333004,
      "grad_norm": 2.833601027496882,
      "learning_rate": 1.6176470588235297e-06,
      "loss": 0.6791,
      "step": 264
    },
    {
      "epoch": 0.03249141736145169,
      "grad_norm": 2.8765386139801032,
      "learning_rate": 1.6237745098039217e-06,
      "loss": 0.6811,
      "step": 265
    },
    {
      "epoch": 0.032614026483570376,
      "grad_norm": 3.059297443525125,
      "learning_rate": 1.629901960784314e-06,
      "loss": 0.7252,
      "step": 266
    },
    {
      "epoch": 0.03273663560568906,
      "grad_norm": 2.5344794938440542,
      "learning_rate": 1.636029411764706e-06,
      "loss": 0.6888,
      "step": 267
    },
    {
      "epoch": 0.03285924472780775,
      "grad_norm": 2.7230335664213428,
      "learning_rate": 1.6421568627450982e-06,
      "loss": 0.7329,
      "step": 268
    },
    {
      "epoch": 0.03298185384992643,
      "grad_norm": 2.6669629312990373,
      "learning_rate": 1.6482843137254904e-06,
      "loss": 0.757,
      "step": 269
    },
    {
      "epoch": 0.03310446297204512,
      "grad_norm": 3.0561516194017413,
      "learning_rate": 1.6544117647058824e-06,
      "loss": 0.6577,
      "step": 270
    },
    {
      "epoch": 0.033227072094163805,
      "grad_norm": 2.780840403343249,
      "learning_rate": 1.6605392156862746e-06,
      "loss": 0.6739,
      "step": 271
    },
    {
      "epoch": 0.03334968121628249,
      "grad_norm": 3.034590782105019,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.7011,
      "step": 272
    },
    {
      "epoch": 0.03347229033840118,
      "grad_norm": 2.612244763469438,
      "learning_rate": 1.6727941176470589e-06,
      "loss": 0.6957,
      "step": 273
    },
    {
      "epoch": 0.03359489946051986,
      "grad_norm": 2.7847931419571696,
      "learning_rate": 1.678921568627451e-06,
      "loss": 0.6794,
      "step": 274
    },
    {
      "epoch": 0.03371750858263855,
      "grad_norm": 2.7218704588853866,
      "learning_rate": 1.6850490196078433e-06,
      "loss": 0.7287,
      "step": 275
    },
    {
      "epoch": 0.033840117704757235,
      "grad_norm": 2.9157314347543433,
      "learning_rate": 1.6911764705882356e-06,
      "loss": 0.6938,
      "step": 276
    },
    {
      "epoch": 0.03396272682687592,
      "grad_norm": 2.753320697723086,
      "learning_rate": 1.6973039215686276e-06,
      "loss": 0.658,
      "step": 277
    },
    {
      "epoch": 0.034085335948994606,
      "grad_norm": 2.665855741015022,
      "learning_rate": 1.7034313725490198e-06,
      "loss": 0.6965,
      "step": 278
    },
    {
      "epoch": 0.03420794507111329,
      "grad_norm": 2.9927465760794747,
      "learning_rate": 1.709558823529412e-06,
      "loss": 0.6577,
      "step": 279
    },
    {
      "epoch": 0.03433055419323198,
      "grad_norm": 2.7744571080227765,
      "learning_rate": 1.715686274509804e-06,
      "loss": 0.6722,
      "step": 280
    },
    {
      "epoch": 0.034453163315350664,
      "grad_norm": 2.840416143340555,
      "learning_rate": 1.7218137254901963e-06,
      "loss": 0.6739,
      "step": 281
    },
    {
      "epoch": 0.03457577243746935,
      "grad_norm": 2.454981381080584,
      "learning_rate": 1.7279411764705883e-06,
      "loss": 0.7085,
      "step": 282
    },
    {
      "epoch": 0.034698381559588036,
      "grad_norm": 2.7770771596898314,
      "learning_rate": 1.7340686274509805e-06,
      "loss": 0.7164,
      "step": 283
    },
    {
      "epoch": 0.03482099068170672,
      "grad_norm": 2.647404935484612,
      "learning_rate": 1.7401960784313725e-06,
      "loss": 0.6762,
      "step": 284
    },
    {
      "epoch": 0.03494359980382541,
      "grad_norm": 2.8525761031235226,
      "learning_rate": 1.7463235294117648e-06,
      "loss": 0.6863,
      "step": 285
    },
    {
      "epoch": 0.03506620892594409,
      "grad_norm": 2.9487192729638765,
      "learning_rate": 1.752450980392157e-06,
      "loss": 0.6824,
      "step": 286
    },
    {
      "epoch": 0.03518881804806277,
      "grad_norm": 2.9742393496975397,
      "learning_rate": 1.7585784313725492e-06,
      "loss": 0.6675,
      "step": 287
    },
    {
      "epoch": 0.03531142717018146,
      "grad_norm": 3.0608918584964497,
      "learning_rate": 1.7647058823529414e-06,
      "loss": 0.6965,
      "step": 288
    },
    {
      "epoch": 0.035434036292300145,
      "grad_norm": 2.941631334431336,
      "learning_rate": 1.7708333333333337e-06,
      "loss": 0.6911,
      "step": 289
    },
    {
      "epoch": 0.03555664541441883,
      "grad_norm": 2.7163797140565977,
      "learning_rate": 1.7769607843137257e-06,
      "loss": 0.6804,
      "step": 290
    },
    {
      "epoch": 0.035679254536537516,
      "grad_norm": 2.6049552121690165,
      "learning_rate": 1.783088235294118e-06,
      "loss": 0.6544,
      "step": 291
    },
    {
      "epoch": 0.0358018636586562,
      "grad_norm": 3.1069387956776993,
      "learning_rate": 1.78921568627451e-06,
      "loss": 0.7187,
      "step": 292
    },
    {
      "epoch": 0.03592447278077489,
      "grad_norm": 2.6606306101793575,
      "learning_rate": 1.7953431372549022e-06,
      "loss": 0.6684,
      "step": 293
    },
    {
      "epoch": 0.036047081902893574,
      "grad_norm": 2.5299428146781744,
      "learning_rate": 1.8014705882352942e-06,
      "loss": 0.6626,
      "step": 294
    },
    {
      "epoch": 0.03616969102501226,
      "grad_norm": 2.8706641657708234,
      "learning_rate": 1.8075980392156864e-06,
      "loss": 0.7227,
      "step": 295
    },
    {
      "epoch": 0.036292300147130946,
      "grad_norm": 2.7741255346617306,
      "learning_rate": 1.8137254901960786e-06,
      "loss": 0.6601,
      "step": 296
    },
    {
      "epoch": 0.03641490926924963,
      "grad_norm": 2.707636299076404,
      "learning_rate": 1.8198529411764706e-06,
      "loss": 0.6625,
      "step": 297
    },
    {
      "epoch": 0.03653751839136832,
      "grad_norm": 2.6524950112680408,
      "learning_rate": 1.8259803921568629e-06,
      "loss": 0.6905,
      "step": 298
    },
    {
      "epoch": 0.036660127513487004,
      "grad_norm": 2.9624639599726756,
      "learning_rate": 1.8321078431372549e-06,
      "loss": 0.7178,
      "step": 299
    },
    {
      "epoch": 0.03678273663560569,
      "grad_norm": 2.6373299460663144,
      "learning_rate": 1.8382352941176473e-06,
      "loss": 0.7021,
      "step": 300
    },
    {
      "epoch": 0.036905345757724375,
      "grad_norm": 2.5120211826164347,
      "learning_rate": 1.8443627450980395e-06,
      "loss": 0.712,
      "step": 301
    },
    {
      "epoch": 0.03702795487984306,
      "grad_norm": 2.5401581983784087,
      "learning_rate": 1.8504901960784316e-06,
      "loss": 0.7425,
      "step": 302
    },
    {
      "epoch": 0.03715056400196175,
      "grad_norm": 2.7757725784774245,
      "learning_rate": 1.8566176470588238e-06,
      "loss": 0.7113,
      "step": 303
    },
    {
      "epoch": 0.03727317312408043,
      "grad_norm": 2.743303474333988,
      "learning_rate": 1.8627450980392158e-06,
      "loss": 0.6573,
      "step": 304
    },
    {
      "epoch": 0.03739578224619912,
      "grad_norm": 2.5392332611404274,
      "learning_rate": 1.868872549019608e-06,
      "loss": 0.6392,
      "step": 305
    },
    {
      "epoch": 0.037518391368317805,
      "grad_norm": 2.3866805981834824,
      "learning_rate": 1.8750000000000003e-06,
      "loss": 0.6285,
      "step": 306
    },
    {
      "epoch": 0.03764100049043649,
      "grad_norm": 2.8111458540011154,
      "learning_rate": 1.8811274509803923e-06,
      "loss": 0.6932,
      "step": 307
    },
    {
      "epoch": 0.03776360961255518,
      "grad_norm": 2.74137756762941,
      "learning_rate": 1.8872549019607845e-06,
      "loss": 0.6909,
      "step": 308
    },
    {
      "epoch": 0.03788621873467386,
      "grad_norm": 2.654235168596994,
      "learning_rate": 1.8933823529411765e-06,
      "loss": 0.6596,
      "step": 309
    },
    {
      "epoch": 0.03800882785679255,
      "grad_norm": 3.1565278803073737,
      "learning_rate": 1.8995098039215687e-06,
      "loss": 0.6812,
      "step": 310
    },
    {
      "epoch": 0.03813143697891123,
      "grad_norm": 2.494831801709263,
      "learning_rate": 1.9056372549019608e-06,
      "loss": 0.7157,
      "step": 311
    },
    {
      "epoch": 0.038254046101029913,
      "grad_norm": 2.8954745255450276,
      "learning_rate": 1.9117647058823528e-06,
      "loss": 0.7071,
      "step": 312
    },
    {
      "epoch": 0.0383766552231486,
      "grad_norm": 2.802761966474127,
      "learning_rate": 1.9178921568627452e-06,
      "loss": 0.7148,
      "step": 313
    },
    {
      "epoch": 0.038499264345267285,
      "grad_norm": 2.667518796840563,
      "learning_rate": 1.9240196078431377e-06,
      "loss": 0.6783,
      "step": 314
    },
    {
      "epoch": 0.03862187346738597,
      "grad_norm": 2.9925239351572905,
      "learning_rate": 1.9301470588235297e-06,
      "loss": 0.6825,
      "step": 315
    },
    {
      "epoch": 0.03874448258950466,
      "grad_norm": 2.80282462121558,
      "learning_rate": 1.9362745098039217e-06,
      "loss": 0.7171,
      "step": 316
    },
    {
      "epoch": 0.03886709171162334,
      "grad_norm": 2.958315485338473,
      "learning_rate": 1.942401960784314e-06,
      "loss": 0.6903,
      "step": 317
    },
    {
      "epoch": 0.03898970083374203,
      "grad_norm": 2.421495486313679,
      "learning_rate": 1.948529411764706e-06,
      "loss": 0.6711,
      "step": 318
    },
    {
      "epoch": 0.039112309955860715,
      "grad_norm": 2.618239692901487,
      "learning_rate": 1.954656862745098e-06,
      "loss": 0.6866,
      "step": 319
    },
    {
      "epoch": 0.0392349190779794,
      "grad_norm": 2.645861349911178,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.7298,
      "step": 320
    },
    {
      "epoch": 0.03935752820009809,
      "grad_norm": 2.6525272801999593,
      "learning_rate": 1.9669117647058826e-06,
      "loss": 0.6408,
      "step": 321
    },
    {
      "epoch": 0.03948013732221677,
      "grad_norm": 2.838119251307857,
      "learning_rate": 1.9730392156862746e-06,
      "loss": 0.6314,
      "step": 322
    },
    {
      "epoch": 0.03960274644433546,
      "grad_norm": 2.7403136012911062,
      "learning_rate": 1.9791666666666666e-06,
      "loss": 0.6634,
      "step": 323
    },
    {
      "epoch": 0.039725355566454144,
      "grad_norm": 2.964057272669299,
      "learning_rate": 1.985294117647059e-06,
      "loss": 0.6362,
      "step": 324
    },
    {
      "epoch": 0.03984796468857283,
      "grad_norm": 3.075024716109643,
      "learning_rate": 1.991421568627451e-06,
      "loss": 0.656,
      "step": 325
    },
    {
      "epoch": 0.039970573810691516,
      "grad_norm": 2.9758864392444857,
      "learning_rate": 1.9975490196078435e-06,
      "loss": 0.6883,
      "step": 326
    },
    {
      "epoch": 0.0400931829328102,
      "grad_norm": 2.8045668443229834,
      "learning_rate": 2.0036764705882355e-06,
      "loss": 0.6383,
      "step": 327
    },
    {
      "epoch": 0.04021579205492889,
      "grad_norm": 2.83297973325265,
      "learning_rate": 2.0098039215686276e-06,
      "loss": 0.7187,
      "step": 328
    },
    {
      "epoch": 0.040338401177047574,
      "grad_norm": 2.9512643407378087,
      "learning_rate": 2.01593137254902e-06,
      "loss": 0.7097,
      "step": 329
    },
    {
      "epoch": 0.04046101029916626,
      "grad_norm": 2.9399970147981223,
      "learning_rate": 2.022058823529412e-06,
      "loss": 0.6702,
      "step": 330
    },
    {
      "epoch": 0.040583619421284946,
      "grad_norm": 2.672021630817901,
      "learning_rate": 2.028186274509804e-06,
      "loss": 0.6722,
      "step": 331
    },
    {
      "epoch": 0.04070622854340363,
      "grad_norm": 2.722269333711863,
      "learning_rate": 2.034313725490196e-06,
      "loss": 0.6449,
      "step": 332
    },
    {
      "epoch": 0.04082883766552232,
      "grad_norm": 2.935081009555718,
      "learning_rate": 2.0404411764705885e-06,
      "loss": 0.6898,
      "step": 333
    },
    {
      "epoch": 0.040951446787641003,
      "grad_norm": 2.6429362305141697,
      "learning_rate": 2.0465686274509805e-06,
      "loss": 0.6492,
      "step": 334
    },
    {
      "epoch": 0.04107405590975969,
      "grad_norm": 2.610113612228474,
      "learning_rate": 2.0526960784313725e-06,
      "loss": 0.6812,
      "step": 335
    },
    {
      "epoch": 0.04119666503187837,
      "grad_norm": 2.8068669335828482,
      "learning_rate": 2.058823529411765e-06,
      "loss": 0.6912,
      "step": 336
    },
    {
      "epoch": 0.041319274153997054,
      "grad_norm": 2.8149138243299556,
      "learning_rate": 2.064950980392157e-06,
      "loss": 0.6426,
      "step": 337
    },
    {
      "epoch": 0.04144188327611574,
      "grad_norm": 3.0592990992489955,
      "learning_rate": 2.0710784313725494e-06,
      "loss": 0.6033,
      "step": 338
    },
    {
      "epoch": 0.041564492398234426,
      "grad_norm": 2.954125261043606,
      "learning_rate": 2.0772058823529414e-06,
      "loss": 0.698,
      "step": 339
    },
    {
      "epoch": 0.04168710152035311,
      "grad_norm": 2.7672966588529877,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 0.7152,
      "step": 340
    },
    {
      "epoch": 0.0418097106424718,
      "grad_norm": 2.6248686562399266,
      "learning_rate": 2.089460784313726e-06,
      "loss": 0.6447,
      "step": 341
    },
    {
      "epoch": 0.041932319764590484,
      "grad_norm": 3.0562464200938155,
      "learning_rate": 2.095588235294118e-06,
      "loss": 0.7582,
      "step": 342
    },
    {
      "epoch": 0.04205492888670917,
      "grad_norm": 2.712148387545217,
      "learning_rate": 2.10171568627451e-06,
      "loss": 0.6863,
      "step": 343
    },
    {
      "epoch": 0.042177538008827856,
      "grad_norm": 3.126026685041542,
      "learning_rate": 2.1078431372549023e-06,
      "loss": 0.6688,
      "step": 344
    },
    {
      "epoch": 0.04230014713094654,
      "grad_norm": 2.852724725466301,
      "learning_rate": 2.1139705882352944e-06,
      "loss": 0.712,
      "step": 345
    },
    {
      "epoch": 0.04242275625306523,
      "grad_norm": 2.6598195286937956,
      "learning_rate": 2.1200980392156864e-06,
      "loss": 0.6289,
      "step": 346
    },
    {
      "epoch": 0.04254536537518391,
      "grad_norm": 2.955906681730705,
      "learning_rate": 2.1262254901960784e-06,
      "loss": 0.6897,
      "step": 347
    },
    {
      "epoch": 0.0426679744973026,
      "grad_norm": 2.8046185660182794,
      "learning_rate": 2.132352941176471e-06,
      "loss": 0.7002,
      "step": 348
    },
    {
      "epoch": 0.042790583619421285,
      "grad_norm": 2.8695551931492003,
      "learning_rate": 2.138480392156863e-06,
      "loss": 0.699,
      "step": 349
    },
    {
      "epoch": 0.04291319274153997,
      "grad_norm": 2.5454538570614833,
      "learning_rate": 2.144607843137255e-06,
      "loss": 0.6193,
      "step": 350
    },
    {
      "epoch": 0.04303580186365866,
      "grad_norm": 2.8739391727257155,
      "learning_rate": 2.1507352941176473e-06,
      "loss": 0.652,
      "step": 351
    },
    {
      "epoch": 0.04315841098577734,
      "grad_norm": 2.756475858061246,
      "learning_rate": 2.1568627450980393e-06,
      "loss": 0.7115,
      "step": 352
    },
    {
      "epoch": 0.04328102010789603,
      "grad_norm": 2.6686011225656068,
      "learning_rate": 2.1629901960784317e-06,
      "loss": 0.6348,
      "step": 353
    },
    {
      "epoch": 0.043403629230014715,
      "grad_norm": 2.8279054496556046,
      "learning_rate": 2.1691176470588238e-06,
      "loss": 0.6488,
      "step": 354
    },
    {
      "epoch": 0.0435262383521334,
      "grad_norm": 2.7443463442577616,
      "learning_rate": 2.1752450980392158e-06,
      "loss": 0.648,
      "step": 355
    },
    {
      "epoch": 0.04364884747425209,
      "grad_norm": 2.8632581983697962,
      "learning_rate": 2.1813725490196082e-06,
      "loss": 0.7202,
      "step": 356
    },
    {
      "epoch": 0.04377145659637077,
      "grad_norm": 3.0708976476151153,
      "learning_rate": 2.1875000000000002e-06,
      "loss": 0.6513,
      "step": 357
    },
    {
      "epoch": 0.04389406571848946,
      "grad_norm": 2.828436101403911,
      "learning_rate": 2.1936274509803922e-06,
      "loss": 0.6849,
      "step": 358
    },
    {
      "epoch": 0.044016674840608144,
      "grad_norm": 2.9754113863258023,
      "learning_rate": 2.1997549019607843e-06,
      "loss": 0.6697,
      "step": 359
    },
    {
      "epoch": 0.04413928396272683,
      "grad_norm": 2.938655001386932,
      "learning_rate": 2.2058823529411767e-06,
      "loss": 0.6413,
      "step": 360
    },
    {
      "epoch": 0.04426189308484551,
      "grad_norm": 2.8396542156450826,
      "learning_rate": 2.2120098039215687e-06,
      "loss": 0.7304,
      "step": 361
    },
    {
      "epoch": 0.044384502206964195,
      "grad_norm": 2.7892156277710396,
      "learning_rate": 2.2181372549019607e-06,
      "loss": 0.6292,
      "step": 362
    },
    {
      "epoch": 0.04450711132908288,
      "grad_norm": 2.7217732674483863,
      "learning_rate": 2.224264705882353e-06,
      "loss": 0.6594,
      "step": 363
    },
    {
      "epoch": 0.04462972045120157,
      "grad_norm": 2.8427867655090635,
      "learning_rate": 2.2303921568627456e-06,
      "loss": 0.6759,
      "step": 364
    },
    {
      "epoch": 0.04475232957332025,
      "grad_norm": 2.7078663308509947,
      "learning_rate": 2.2365196078431376e-06,
      "loss": 0.6381,
      "step": 365
    },
    {
      "epoch": 0.04487493869543894,
      "grad_norm": 2.5951136729543114,
      "learning_rate": 2.2426470588235296e-06,
      "loss": 0.6451,
      "step": 366
    },
    {
      "epoch": 0.044997547817557625,
      "grad_norm": 2.508908821039022,
      "learning_rate": 2.2487745098039217e-06,
      "loss": 0.5796,
      "step": 367
    },
    {
      "epoch": 0.04512015693967631,
      "grad_norm": 2.5113161602530845,
      "learning_rate": 2.254901960784314e-06,
      "loss": 0.682,
      "step": 368
    },
    {
      "epoch": 0.045242766061794996,
      "grad_norm": 3.0187402444044333,
      "learning_rate": 2.261029411764706e-06,
      "loss": 0.6419,
      "step": 369
    },
    {
      "epoch": 0.04536537518391368,
      "grad_norm": 2.6504325372822493,
      "learning_rate": 2.267156862745098e-06,
      "loss": 0.6888,
      "step": 370
    },
    {
      "epoch": 0.04548798430603237,
      "grad_norm": 2.5181925304307833,
      "learning_rate": 2.2732843137254906e-06,
      "loss": 0.6837,
      "step": 371
    },
    {
      "epoch": 0.045610593428151054,
      "grad_norm": 3.0570253349209935,
      "learning_rate": 2.2794117647058826e-06,
      "loss": 0.7007,
      "step": 372
    },
    {
      "epoch": 0.04573320255026974,
      "grad_norm": 2.5703563964378464,
      "learning_rate": 2.2855392156862746e-06,
      "loss": 0.6844,
      "step": 373
    },
    {
      "epoch": 0.045855811672388426,
      "grad_norm": 2.596717629716991,
      "learning_rate": 2.2916666666666666e-06,
      "loss": 0.7065,
      "step": 374
    },
    {
      "epoch": 0.04597842079450711,
      "grad_norm": 2.989759923762627,
      "learning_rate": 2.297794117647059e-06,
      "loss": 0.7583,
      "step": 375
    },
    {
      "epoch": 0.0461010299166258,
      "grad_norm": 2.8208735797981297,
      "learning_rate": 2.303921568627451e-06,
      "loss": 0.6861,
      "step": 376
    },
    {
      "epoch": 0.046223639038744484,
      "grad_norm": 2.6665881794587487,
      "learning_rate": 2.3100490196078435e-06,
      "loss": 0.6307,
      "step": 377
    },
    {
      "epoch": 0.04634624816086317,
      "grad_norm": 2.964823250817355,
      "learning_rate": 2.3161764705882355e-06,
      "loss": 0.6644,
      "step": 378
    },
    {
      "epoch": 0.046468857282981856,
      "grad_norm": 2.742068199306908,
      "learning_rate": 2.3223039215686275e-06,
      "loss": 0.6463,
      "step": 379
    },
    {
      "epoch": 0.04659146640510054,
      "grad_norm": 2.742923479333252,
      "learning_rate": 2.32843137254902e-06,
      "loss": 0.6241,
      "step": 380
    },
    {
      "epoch": 0.04671407552721923,
      "grad_norm": 2.737421926134829,
      "learning_rate": 2.334558823529412e-06,
      "loss": 0.6686,
      "step": 381
    },
    {
      "epoch": 0.04683668464933791,
      "grad_norm": 2.6637216714823975,
      "learning_rate": 2.340686274509804e-06,
      "loss": 0.6205,
      "step": 382
    },
    {
      "epoch": 0.0469592937714566,
      "grad_norm": 2.732023540060389,
      "learning_rate": 2.3468137254901964e-06,
      "loss": 0.6953,
      "step": 383
    },
    {
      "epoch": 0.047081902893575285,
      "grad_norm": 2.4662308863374376,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 0.6235,
      "step": 384
    },
    {
      "epoch": 0.04720451201569397,
      "grad_norm": 2.5039520063991025,
      "learning_rate": 2.3590686274509805e-06,
      "loss": 0.6812,
      "step": 385
    },
    {
      "epoch": 0.04732712113781265,
      "grad_norm": 2.7585598905397815,
      "learning_rate": 2.3651960784313725e-06,
      "loss": 0.6692,
      "step": 386
    },
    {
      "epoch": 0.047449730259931336,
      "grad_norm": 2.711311436760385,
      "learning_rate": 2.371323529411765e-06,
      "loss": 0.7103,
      "step": 387
    },
    {
      "epoch": 0.04757233938205002,
      "grad_norm": 2.9037917217735805,
      "learning_rate": 2.377450980392157e-06,
      "loss": 0.6332,
      "step": 388
    },
    {
      "epoch": 0.04769494850416871,
      "grad_norm": 2.802807633592672,
      "learning_rate": 2.3835784313725494e-06,
      "loss": 0.6509,
      "step": 389
    },
    {
      "epoch": 0.047817557626287394,
      "grad_norm": 2.991426958649881,
      "learning_rate": 2.3897058823529414e-06,
      "loss": 0.6926,
      "step": 390
    },
    {
      "epoch": 0.04794016674840608,
      "grad_norm": 2.6540798217093884,
      "learning_rate": 2.395833333333334e-06,
      "loss": 0.6649,
      "step": 391
    },
    {
      "epoch": 0.048062775870524765,
      "grad_norm": 2.6227339445208235,
      "learning_rate": 2.401960784313726e-06,
      "loss": 0.6175,
      "step": 392
    },
    {
      "epoch": 0.04818538499264345,
      "grad_norm": 2.823408856728765,
      "learning_rate": 2.408088235294118e-06,
      "loss": 0.6622,
      "step": 393
    },
    {
      "epoch": 0.04830799411476214,
      "grad_norm": 3.098523120052236,
      "learning_rate": 2.41421568627451e-06,
      "loss": 0.6591,
      "step": 394
    },
    {
      "epoch": 0.04843060323688082,
      "grad_norm": 2.943225530989364,
      "learning_rate": 2.4203431372549023e-06,
      "loss": 0.6851,
      "step": 395
    },
    {
      "epoch": 0.04855321235899951,
      "grad_norm": 2.8086275828746516,
      "learning_rate": 2.4264705882352943e-06,
      "loss": 0.6578,
      "step": 396
    },
    {
      "epoch": 0.048675821481118195,
      "grad_norm": 2.7536092857555294,
      "learning_rate": 2.4325980392156863e-06,
      "loss": 0.6775,
      "step": 397
    },
    {
      "epoch": 0.04879843060323688,
      "grad_norm": 2.8671203126675593,
      "learning_rate": 2.4387254901960788e-06,
      "loss": 0.6523,
      "step": 398
    },
    {
      "epoch": 0.04892103972535557,
      "grad_norm": 3.149492514546111,
      "learning_rate": 2.444852941176471e-06,
      "loss": 0.6686,
      "step": 399
    },
    {
      "epoch": 0.04904364884747425,
      "grad_norm": 3.132829659417778,
      "learning_rate": 2.450980392156863e-06,
      "loss": 0.6915,
      "step": 400
    },
    {
      "epoch": 0.04916625796959294,
      "grad_norm": 2.7930207285330426,
      "learning_rate": 2.457107843137255e-06,
      "loss": 0.6352,
      "step": 401
    },
    {
      "epoch": 0.049288867091711625,
      "grad_norm": 2.933848849863724,
      "learning_rate": 2.4632352941176473e-06,
      "loss": 0.6388,
      "step": 402
    },
    {
      "epoch": 0.04941147621383031,
      "grad_norm": 2.8607281910863773,
      "learning_rate": 2.4693627450980397e-06,
      "loss": 0.6948,
      "step": 403
    },
    {
      "epoch": 0.049534085335948996,
      "grad_norm": 2.7164661907462557,
      "learning_rate": 2.4754901960784317e-06,
      "loss": 0.6889,
      "step": 404
    },
    {
      "epoch": 0.04965669445806768,
      "grad_norm": 3.1213618624007773,
      "learning_rate": 2.4816176470588237e-06,
      "loss": 0.6445,
      "step": 405
    },
    {
      "epoch": 0.04977930358018637,
      "grad_norm": 2.790015952558409,
      "learning_rate": 2.4877450980392158e-06,
      "loss": 0.66,
      "step": 406
    },
    {
      "epoch": 0.049901912702305054,
      "grad_norm": 2.6085069579400284,
      "learning_rate": 2.493872549019608e-06,
      "loss": 0.6505,
      "step": 407
    },
    {
      "epoch": 0.05002452182442374,
      "grad_norm": 2.6028808501236425,
      "learning_rate": 2.5e-06,
      "loss": 0.6707,
      "step": 408
    },
    {
      "epoch": 0.050147130946542426,
      "grad_norm": 2.7316753404053253,
      "learning_rate": 2.5061274509803922e-06,
      "loss": 0.6643,
      "step": 409
    },
    {
      "epoch": 0.05026974006866111,
      "grad_norm": 2.5578750369100596,
      "learning_rate": 2.5122549019607847e-06,
      "loss": 0.6254,
      "step": 410
    },
    {
      "epoch": 0.05039234919077979,
      "grad_norm": 2.572580775913879,
      "learning_rate": 2.5183823529411767e-06,
      "loss": 0.7449,
      "step": 411
    },
    {
      "epoch": 0.05051495831289848,
      "grad_norm": 2.672045099466593,
      "learning_rate": 2.5245098039215687e-06,
      "loss": 0.6545,
      "step": 412
    },
    {
      "epoch": 0.05063756743501716,
      "grad_norm": 2.646514163779669,
      "learning_rate": 2.5306372549019607e-06,
      "loss": 0.6352,
      "step": 413
    },
    {
      "epoch": 0.05076017655713585,
      "grad_norm": 2.9867896702259227,
      "learning_rate": 2.536764705882353e-06,
      "loss": 0.6488,
      "step": 414
    },
    {
      "epoch": 0.050882785679254534,
      "grad_norm": 3.158944765493791,
      "learning_rate": 2.542892156862745e-06,
      "loss": 0.6689,
      "step": 415
    },
    {
      "epoch": 0.05100539480137322,
      "grad_norm": 2.65865361267602,
      "learning_rate": 2.549019607843137e-06,
      "loss": 0.6188,
      "step": 416
    },
    {
      "epoch": 0.051128003923491906,
      "grad_norm": 2.8984849254611524,
      "learning_rate": 2.5551470588235296e-06,
      "loss": 0.6633,
      "step": 417
    },
    {
      "epoch": 0.05125061304561059,
      "grad_norm": 2.879025766470668,
      "learning_rate": 2.5612745098039216e-06,
      "loss": 0.7343,
      "step": 418
    },
    {
      "epoch": 0.05137322216772928,
      "grad_norm": 2.581580685469118,
      "learning_rate": 2.5674019607843136e-06,
      "loss": 0.6559,
      "step": 419
    },
    {
      "epoch": 0.051495831289847964,
      "grad_norm": 2.4654176073452034,
      "learning_rate": 2.5735294117647057e-06,
      "loss": 0.7013,
      "step": 420
    },
    {
      "epoch": 0.05161844041196665,
      "grad_norm": 2.770294958999813,
      "learning_rate": 2.5796568627450985e-06,
      "loss": 0.6896,
      "step": 421
    },
    {
      "epoch": 0.051741049534085336,
      "grad_norm": 2.906974298868829,
      "learning_rate": 2.5857843137254905e-06,
      "loss": 0.662,
      "step": 422
    },
    {
      "epoch": 0.05186365865620402,
      "grad_norm": 2.859217163402551,
      "learning_rate": 2.591911764705883e-06,
      "loss": 0.6887,
      "step": 423
    },
    {
      "epoch": 0.05198626777832271,
      "grad_norm": 2.5996409550798503,
      "learning_rate": 2.598039215686275e-06,
      "loss": 0.6788,
      "step": 424
    },
    {
      "epoch": 0.052108876900441393,
      "grad_norm": 2.832706300983865,
      "learning_rate": 2.604166666666667e-06,
      "loss": 0.5925,
      "step": 425
    },
    {
      "epoch": 0.05223148602256008,
      "grad_norm": 2.899058184123579,
      "learning_rate": 2.610294117647059e-06,
      "loss": 0.6592,
      "step": 426
    },
    {
      "epoch": 0.052354095144678765,
      "grad_norm": 2.449338734509876,
      "learning_rate": 2.6164215686274515e-06,
      "loss": 0.6294,
      "step": 427
    },
    {
      "epoch": 0.05247670426679745,
      "grad_norm": 2.6154737684442795,
      "learning_rate": 2.6225490196078435e-06,
      "loss": 0.6782,
      "step": 428
    },
    {
      "epoch": 0.05259931338891614,
      "grad_norm": 2.611190872059371,
      "learning_rate": 2.6286764705882355e-06,
      "loss": 0.6709,
      "step": 429
    },
    {
      "epoch": 0.05272192251103482,
      "grad_norm": 2.724807055352511,
      "learning_rate": 2.634803921568628e-06,
      "loss": 0.652,
      "step": 430
    },
    {
      "epoch": 0.05284453163315351,
      "grad_norm": 2.7583959449386555,
      "learning_rate": 2.64093137254902e-06,
      "loss": 0.7016,
      "step": 431
    },
    {
      "epoch": 0.052967140755272195,
      "grad_norm": 2.452312054787676,
      "learning_rate": 2.647058823529412e-06,
      "loss": 0.6437,
      "step": 432
    },
    {
      "epoch": 0.05308974987739088,
      "grad_norm": 2.9164807472268826,
      "learning_rate": 2.653186274509804e-06,
      "loss": 0.6676,
      "step": 433
    },
    {
      "epoch": 0.05321235899950957,
      "grad_norm": 2.7656250421056803,
      "learning_rate": 2.6593137254901964e-06,
      "loss": 0.6299,
      "step": 434
    },
    {
      "epoch": 0.05333496812162825,
      "grad_norm": 2.8692973960875054,
      "learning_rate": 2.6654411764705884e-06,
      "loss": 0.6142,
      "step": 435
    },
    {
      "epoch": 0.05345757724374693,
      "grad_norm": 2.837859415964548,
      "learning_rate": 2.6715686274509804e-06,
      "loss": 0.6584,
      "step": 436
    },
    {
      "epoch": 0.05358018636586562,
      "grad_norm": 2.685639626045144,
      "learning_rate": 2.677696078431373e-06,
      "loss": 0.6424,
      "step": 437
    },
    {
      "epoch": 0.0537027954879843,
      "grad_norm": 2.5478599930609063,
      "learning_rate": 2.683823529411765e-06,
      "loss": 0.6448,
      "step": 438
    },
    {
      "epoch": 0.05382540461010299,
      "grad_norm": 3.0502147921519143,
      "learning_rate": 2.689950980392157e-06,
      "loss": 0.7209,
      "step": 439
    },
    {
      "epoch": 0.053948013732221675,
      "grad_norm": 2.6748781241622703,
      "learning_rate": 2.696078431372549e-06,
      "loss": 0.6926,
      "step": 440
    },
    {
      "epoch": 0.05407062285434036,
      "grad_norm": 2.9701550660849265,
      "learning_rate": 2.7022058823529414e-06,
      "loss": 0.6802,
      "step": 441
    },
    {
      "epoch": 0.05419323197645905,
      "grad_norm": 2.766170873197876,
      "learning_rate": 2.7083333333333334e-06,
      "loss": 0.6898,
      "step": 442
    },
    {
      "epoch": 0.05431584109857773,
      "grad_norm": 2.740417489869168,
      "learning_rate": 2.7144607843137254e-06,
      "loss": 0.6971,
      "step": 443
    },
    {
      "epoch": 0.05443845022069642,
      "grad_norm": 2.422266711462762,
      "learning_rate": 2.720588235294118e-06,
      "loss": 0.6764,
      "step": 444
    },
    {
      "epoch": 0.054561059342815105,
      "grad_norm": 2.966125626857149,
      "learning_rate": 2.72671568627451e-06,
      "loss": 0.6718,
      "step": 445
    },
    {
      "epoch": 0.05468366846493379,
      "grad_norm": 2.8697335330350664,
      "learning_rate": 2.732843137254902e-06,
      "loss": 0.7212,
      "step": 446
    },
    {
      "epoch": 0.05480627758705248,
      "grad_norm": 2.6359268878266433,
      "learning_rate": 2.7389705882352947e-06,
      "loss": 0.7024,
      "step": 447
    },
    {
      "epoch": 0.05492888670917116,
      "grad_norm": 2.688672970622052,
      "learning_rate": 2.7450980392156867e-06,
      "loss": 0.6649,
      "step": 448
    },
    {
      "epoch": 0.05505149583128985,
      "grad_norm": 2.589907399294975,
      "learning_rate": 2.7512254901960788e-06,
      "loss": 0.6116,
      "step": 449
    },
    {
      "epoch": 0.055174104953408534,
      "grad_norm": 2.5082673472735864,
      "learning_rate": 2.757352941176471e-06,
      "loss": 0.7152,
      "step": 450
    },
    {
      "epoch": 0.05529671407552722,
      "grad_norm": 3.1866201994984933,
      "learning_rate": 2.763480392156863e-06,
      "loss": 0.7007,
      "step": 451
    },
    {
      "epoch": 0.055419323197645906,
      "grad_norm": 2.546917720781088,
      "learning_rate": 2.7696078431372552e-06,
      "loss": 0.7164,
      "step": 452
    },
    {
      "epoch": 0.05554193231976459,
      "grad_norm": 3.1598210796491637,
      "learning_rate": 2.7757352941176472e-06,
      "loss": 0.631,
      "step": 453
    },
    {
      "epoch": 0.05566454144188328,
      "grad_norm": 2.8351348425939764,
      "learning_rate": 2.7818627450980397e-06,
      "loss": 0.707,
      "step": 454
    },
    {
      "epoch": 0.055787150564001964,
      "grad_norm": 2.7856198337410447,
      "learning_rate": 2.7879901960784317e-06,
      "loss": 0.6556,
      "step": 455
    },
    {
      "epoch": 0.05590975968612065,
      "grad_norm": 2.674742712482218,
      "learning_rate": 2.7941176470588237e-06,
      "loss": 0.6802,
      "step": 456
    },
    {
      "epoch": 0.056032368808239336,
      "grad_norm": 2.785196584822982,
      "learning_rate": 2.800245098039216e-06,
      "loss": 0.7076,
      "step": 457
    },
    {
      "epoch": 0.05615497793035802,
      "grad_norm": 2.665180031316546,
      "learning_rate": 2.806372549019608e-06,
      "loss": 0.6302,
      "step": 458
    },
    {
      "epoch": 0.05627758705247671,
      "grad_norm": 3.0089406311244065,
      "learning_rate": 2.8125e-06,
      "loss": 0.6357,
      "step": 459
    },
    {
      "epoch": 0.05640019617459539,
      "grad_norm": 2.815490604795534,
      "learning_rate": 2.818627450980392e-06,
      "loss": 0.5997,
      "step": 460
    },
    {
      "epoch": 0.05652280529671407,
      "grad_norm": 2.586360474008607,
      "learning_rate": 2.8247549019607846e-06,
      "loss": 0.6166,
      "step": 461
    },
    {
      "epoch": 0.05664541441883276,
      "grad_norm": 2.885042080575886,
      "learning_rate": 2.8308823529411766e-06,
      "loss": 0.654,
      "step": 462
    },
    {
      "epoch": 0.056768023540951444,
      "grad_norm": 2.6110159583100314,
      "learning_rate": 2.8370098039215687e-06,
      "loss": 0.6416,
      "step": 463
    },
    {
      "epoch": 0.05689063266307013,
      "grad_norm": 2.6168268764996374,
      "learning_rate": 2.843137254901961e-06,
      "loss": 0.714,
      "step": 464
    },
    {
      "epoch": 0.057013241785188816,
      "grad_norm": 2.9645156953061336,
      "learning_rate": 2.849264705882353e-06,
      "loss": 0.6611,
      "step": 465
    },
    {
      "epoch": 0.0571358509073075,
      "grad_norm": 2.650770912718917,
      "learning_rate": 2.855392156862745e-06,
      "loss": 0.6823,
      "step": 466
    },
    {
      "epoch": 0.05725846002942619,
      "grad_norm": 2.7915718185297367,
      "learning_rate": 2.861519607843137e-06,
      "loss": 0.6788,
      "step": 467
    },
    {
      "epoch": 0.057381069151544874,
      "grad_norm": 2.816712850177145,
      "learning_rate": 2.8676470588235296e-06,
      "loss": 0.625,
      "step": 468
    },
    {
      "epoch": 0.05750367827366356,
      "grad_norm": 2.6818000886765754,
      "learning_rate": 2.8737745098039216e-06,
      "loss": 0.6432,
      "step": 469
    },
    {
      "epoch": 0.057626287395782246,
      "grad_norm": 2.770981918461299,
      "learning_rate": 2.8799019607843136e-06,
      "loss": 0.5993,
      "step": 470
    },
    {
      "epoch": 0.05774889651790093,
      "grad_norm": 2.639362190527653,
      "learning_rate": 2.886029411764706e-06,
      "loss": 0.5954,
      "step": 471
    },
    {
      "epoch": 0.05787150564001962,
      "grad_norm": 2.6972152421645004,
      "learning_rate": 2.8921568627450985e-06,
      "loss": 0.6667,
      "step": 472
    },
    {
      "epoch": 0.0579941147621383,
      "grad_norm": 2.764590434691949,
      "learning_rate": 2.8982843137254905e-06,
      "loss": 0.5968,
      "step": 473
    },
    {
      "epoch": 0.05811672388425699,
      "grad_norm": 2.573066779282763,
      "learning_rate": 2.904411764705883e-06,
      "loss": 0.6737,
      "step": 474
    },
    {
      "epoch": 0.058239333006375675,
      "grad_norm": 2.5918246299495618,
      "learning_rate": 2.910539215686275e-06,
      "loss": 0.6612,
      "step": 475
    },
    {
      "epoch": 0.05836194212849436,
      "grad_norm": 2.7640611228012157,
      "learning_rate": 2.916666666666667e-06,
      "loss": 0.6002,
      "step": 476
    },
    {
      "epoch": 0.05848455125061305,
      "grad_norm": 2.5672661703168487,
      "learning_rate": 2.9227941176470594e-06,
      "loss": 0.6333,
      "step": 477
    },
    {
      "epoch": 0.05860716037273173,
      "grad_norm": 2.858225986242088,
      "learning_rate": 2.9289215686274514e-06,
      "loss": 0.6509,
      "step": 478
    },
    {
      "epoch": 0.05872976949485042,
      "grad_norm": 2.527238398308491,
      "learning_rate": 2.9350490196078434e-06,
      "loss": 0.5402,
      "step": 479
    },
    {
      "epoch": 0.058852378616969105,
      "grad_norm": 2.541735535304326,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.679,
      "step": 480
    },
    {
      "epoch": 0.05897498773908779,
      "grad_norm": 2.6886038089738378,
      "learning_rate": 2.947303921568628e-06,
      "loss": 0.6408,
      "step": 481
    },
    {
      "epoch": 0.059097596861206476,
      "grad_norm": 2.8230573683933184,
      "learning_rate": 2.95343137254902e-06,
      "loss": 0.6765,
      "step": 482
    },
    {
      "epoch": 0.05922020598332516,
      "grad_norm": 2.7565210498670214,
      "learning_rate": 2.959558823529412e-06,
      "loss": 0.6301,
      "step": 483
    },
    {
      "epoch": 0.05934281510544385,
      "grad_norm": 2.62806866967236,
      "learning_rate": 2.9656862745098044e-06,
      "loss": 0.595,
      "step": 484
    },
    {
      "epoch": 0.05946542422756253,
      "grad_norm": 2.404089786341026,
      "learning_rate": 2.9718137254901964e-06,
      "loss": 0.6803,
      "step": 485
    },
    {
      "epoch": 0.05958803334968121,
      "grad_norm": 2.5054251303692285,
      "learning_rate": 2.9779411764705884e-06,
      "loss": 0.6688,
      "step": 486
    },
    {
      "epoch": 0.0597106424717999,
      "grad_norm": 2.4776653768406214,
      "learning_rate": 2.9840686274509804e-06,
      "loss": 0.6612,
      "step": 487
    },
    {
      "epoch": 0.059833251593918585,
      "grad_norm": 2.835485041185976,
      "learning_rate": 2.990196078431373e-06,
      "loss": 0.6492,
      "step": 488
    },
    {
      "epoch": 0.05995586071603727,
      "grad_norm": 2.787438466249873,
      "learning_rate": 2.996323529411765e-06,
      "loss": 0.6652,
      "step": 489
    },
    {
      "epoch": 0.06007846983815596,
      "grad_norm": 2.4247451982330364,
      "learning_rate": 3.002450980392157e-06,
      "loss": 0.6237,
      "step": 490
    },
    {
      "epoch": 0.06020107896027464,
      "grad_norm": 2.797457199516836,
      "learning_rate": 3.0085784313725493e-06,
      "loss": 0.6793,
      "step": 491
    },
    {
      "epoch": 0.06032368808239333,
      "grad_norm": 2.68058195541567,
      "learning_rate": 3.0147058823529413e-06,
      "loss": 0.6284,
      "step": 492
    },
    {
      "epoch": 0.060446297204512014,
      "grad_norm": 2.7263576367177844,
      "learning_rate": 3.0208333333333334e-06,
      "loss": 0.6459,
      "step": 493
    },
    {
      "epoch": 0.0605689063266307,
      "grad_norm": 2.533045111352396,
      "learning_rate": 3.0269607843137254e-06,
      "loss": 0.6389,
      "step": 494
    },
    {
      "epoch": 0.060691515448749386,
      "grad_norm": 2.728236385843721,
      "learning_rate": 3.033088235294118e-06,
      "loss": 0.6483,
      "step": 495
    },
    {
      "epoch": 0.06081412457086807,
      "grad_norm": 2.8310870872614755,
      "learning_rate": 3.03921568627451e-06,
      "loss": 0.6577,
      "step": 496
    },
    {
      "epoch": 0.06093673369298676,
      "grad_norm": 3.0298604848263824,
      "learning_rate": 3.045343137254902e-06,
      "loss": 0.7007,
      "step": 497
    },
    {
      "epoch": 0.061059342815105444,
      "grad_norm": 2.569482813239975,
      "learning_rate": 3.0514705882352947e-06,
      "loss": 0.6842,
      "step": 498
    },
    {
      "epoch": 0.06118195193722413,
      "grad_norm": 2.451611088431613,
      "learning_rate": 3.0575980392156867e-06,
      "loss": 0.6546,
      "step": 499
    },
    {
      "epoch": 0.061304561059342816,
      "grad_norm": 2.723132116050623,
      "learning_rate": 3.0637254901960787e-06,
      "loss": 0.6612,
      "step": 500
    },
    {
      "epoch": 0.0614271701814615,
      "grad_norm": 2.6849417137926666,
      "learning_rate": 3.069852941176471e-06,
      "loss": 0.6559,
      "step": 501
    },
    {
      "epoch": 0.06154977930358019,
      "grad_norm": 2.3467084869989585,
      "learning_rate": 3.075980392156863e-06,
      "loss": 0.6854,
      "step": 502
    },
    {
      "epoch": 0.061672388425698874,
      "grad_norm": 2.516664051263749,
      "learning_rate": 3.082107843137255e-06,
      "loss": 0.6376,
      "step": 503
    },
    {
      "epoch": 0.06179499754781756,
      "grad_norm": 2.6308711308423924,
      "learning_rate": 3.0882352941176476e-06,
      "loss": 0.6212,
      "step": 504
    },
    {
      "epoch": 0.061917606669936245,
      "grad_norm": 2.83416444097851,
      "learning_rate": 3.0943627450980397e-06,
      "loss": 0.5723,
      "step": 505
    },
    {
      "epoch": 0.06204021579205493,
      "grad_norm": 2.757514846719018,
      "learning_rate": 3.1004901960784317e-06,
      "loss": 0.6128,
      "step": 506
    },
    {
      "epoch": 0.06216282491417362,
      "grad_norm": 2.8266101447745737,
      "learning_rate": 3.1066176470588237e-06,
      "loss": 0.6114,
      "step": 507
    },
    {
      "epoch": 0.0622854340362923,
      "grad_norm": 2.708472461371965,
      "learning_rate": 3.112745098039216e-06,
      "loss": 0.6228,
      "step": 508
    },
    {
      "epoch": 0.06240804315841099,
      "grad_norm": 2.679499800354595,
      "learning_rate": 3.118872549019608e-06,
      "loss": 0.6352,
      "step": 509
    },
    {
      "epoch": 0.06253065228052967,
      "grad_norm": 2.5286336697948233,
      "learning_rate": 3.125e-06,
      "loss": 0.6364,
      "step": 510
    },
    {
      "epoch": 0.06265326140264836,
      "grad_norm": 2.8504927201624954,
      "learning_rate": 3.1311274509803926e-06,
      "loss": 0.6687,
      "step": 511
    },
    {
      "epoch": 0.06277587052476705,
      "grad_norm": 2.9020109519013064,
      "learning_rate": 3.1372549019607846e-06,
      "loss": 0.646,
      "step": 512
    },
    {
      "epoch": 0.06289847964688573,
      "grad_norm": 2.5423551519561083,
      "learning_rate": 3.1433823529411766e-06,
      "loss": 0.6638,
      "step": 513
    },
    {
      "epoch": 0.06302108876900442,
      "grad_norm": 2.964262378919044,
      "learning_rate": 3.1495098039215686e-06,
      "loss": 0.7075,
      "step": 514
    },
    {
      "epoch": 0.0631436978911231,
      "grad_norm": 2.689453054417015,
      "learning_rate": 3.155637254901961e-06,
      "loss": 0.6716,
      "step": 515
    },
    {
      "epoch": 0.06326630701324179,
      "grad_norm": 2.7097377221023096,
      "learning_rate": 3.161764705882353e-06,
      "loss": 0.6936,
      "step": 516
    },
    {
      "epoch": 0.06338891613536048,
      "grad_norm": 2.471871663081827,
      "learning_rate": 3.167892156862745e-06,
      "loss": 0.6676,
      "step": 517
    },
    {
      "epoch": 0.06351152525747916,
      "grad_norm": 2.622927565911037,
      "learning_rate": 3.1740196078431375e-06,
      "loss": 0.6492,
      "step": 518
    },
    {
      "epoch": 0.06363413437959785,
      "grad_norm": 2.949813675322585,
      "learning_rate": 3.1801470588235296e-06,
      "loss": 0.6059,
      "step": 519
    },
    {
      "epoch": 0.06375674350171653,
      "grad_norm": 2.8392477464018184,
      "learning_rate": 3.1862745098039216e-06,
      "loss": 0.7138,
      "step": 520
    },
    {
      "epoch": 0.06387935262383522,
      "grad_norm": 2.439012169112751,
      "learning_rate": 3.1924019607843136e-06,
      "loss": 0.6535,
      "step": 521
    },
    {
      "epoch": 0.0640019617459539,
      "grad_norm": 2.760025043684811,
      "learning_rate": 3.198529411764706e-06,
      "loss": 0.5965,
      "step": 522
    },
    {
      "epoch": 0.06412457086807258,
      "grad_norm": 2.5032739331276685,
      "learning_rate": 3.2046568627450985e-06,
      "loss": 0.6236,
      "step": 523
    },
    {
      "epoch": 0.06424717999019126,
      "grad_norm": 2.9146450267146737,
      "learning_rate": 3.210784313725491e-06,
      "loss": 0.6063,
      "step": 524
    },
    {
      "epoch": 0.06436978911230995,
      "grad_norm": 2.6528860337227815,
      "learning_rate": 3.216911764705883e-06,
      "loss": 0.6441,
      "step": 525
    },
    {
      "epoch": 0.06449239823442864,
      "grad_norm": 3.132032845535653,
      "learning_rate": 3.223039215686275e-06,
      "loss": 0.6725,
      "step": 526
    },
    {
      "epoch": 0.06461500735654732,
      "grad_norm": 2.373796971387047,
      "learning_rate": 3.229166666666667e-06,
      "loss": 0.6388,
      "step": 527
    },
    {
      "epoch": 0.06473761647866601,
      "grad_norm": 2.2997497513857748,
      "learning_rate": 3.2352941176470594e-06,
      "loss": 0.6163,
      "step": 528
    },
    {
      "epoch": 0.0648602256007847,
      "grad_norm": 3.112502652739492,
      "learning_rate": 3.2414215686274514e-06,
      "loss": 0.6326,
      "step": 529
    },
    {
      "epoch": 0.06498283472290338,
      "grad_norm": 2.9457346497406465,
      "learning_rate": 3.2475490196078434e-06,
      "loss": 0.6963,
      "step": 530
    },
    {
      "epoch": 0.06510544384502207,
      "grad_norm": 2.509233109678335,
      "learning_rate": 3.253676470588236e-06,
      "loss": 0.6221,
      "step": 531
    },
    {
      "epoch": 0.06522805296714075,
      "grad_norm": 2.790116583778842,
      "learning_rate": 3.259803921568628e-06,
      "loss": 0.7068,
      "step": 532
    },
    {
      "epoch": 0.06535066208925944,
      "grad_norm": 2.7653237508719806,
      "learning_rate": 3.26593137254902e-06,
      "loss": 0.6466,
      "step": 533
    },
    {
      "epoch": 0.06547327121137812,
      "grad_norm": 3.032168345716266,
      "learning_rate": 3.272058823529412e-06,
      "loss": 0.7062,
      "step": 534
    },
    {
      "epoch": 0.06559588033349681,
      "grad_norm": 2.6144908754817715,
      "learning_rate": 3.2781862745098043e-06,
      "loss": 0.6333,
      "step": 535
    },
    {
      "epoch": 0.0657184894556155,
      "grad_norm": 2.742069816145003,
      "learning_rate": 3.2843137254901964e-06,
      "loss": 0.6697,
      "step": 536
    },
    {
      "epoch": 0.06584109857773418,
      "grad_norm": 2.7204822047659927,
      "learning_rate": 3.2904411764705884e-06,
      "loss": 0.6224,
      "step": 537
    },
    {
      "epoch": 0.06596370769985287,
      "grad_norm": 2.867146934558484,
      "learning_rate": 3.296568627450981e-06,
      "loss": 0.6829,
      "step": 538
    },
    {
      "epoch": 0.06608631682197155,
      "grad_norm": 2.5417332390445866,
      "learning_rate": 3.302696078431373e-06,
      "loss": 0.5818,
      "step": 539
    },
    {
      "epoch": 0.06620892594409024,
      "grad_norm": 2.6310660468946905,
      "learning_rate": 3.308823529411765e-06,
      "loss": 0.6499,
      "step": 540
    },
    {
      "epoch": 0.06633153506620892,
      "grad_norm": 2.5699741430934666,
      "learning_rate": 3.314950980392157e-06,
      "loss": 0.671,
      "step": 541
    },
    {
      "epoch": 0.06645414418832761,
      "grad_norm": 2.8539754911489994,
      "learning_rate": 3.3210784313725493e-06,
      "loss": 0.6843,
      "step": 542
    },
    {
      "epoch": 0.0665767533104463,
      "grad_norm": 2.588765207851093,
      "learning_rate": 3.3272058823529413e-06,
      "loss": 0.6435,
      "step": 543
    },
    {
      "epoch": 0.06669936243256498,
      "grad_norm": 2.779063797243973,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.6848,
      "step": 544
    },
    {
      "epoch": 0.06682197155468367,
      "grad_norm": 2.529093202484325,
      "learning_rate": 3.3394607843137258e-06,
      "loss": 0.6268,
      "step": 545
    },
    {
      "epoch": 0.06694458067680235,
      "grad_norm": 2.4798337111753765,
      "learning_rate": 3.3455882352941178e-06,
      "loss": 0.6625,
      "step": 546
    },
    {
      "epoch": 0.06706718979892104,
      "grad_norm": 2.5465696451651896,
      "learning_rate": 3.35171568627451e-06,
      "loss": 0.6689,
      "step": 547
    },
    {
      "epoch": 0.06718979892103973,
      "grad_norm": 2.4725633588942606,
      "learning_rate": 3.357843137254902e-06,
      "loss": 0.6433,
      "step": 548
    },
    {
      "epoch": 0.06731240804315841,
      "grad_norm": 2.7271361503256766,
      "learning_rate": 3.3639705882352947e-06,
      "loss": 0.6224,
      "step": 549
    },
    {
      "epoch": 0.0674350171652771,
      "grad_norm": 2.841335570092196,
      "learning_rate": 3.3700980392156867e-06,
      "loss": 0.6158,
      "step": 550
    },
    {
      "epoch": 0.06755762628739578,
      "grad_norm": 2.5725413399020387,
      "learning_rate": 3.376225490196079e-06,
      "loss": 0.6702,
      "step": 551
    },
    {
      "epoch": 0.06768023540951447,
      "grad_norm": 2.8165848139399134,
      "learning_rate": 3.382352941176471e-06,
      "loss": 0.6132,
      "step": 552
    },
    {
      "epoch": 0.06780284453163316,
      "grad_norm": 2.6085494204019564,
      "learning_rate": 3.388480392156863e-06,
      "loss": 0.6122,
      "step": 553
    },
    {
      "epoch": 0.06792545365375184,
      "grad_norm": 2.972926700575479,
      "learning_rate": 3.394607843137255e-06,
      "loss": 0.5848,
      "step": 554
    },
    {
      "epoch": 0.06804806277587053,
      "grad_norm": 2.811170029563435,
      "learning_rate": 3.4007352941176476e-06,
      "loss": 0.6641,
      "step": 555
    },
    {
      "epoch": 0.06817067189798921,
      "grad_norm": 2.7876912943074075,
      "learning_rate": 3.4068627450980396e-06,
      "loss": 0.632,
      "step": 556
    },
    {
      "epoch": 0.0682932810201079,
      "grad_norm": 3.1450318223199245,
      "learning_rate": 3.4129901960784316e-06,
      "loss": 0.6703,
      "step": 557
    },
    {
      "epoch": 0.06841589014222658,
      "grad_norm": 3.0273739163673454,
      "learning_rate": 3.419117647058824e-06,
      "loss": 0.6613,
      "step": 558
    },
    {
      "epoch": 0.06853849926434527,
      "grad_norm": 2.870045533185178,
      "learning_rate": 3.425245098039216e-06,
      "loss": 0.6221,
      "step": 559
    },
    {
      "epoch": 0.06866110838646396,
      "grad_norm": 2.708526261826918,
      "learning_rate": 3.431372549019608e-06,
      "loss": 0.5568,
      "step": 560
    },
    {
      "epoch": 0.06878371750858264,
      "grad_norm": 2.597313707974758,
      "learning_rate": 3.4375e-06,
      "loss": 0.6992,
      "step": 561
    },
    {
      "epoch": 0.06890632663070133,
      "grad_norm": 2.494927740147448,
      "learning_rate": 3.4436274509803926e-06,
      "loss": 0.6278,
      "step": 562
    },
    {
      "epoch": 0.06902893575282001,
      "grad_norm": 2.5777764049842804,
      "learning_rate": 3.4497549019607846e-06,
      "loss": 0.6653,
      "step": 563
    },
    {
      "epoch": 0.0691515448749387,
      "grad_norm": 2.5547783614722115,
      "learning_rate": 3.4558823529411766e-06,
      "loss": 0.6524,
      "step": 564
    },
    {
      "epoch": 0.06927415399705739,
      "grad_norm": 2.7512241843906047,
      "learning_rate": 3.462009803921569e-06,
      "loss": 0.6257,
      "step": 565
    },
    {
      "epoch": 0.06939676311917607,
      "grad_norm": 2.94910166514,
      "learning_rate": 3.468137254901961e-06,
      "loss": 0.6616,
      "step": 566
    },
    {
      "epoch": 0.06951937224129476,
      "grad_norm": 2.520396141895472,
      "learning_rate": 3.474264705882353e-06,
      "loss": 0.6279,
      "step": 567
    },
    {
      "epoch": 0.06964198136341344,
      "grad_norm": 2.538213959457895,
      "learning_rate": 3.480392156862745e-06,
      "loss": 0.6398,
      "step": 568
    },
    {
      "epoch": 0.06976459048553213,
      "grad_norm": 2.817808664975529,
      "learning_rate": 3.4865196078431375e-06,
      "loss": 0.6248,
      "step": 569
    },
    {
      "epoch": 0.06988719960765082,
      "grad_norm": 2.6524292052319507,
      "learning_rate": 3.4926470588235295e-06,
      "loss": 0.6619,
      "step": 570
    },
    {
      "epoch": 0.0700098087297695,
      "grad_norm": 2.698905344979132,
      "learning_rate": 3.4987745098039215e-06,
      "loss": 0.6214,
      "step": 571
    },
    {
      "epoch": 0.07013241785188817,
      "grad_norm": 2.47695271579322,
      "learning_rate": 3.504901960784314e-06,
      "loss": 0.6529,
      "step": 572
    },
    {
      "epoch": 0.07025502697400686,
      "grad_norm": 2.7407584394763433,
      "learning_rate": 3.511029411764706e-06,
      "loss": 0.6537,
      "step": 573
    },
    {
      "epoch": 0.07037763609612555,
      "grad_norm": 2.58943486927854,
      "learning_rate": 3.5171568627450984e-06,
      "loss": 0.6679,
      "step": 574
    },
    {
      "epoch": 0.07050024521824423,
      "grad_norm": 2.8361510561598027,
      "learning_rate": 3.523284313725491e-06,
      "loss": 0.6463,
      "step": 575
    },
    {
      "epoch": 0.07062285434036292,
      "grad_norm": 2.5120248873985456,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.6285,
      "step": 576
    },
    {
      "epoch": 0.0707454634624816,
      "grad_norm": 2.9739665375212274,
      "learning_rate": 3.535539215686275e-06,
      "loss": 0.7081,
      "step": 577
    },
    {
      "epoch": 0.07086807258460029,
      "grad_norm": 2.478827895691352,
      "learning_rate": 3.5416666666666673e-06,
      "loss": 0.6206,
      "step": 578
    },
    {
      "epoch": 0.07099068170671897,
      "grad_norm": 2.625864935659635,
      "learning_rate": 3.5477941176470594e-06,
      "loss": 0.595,
      "step": 579
    },
    {
      "epoch": 0.07111329082883766,
      "grad_norm": 2.716859845813008,
      "learning_rate": 3.5539215686274514e-06,
      "loss": 0.7128,
      "step": 580
    },
    {
      "epoch": 0.07123589995095635,
      "grad_norm": 2.94139245865735,
      "learning_rate": 3.5600490196078434e-06,
      "loss": 0.6684,
      "step": 581
    },
    {
      "epoch": 0.07135850907307503,
      "grad_norm": 2.847878545478668,
      "learning_rate": 3.566176470588236e-06,
      "loss": 0.6477,
      "step": 582
    },
    {
      "epoch": 0.07148111819519372,
      "grad_norm": 2.6431810846435204,
      "learning_rate": 3.572303921568628e-06,
      "loss": 0.6624,
      "step": 583
    },
    {
      "epoch": 0.0716037273173124,
      "grad_norm": 2.6706811548363225,
      "learning_rate": 3.57843137254902e-06,
      "loss": 0.65,
      "step": 584
    },
    {
      "epoch": 0.07172633643943109,
      "grad_norm": 2.486646842670229,
      "learning_rate": 3.5845588235294123e-06,
      "loss": 0.6287,
      "step": 585
    },
    {
      "epoch": 0.07184894556154978,
      "grad_norm": 2.3050270930357324,
      "learning_rate": 3.5906862745098043e-06,
      "loss": 0.633,
      "step": 586
    },
    {
      "epoch": 0.07197155468366846,
      "grad_norm": 2.8442485309559737,
      "learning_rate": 3.5968137254901963e-06,
      "loss": 0.6377,
      "step": 587
    },
    {
      "epoch": 0.07209416380578715,
      "grad_norm": 2.7600133019387774,
      "learning_rate": 3.6029411764705883e-06,
      "loss": 0.6391,
      "step": 588
    },
    {
      "epoch": 0.07221677292790583,
      "grad_norm": 2.921031647347714,
      "learning_rate": 3.6090686274509808e-06,
      "loss": 0.6953,
      "step": 589
    },
    {
      "epoch": 0.07233938205002452,
      "grad_norm": 2.585024010990706,
      "learning_rate": 3.615196078431373e-06,
      "loss": 0.6277,
      "step": 590
    },
    {
      "epoch": 0.0724619911721432,
      "grad_norm": 2.836690846095253,
      "learning_rate": 3.621323529411765e-06,
      "loss": 0.623,
      "step": 591
    },
    {
      "epoch": 0.07258460029426189,
      "grad_norm": 2.34282516342147,
      "learning_rate": 3.6274509803921573e-06,
      "loss": 0.6633,
      "step": 592
    },
    {
      "epoch": 0.07270720941638058,
      "grad_norm": 2.9412710509678655,
      "learning_rate": 3.6335784313725493e-06,
      "loss": 0.6403,
      "step": 593
    },
    {
      "epoch": 0.07282981853849926,
      "grad_norm": 2.4244636703281754,
      "learning_rate": 3.6397058823529413e-06,
      "loss": 0.6153,
      "step": 594
    },
    {
      "epoch": 0.07295242766061795,
      "grad_norm": 2.6602648148219363,
      "learning_rate": 3.6458333333333333e-06,
      "loss": 0.5726,
      "step": 595
    },
    {
      "epoch": 0.07307503678273664,
      "grad_norm": 3.0234008847187694,
      "learning_rate": 3.6519607843137257e-06,
      "loss": 0.6306,
      "step": 596
    },
    {
      "epoch": 0.07319764590485532,
      "grad_norm": 2.495162661700813,
      "learning_rate": 3.6580882352941178e-06,
      "loss": 0.5918,
      "step": 597
    },
    {
      "epoch": 0.07332025502697401,
      "grad_norm": 2.7284139528712377,
      "learning_rate": 3.6642156862745098e-06,
      "loss": 0.6422,
      "step": 598
    },
    {
      "epoch": 0.0734428641490927,
      "grad_norm": 2.9785413710795132,
      "learning_rate": 3.6703431372549018e-06,
      "loss": 0.663,
      "step": 599
    },
    {
      "epoch": 0.07356547327121138,
      "grad_norm": 2.729482577000641,
      "learning_rate": 3.6764705882352946e-06,
      "loss": 0.6564,
      "step": 600
    },
    {
      "epoch": 0.07368808239333006,
      "grad_norm": 2.6959071065465867,
      "learning_rate": 3.6825980392156867e-06,
      "loss": 0.6229,
      "step": 601
    },
    {
      "epoch": 0.07381069151544875,
      "grad_norm": 2.824283544042305,
      "learning_rate": 3.688725490196079e-06,
      "loss": 0.6731,
      "step": 602
    },
    {
      "epoch": 0.07393330063756744,
      "grad_norm": 2.642755125011612,
      "learning_rate": 3.694852941176471e-06,
      "loss": 0.6057,
      "step": 603
    },
    {
      "epoch": 0.07405590975968612,
      "grad_norm": 2.632624307762815,
      "learning_rate": 3.700980392156863e-06,
      "loss": 0.6798,
      "step": 604
    },
    {
      "epoch": 0.07417851888180481,
      "grad_norm": 2.3862006034299417,
      "learning_rate": 3.7071078431372556e-06,
      "loss": 0.5958,
      "step": 605
    },
    {
      "epoch": 0.0743011280039235,
      "grad_norm": 2.4393589307762227,
      "learning_rate": 3.7132352941176476e-06,
      "loss": 0.6297,
      "step": 606
    },
    {
      "epoch": 0.07442373712604218,
      "grad_norm": 2.473530709384324,
      "learning_rate": 3.7193627450980396e-06,
      "loss": 0.6029,
      "step": 607
    },
    {
      "epoch": 0.07454634624816087,
      "grad_norm": 2.34504562917894,
      "learning_rate": 3.7254901960784316e-06,
      "loss": 0.6114,
      "step": 608
    },
    {
      "epoch": 0.07466895537027955,
      "grad_norm": 3.0917999492169588,
      "learning_rate": 3.731617647058824e-06,
      "loss": 0.5811,
      "step": 609
    },
    {
      "epoch": 0.07479156449239824,
      "grad_norm": 2.8885814706218516,
      "learning_rate": 3.737745098039216e-06,
      "loss": 0.6499,
      "step": 610
    },
    {
      "epoch": 0.07491417361451692,
      "grad_norm": 2.7105936541370017,
      "learning_rate": 3.743872549019608e-06,
      "loss": 0.6444,
      "step": 611
    },
    {
      "epoch": 0.07503678273663561,
      "grad_norm": 2.7712096233587133,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.5913,
      "step": 612
    },
    {
      "epoch": 0.0751593918587543,
      "grad_norm": 2.877464903129652,
      "learning_rate": 3.7561274509803925e-06,
      "loss": 0.6418,
      "step": 613
    },
    {
      "epoch": 0.07528200098087298,
      "grad_norm": 2.6691566200872123,
      "learning_rate": 3.7622549019607845e-06,
      "loss": 0.6569,
      "step": 614
    },
    {
      "epoch": 0.07540461010299167,
      "grad_norm": 2.883184922173559,
      "learning_rate": 3.7683823529411766e-06,
      "loss": 0.6076,
      "step": 615
    },
    {
      "epoch": 0.07552721922511035,
      "grad_norm": 2.560361187127197,
      "learning_rate": 3.774509803921569e-06,
      "loss": 0.6726,
      "step": 616
    },
    {
      "epoch": 0.07564982834722904,
      "grad_norm": 2.896427046154407,
      "learning_rate": 3.780637254901961e-06,
      "loss": 0.6786,
      "step": 617
    },
    {
      "epoch": 0.07577243746934773,
      "grad_norm": 2.4730265207390523,
      "learning_rate": 3.786764705882353e-06,
      "loss": 0.5933,
      "step": 618
    },
    {
      "epoch": 0.07589504659146641,
      "grad_norm": 2.6926075773945417,
      "learning_rate": 3.792892156862745e-06,
      "loss": 0.653,
      "step": 619
    },
    {
      "epoch": 0.0760176557135851,
      "grad_norm": 2.548084582026809,
      "learning_rate": 3.7990196078431375e-06,
      "loss": 0.6877,
      "step": 620
    },
    {
      "epoch": 0.07614026483570378,
      "grad_norm": 2.688828286588657,
      "learning_rate": 3.8051470588235295e-06,
      "loss": 0.6085,
      "step": 621
    },
    {
      "epoch": 0.07626287395782246,
      "grad_norm": 2.7343660792988156,
      "learning_rate": 3.8112745098039215e-06,
      "loss": 0.666,
      "step": 622
    },
    {
      "epoch": 0.07638548307994114,
      "grad_norm": 2.4527575029241433,
      "learning_rate": 3.817401960784314e-06,
      "loss": 0.6083,
      "step": 623
    },
    {
      "epoch": 0.07650809220205983,
      "grad_norm": 2.730322714916686,
      "learning_rate": 3.8235294117647055e-06,
      "loss": 0.6377,
      "step": 624
    },
    {
      "epoch": 0.07663070132417851,
      "grad_norm": 2.6609074870195664,
      "learning_rate": 3.829656862745099e-06,
      "loss": 0.6798,
      "step": 625
    },
    {
      "epoch": 0.0767533104462972,
      "grad_norm": 2.5426028980412485,
      "learning_rate": 3.8357843137254904e-06,
      "loss": 0.6165,
      "step": 626
    },
    {
      "epoch": 0.07687591956841588,
      "grad_norm": 2.477832625843574,
      "learning_rate": 3.841911764705883e-06,
      "loss": 0.595,
      "step": 627
    },
    {
      "epoch": 0.07699852869053457,
      "grad_norm": 2.72087155503172,
      "learning_rate": 3.848039215686275e-06,
      "loss": 0.6181,
      "step": 628
    },
    {
      "epoch": 0.07712113781265326,
      "grad_norm": 2.8541542892520555,
      "learning_rate": 3.854166666666667e-06,
      "loss": 0.6942,
      "step": 629
    },
    {
      "epoch": 0.07724374693477194,
      "grad_norm": 2.985400548894846,
      "learning_rate": 3.860294117647059e-06,
      "loss": 0.7041,
      "step": 630
    },
    {
      "epoch": 0.07736635605689063,
      "grad_norm": 2.7336622672599393,
      "learning_rate": 3.866421568627452e-06,
      "loss": 0.6495,
      "step": 631
    },
    {
      "epoch": 0.07748896517900931,
      "grad_norm": 2.7689568037939254,
      "learning_rate": 3.872549019607843e-06,
      "loss": 0.6065,
      "step": 632
    },
    {
      "epoch": 0.077611574301128,
      "grad_norm": 2.750131151278545,
      "learning_rate": 3.878676470588236e-06,
      "loss": 0.6354,
      "step": 633
    },
    {
      "epoch": 0.07773418342324669,
      "grad_norm": 2.7205362128391397,
      "learning_rate": 3.884803921568628e-06,
      "loss": 0.6603,
      "step": 634
    },
    {
      "epoch": 0.07785679254536537,
      "grad_norm": 2.786583850586475,
      "learning_rate": 3.89093137254902e-06,
      "loss": 0.6234,
      "step": 635
    },
    {
      "epoch": 0.07797940166748406,
      "grad_norm": 2.6660823844097963,
      "learning_rate": 3.897058823529412e-06,
      "loss": 0.6826,
      "step": 636
    },
    {
      "epoch": 0.07810201078960274,
      "grad_norm": 2.4571212692522386,
      "learning_rate": 3.903186274509804e-06,
      "loss": 0.6463,
      "step": 637
    },
    {
      "epoch": 0.07822461991172143,
      "grad_norm": 3.0690899448090634,
      "learning_rate": 3.909313725490196e-06,
      "loss": 0.7128,
      "step": 638
    },
    {
      "epoch": 0.07834722903384012,
      "grad_norm": 2.605897499041937,
      "learning_rate": 3.915441176470589e-06,
      "loss": 0.6326,
      "step": 639
    },
    {
      "epoch": 0.0784698381559588,
      "grad_norm": 2.510359532794984,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.6634,
      "step": 640
    },
    {
      "epoch": 0.07859244727807749,
      "grad_norm": 2.816075662206749,
      "learning_rate": 3.927696078431373e-06,
      "loss": 0.6207,
      "step": 641
    },
    {
      "epoch": 0.07871505640019617,
      "grad_norm": 2.6038221066125438,
      "learning_rate": 3.933823529411765e-06,
      "loss": 0.6539,
      "step": 642
    },
    {
      "epoch": 0.07883766552231486,
      "grad_norm": 2.2800394885641353,
      "learning_rate": 3.939950980392157e-06,
      "loss": 0.6451,
      "step": 643
    },
    {
      "epoch": 0.07896027464443355,
      "grad_norm": 2.555367826344957,
      "learning_rate": 3.946078431372549e-06,
      "loss": 0.6356,
      "step": 644
    },
    {
      "epoch": 0.07908288376655223,
      "grad_norm": 2.693039410074659,
      "learning_rate": 3.952205882352942e-06,
      "loss": 0.6104,
      "step": 645
    },
    {
      "epoch": 0.07920549288867092,
      "grad_norm": 2.894607493770389,
      "learning_rate": 3.958333333333333e-06,
      "loss": 0.6283,
      "step": 646
    },
    {
      "epoch": 0.0793281020107896,
      "grad_norm": 2.689729112808601,
      "learning_rate": 3.964460784313726e-06,
      "loss": 0.6044,
      "step": 647
    },
    {
      "epoch": 0.07945071113290829,
      "grad_norm": 2.6849219543013985,
      "learning_rate": 3.970588235294118e-06,
      "loss": 0.6315,
      "step": 648
    },
    {
      "epoch": 0.07957332025502697,
      "grad_norm": 2.5070998214460953,
      "learning_rate": 3.97671568627451e-06,
      "loss": 0.5933,
      "step": 649
    },
    {
      "epoch": 0.07969592937714566,
      "grad_norm": 3.010071245176652,
      "learning_rate": 3.982843137254902e-06,
      "loss": 0.6424,
      "step": 650
    },
    {
      "epoch": 0.07981853849926435,
      "grad_norm": 2.6080531804395726,
      "learning_rate": 3.988970588235295e-06,
      "loss": 0.6247,
      "step": 651
    },
    {
      "epoch": 0.07994114762138303,
      "grad_norm": 2.6658097861638237,
      "learning_rate": 3.995098039215687e-06,
      "loss": 0.6321,
      "step": 652
    },
    {
      "epoch": 0.08006375674350172,
      "grad_norm": 2.541102924267807,
      "learning_rate": 4.001225490196079e-06,
      "loss": 0.612,
      "step": 653
    },
    {
      "epoch": 0.0801863658656204,
      "grad_norm": 3.0411567955931895,
      "learning_rate": 4.007352941176471e-06,
      "loss": 0.6273,
      "step": 654
    },
    {
      "epoch": 0.08030897498773909,
      "grad_norm": 2.6568327031939805,
      "learning_rate": 4.0134803921568635e-06,
      "loss": 0.605,
      "step": 655
    },
    {
      "epoch": 0.08043158410985778,
      "grad_norm": 3.1741059150384223,
      "learning_rate": 4.019607843137255e-06,
      "loss": 0.6761,
      "step": 656
    },
    {
      "epoch": 0.08055419323197646,
      "grad_norm": 2.7325861597060723,
      "learning_rate": 4.0257352941176476e-06,
      "loss": 0.6066,
      "step": 657
    },
    {
      "epoch": 0.08067680235409515,
      "grad_norm": 2.8681090412387404,
      "learning_rate": 4.03186274509804e-06,
      "loss": 0.6197,
      "step": 658
    },
    {
      "epoch": 0.08079941147621383,
      "grad_norm": 2.6842375633079785,
      "learning_rate": 4.037990196078432e-06,
      "loss": 0.6311,
      "step": 659
    },
    {
      "epoch": 0.08092202059833252,
      "grad_norm": 2.9958219497119343,
      "learning_rate": 4.044117647058824e-06,
      "loss": 0.6934,
      "step": 660
    },
    {
      "epoch": 0.0810446297204512,
      "grad_norm": 2.9577186779234004,
      "learning_rate": 4.0502450980392165e-06,
      "loss": 0.6563,
      "step": 661
    },
    {
      "epoch": 0.08116723884256989,
      "grad_norm": 2.6553148202543846,
      "learning_rate": 4.056372549019608e-06,
      "loss": 0.6145,
      "step": 662
    },
    {
      "epoch": 0.08128984796468858,
      "grad_norm": 2.497091574930727,
      "learning_rate": 4.0625000000000005e-06,
      "loss": 0.5624,
      "step": 663
    },
    {
      "epoch": 0.08141245708680726,
      "grad_norm": 2.5717575790672567,
      "learning_rate": 4.068627450980392e-06,
      "loss": 0.6177,
      "step": 664
    },
    {
      "epoch": 0.08153506620892595,
      "grad_norm": 2.7285826013776227,
      "learning_rate": 4.0747549019607845e-06,
      "loss": 0.7095,
      "step": 665
    },
    {
      "epoch": 0.08165767533104464,
      "grad_norm": 2.442155574797523,
      "learning_rate": 4.080882352941177e-06,
      "loss": 0.6017,
      "step": 666
    },
    {
      "epoch": 0.08178028445316332,
      "grad_norm": 3.270442554679069,
      "learning_rate": 4.0870098039215686e-06,
      "loss": 0.6399,
      "step": 667
    },
    {
      "epoch": 0.08190289357528201,
      "grad_norm": 3.0031145561552273,
      "learning_rate": 4.093137254901961e-06,
      "loss": 0.6534,
      "step": 668
    },
    {
      "epoch": 0.08202550269740069,
      "grad_norm": 2.7805165619139482,
      "learning_rate": 4.0992647058823534e-06,
      "loss": 0.6663,
      "step": 669
    },
    {
      "epoch": 0.08214811181951938,
      "grad_norm": 2.686232364502763,
      "learning_rate": 4.105392156862745e-06,
      "loss": 0.6452,
      "step": 670
    },
    {
      "epoch": 0.08227072094163806,
      "grad_norm": 2.9157491709558525,
      "learning_rate": 4.1115196078431375e-06,
      "loss": 0.5976,
      "step": 671
    },
    {
      "epoch": 0.08239333006375674,
      "grad_norm": 2.620322866295996,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.6548,
      "step": 672
    },
    {
      "epoch": 0.08251593918587542,
      "grad_norm": 2.62280590307985,
      "learning_rate": 4.1237745098039215e-06,
      "loss": 0.6129,
      "step": 673
    },
    {
      "epoch": 0.08263854830799411,
      "grad_norm": 2.7953390218821563,
      "learning_rate": 4.129901960784314e-06,
      "loss": 0.6674,
      "step": 674
    },
    {
      "epoch": 0.0827611574301128,
      "grad_norm": 2.9365814657999465,
      "learning_rate": 4.136029411764706e-06,
      "loss": 0.668,
      "step": 675
    },
    {
      "epoch": 0.08288376655223148,
      "grad_norm": 2.8164382824620486,
      "learning_rate": 4.142156862745099e-06,
      "loss": 0.587,
      "step": 676
    },
    {
      "epoch": 0.08300637567435017,
      "grad_norm": 2.524056786505249,
      "learning_rate": 4.14828431372549e-06,
      "loss": 0.6557,
      "step": 677
    },
    {
      "epoch": 0.08312898479646885,
      "grad_norm": 2.7190828163361562,
      "learning_rate": 4.154411764705883e-06,
      "loss": 0.6673,
      "step": 678
    },
    {
      "epoch": 0.08325159391858754,
      "grad_norm": 3.1222469132844677,
      "learning_rate": 4.160539215686275e-06,
      "loss": 0.642,
      "step": 679
    },
    {
      "epoch": 0.08337420304070622,
      "grad_norm": 2.623458259581635,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.58,
      "step": 680
    },
    {
      "epoch": 0.08349681216282491,
      "grad_norm": 2.458439637198349,
      "learning_rate": 4.172794117647059e-06,
      "loss": 0.5737,
      "step": 681
    },
    {
      "epoch": 0.0836194212849436,
      "grad_norm": 3.124539524211007,
      "learning_rate": 4.178921568627452e-06,
      "loss": 0.6237,
      "step": 682
    },
    {
      "epoch": 0.08374203040706228,
      "grad_norm": 3.1260099414744804,
      "learning_rate": 4.185049019607843e-06,
      "loss": 0.6719,
      "step": 683
    },
    {
      "epoch": 0.08386463952918097,
      "grad_norm": 2.5866417682732683,
      "learning_rate": 4.191176470588236e-06,
      "loss": 0.6469,
      "step": 684
    },
    {
      "epoch": 0.08398724865129965,
      "grad_norm": 2.5221757119387047,
      "learning_rate": 4.197303921568628e-06,
      "loss": 0.6189,
      "step": 685
    },
    {
      "epoch": 0.08410985777341834,
      "grad_norm": 2.7811059376857696,
      "learning_rate": 4.20343137254902e-06,
      "loss": 0.6409,
      "step": 686
    },
    {
      "epoch": 0.08423246689553703,
      "grad_norm": 2.7304078167246666,
      "learning_rate": 4.209558823529412e-06,
      "loss": 0.6457,
      "step": 687
    },
    {
      "epoch": 0.08435507601765571,
      "grad_norm": 2.7011351077070693,
      "learning_rate": 4.215686274509805e-06,
      "loss": 0.6106,
      "step": 688
    },
    {
      "epoch": 0.0844776851397744,
      "grad_norm": 2.6032612536823683,
      "learning_rate": 4.221813725490196e-06,
      "loss": 0.6076,
      "step": 689
    },
    {
      "epoch": 0.08460029426189308,
      "grad_norm": 2.5804878663875885,
      "learning_rate": 4.227941176470589e-06,
      "loss": 0.6737,
      "step": 690
    },
    {
      "epoch": 0.08472290338401177,
      "grad_norm": 2.7193479650760266,
      "learning_rate": 4.23406862745098e-06,
      "loss": 0.6085,
      "step": 691
    },
    {
      "epoch": 0.08484551250613045,
      "grad_norm": 2.244501212621832,
      "learning_rate": 4.240196078431373e-06,
      "loss": 0.624,
      "step": 692
    },
    {
      "epoch": 0.08496812162824914,
      "grad_norm": 2.476655280397174,
      "learning_rate": 4.246323529411765e-06,
      "loss": 0.6512,
      "step": 693
    },
    {
      "epoch": 0.08509073075036783,
      "grad_norm": 2.5323596651781037,
      "learning_rate": 4.252450980392157e-06,
      "loss": 0.6023,
      "step": 694
    },
    {
      "epoch": 0.08521333987248651,
      "grad_norm": 2.93421653692688,
      "learning_rate": 4.258578431372549e-06,
      "loss": 0.635,
      "step": 695
    },
    {
      "epoch": 0.0853359489946052,
      "grad_norm": 2.6800470479306058,
      "learning_rate": 4.264705882352942e-06,
      "loss": 0.6238,
      "step": 696
    },
    {
      "epoch": 0.08545855811672388,
      "grad_norm": 2.9139769274905243,
      "learning_rate": 4.270833333333333e-06,
      "loss": 0.6729,
      "step": 697
    },
    {
      "epoch": 0.08558116723884257,
      "grad_norm": 2.4948381267207873,
      "learning_rate": 4.276960784313726e-06,
      "loss": 0.6145,
      "step": 698
    },
    {
      "epoch": 0.08570377636096126,
      "grad_norm": 2.520159459545703,
      "learning_rate": 4.283088235294118e-06,
      "loss": 0.648,
      "step": 699
    },
    {
      "epoch": 0.08582638548307994,
      "grad_norm": 2.5752518288339505,
      "learning_rate": 4.28921568627451e-06,
      "loss": 0.6247,
      "step": 700
    },
    {
      "epoch": 0.08594899460519863,
      "grad_norm": 2.5746142576585247,
      "learning_rate": 4.295343137254902e-06,
      "loss": 0.6456,
      "step": 701
    },
    {
      "epoch": 0.08607160372731731,
      "grad_norm": 2.8158388494885234,
      "learning_rate": 4.301470588235295e-06,
      "loss": 0.6443,
      "step": 702
    },
    {
      "epoch": 0.086194212849436,
      "grad_norm": 2.980592989234291,
      "learning_rate": 4.307598039215687e-06,
      "loss": 0.6461,
      "step": 703
    },
    {
      "epoch": 0.08631682197155469,
      "grad_norm": 2.9884140007737328,
      "learning_rate": 4.313725490196079e-06,
      "loss": 0.6414,
      "step": 704
    },
    {
      "epoch": 0.08643943109367337,
      "grad_norm": 2.7535734163322316,
      "learning_rate": 4.319852941176471e-06,
      "loss": 0.6589,
      "step": 705
    },
    {
      "epoch": 0.08656204021579206,
      "grad_norm": 2.65577497664207,
      "learning_rate": 4.3259803921568635e-06,
      "loss": 0.637,
      "step": 706
    },
    {
      "epoch": 0.08668464933791074,
      "grad_norm": 2.242150088746107,
      "learning_rate": 4.332107843137255e-06,
      "loss": 0.5588,
      "step": 707
    },
    {
      "epoch": 0.08680725846002943,
      "grad_norm": 2.808244837174793,
      "learning_rate": 4.3382352941176475e-06,
      "loss": 0.6327,
      "step": 708
    },
    {
      "epoch": 0.08692986758214812,
      "grad_norm": 2.7654504534260886,
      "learning_rate": 4.34436274509804e-06,
      "loss": 0.6494,
      "step": 709
    },
    {
      "epoch": 0.0870524767042668,
      "grad_norm": 2.822118744327783,
      "learning_rate": 4.3504901960784316e-06,
      "loss": 0.636,
      "step": 710
    },
    {
      "epoch": 0.08717508582638549,
      "grad_norm": 2.6507838672176547,
      "learning_rate": 4.356617647058824e-06,
      "loss": 0.6594,
      "step": 711
    },
    {
      "epoch": 0.08729769494850417,
      "grad_norm": 2.809749134816758,
      "learning_rate": 4.3627450980392164e-06,
      "loss": 0.6381,
      "step": 712
    },
    {
      "epoch": 0.08742030407062286,
      "grad_norm": 2.4757797241142905,
      "learning_rate": 4.368872549019608e-06,
      "loss": 0.6434,
      "step": 713
    },
    {
      "epoch": 0.08754291319274154,
      "grad_norm": 2.44809319900098,
      "learning_rate": 4.3750000000000005e-06,
      "loss": 0.6109,
      "step": 714
    },
    {
      "epoch": 0.08766552231486023,
      "grad_norm": 2.6937206501759308,
      "learning_rate": 4.381127450980393e-06,
      "loss": 0.6258,
      "step": 715
    },
    {
      "epoch": 0.08778813143697892,
      "grad_norm": 2.423643984371306,
      "learning_rate": 4.3872549019607845e-06,
      "loss": 0.6097,
      "step": 716
    },
    {
      "epoch": 0.0879107405590976,
      "grad_norm": 2.916008298840971,
      "learning_rate": 4.393382352941177e-06,
      "loss": 0.7057,
      "step": 717
    },
    {
      "epoch": 0.08803334968121629,
      "grad_norm": 2.713066728457287,
      "learning_rate": 4.3995098039215685e-06,
      "loss": 0.6932,
      "step": 718
    },
    {
      "epoch": 0.08815595880333497,
      "grad_norm": 2.64444899970384,
      "learning_rate": 4.405637254901961e-06,
      "loss": 0.5969,
      "step": 719
    },
    {
      "epoch": 0.08827856792545366,
      "grad_norm": 2.6024653263119677,
      "learning_rate": 4.411764705882353e-06,
      "loss": 0.5443,
      "step": 720
    },
    {
      "epoch": 0.08840117704757235,
      "grad_norm": 2.595673683090649,
      "learning_rate": 4.417892156862745e-06,
      "loss": 0.6101,
      "step": 721
    },
    {
      "epoch": 0.08852378616969102,
      "grad_norm": 2.7264658887166076,
      "learning_rate": 4.4240196078431374e-06,
      "loss": 0.6798,
      "step": 722
    },
    {
      "epoch": 0.0886463952918097,
      "grad_norm": 2.780955087636537,
      "learning_rate": 4.43014705882353e-06,
      "loss": 0.6172,
      "step": 723
    },
    {
      "epoch": 0.08876900441392839,
      "grad_norm": 2.9476594001881167,
      "learning_rate": 4.4362745098039215e-06,
      "loss": 0.6537,
      "step": 724
    },
    {
      "epoch": 0.08889161353604708,
      "grad_norm": 2.5149181006291554,
      "learning_rate": 4.442401960784314e-06,
      "loss": 0.5845,
      "step": 725
    },
    {
      "epoch": 0.08901422265816576,
      "grad_norm": 2.3277384765041376,
      "learning_rate": 4.448529411764706e-06,
      "loss": 0.5969,
      "step": 726
    },
    {
      "epoch": 0.08913683178028445,
      "grad_norm": 2.5462888963048766,
      "learning_rate": 4.454656862745099e-06,
      "loss": 0.6048,
      "step": 727
    },
    {
      "epoch": 0.08925944090240313,
      "grad_norm": 2.739558383177132,
      "learning_rate": 4.460784313725491e-06,
      "loss": 0.6407,
      "step": 728
    },
    {
      "epoch": 0.08938205002452182,
      "grad_norm": 2.6745211675375224,
      "learning_rate": 4.466911764705883e-06,
      "loss": 0.5901,
      "step": 729
    },
    {
      "epoch": 0.0895046591466405,
      "grad_norm": 2.464113475440271,
      "learning_rate": 4.473039215686275e-06,
      "loss": 0.6006,
      "step": 730
    },
    {
      "epoch": 0.08962726826875919,
      "grad_norm": 2.378378405242434,
      "learning_rate": 4.479166666666667e-06,
      "loss": 0.6551,
      "step": 731
    },
    {
      "epoch": 0.08974987739087788,
      "grad_norm": 2.8716785086876855,
      "learning_rate": 4.485294117647059e-06,
      "loss": 0.6097,
      "step": 732
    },
    {
      "epoch": 0.08987248651299656,
      "grad_norm": 2.6164185165447233,
      "learning_rate": 4.491421568627452e-06,
      "loss": 0.5971,
      "step": 733
    },
    {
      "epoch": 0.08999509563511525,
      "grad_norm": 2.7755382022034776,
      "learning_rate": 4.497549019607843e-06,
      "loss": 0.6128,
      "step": 734
    },
    {
      "epoch": 0.09011770475723394,
      "grad_norm": 2.7094345308314955,
      "learning_rate": 4.503676470588236e-06,
      "loss": 0.5797,
      "step": 735
    },
    {
      "epoch": 0.09024031387935262,
      "grad_norm": 2.6006102981858406,
      "learning_rate": 4.509803921568628e-06,
      "loss": 0.6546,
      "step": 736
    },
    {
      "epoch": 0.09036292300147131,
      "grad_norm": 2.5302080782332066,
      "learning_rate": 4.51593137254902e-06,
      "loss": 0.6109,
      "step": 737
    },
    {
      "epoch": 0.09048553212358999,
      "grad_norm": 2.5409260518268075,
      "learning_rate": 4.522058823529412e-06,
      "loss": 0.6238,
      "step": 738
    },
    {
      "epoch": 0.09060814124570868,
      "grad_norm": 2.7520236375131253,
      "learning_rate": 4.528186274509805e-06,
      "loss": 0.6304,
      "step": 739
    },
    {
      "epoch": 0.09073075036782736,
      "grad_norm": 2.6741441843789415,
      "learning_rate": 4.534313725490196e-06,
      "loss": 0.5985,
      "step": 740
    },
    {
      "epoch": 0.09085335948994605,
      "grad_norm": 2.62844813749483,
      "learning_rate": 4.540441176470589e-06,
      "loss": 0.5376,
      "step": 741
    },
    {
      "epoch": 0.09097596861206474,
      "grad_norm": 2.420333739438718,
      "learning_rate": 4.546568627450981e-06,
      "loss": 0.6376,
      "step": 742
    },
    {
      "epoch": 0.09109857773418342,
      "grad_norm": 2.3002944980287827,
      "learning_rate": 4.552696078431373e-06,
      "loss": 0.5853,
      "step": 743
    },
    {
      "epoch": 0.09122118685630211,
      "grad_norm": 2.8229349430425614,
      "learning_rate": 4.558823529411765e-06,
      "loss": 0.6077,
      "step": 744
    },
    {
      "epoch": 0.0913437959784208,
      "grad_norm": 2.7415424135942255,
      "learning_rate": 4.564950980392157e-06,
      "loss": 0.6272,
      "step": 745
    },
    {
      "epoch": 0.09146640510053948,
      "grad_norm": 2.6305689576950946,
      "learning_rate": 4.571078431372549e-06,
      "loss": 0.5954,
      "step": 746
    },
    {
      "epoch": 0.09158901422265817,
      "grad_norm": 2.6298932259451004,
      "learning_rate": 4.577205882352942e-06,
      "loss": 0.5927,
      "step": 747
    },
    {
      "epoch": 0.09171162334477685,
      "grad_norm": 2.60541965256262,
      "learning_rate": 4.583333333333333e-06,
      "loss": 0.6896,
      "step": 748
    },
    {
      "epoch": 0.09183423246689554,
      "grad_norm": 2.8819164326916,
      "learning_rate": 4.589460784313726e-06,
      "loss": 0.6819,
      "step": 749
    },
    {
      "epoch": 0.09195684158901422,
      "grad_norm": 2.356519022197506,
      "learning_rate": 4.595588235294118e-06,
      "loss": 0.5495,
      "step": 750
    },
    {
      "epoch": 0.09207945071113291,
      "grad_norm": 2.548659318566853,
      "learning_rate": 4.60171568627451e-06,
      "loss": 0.6464,
      "step": 751
    },
    {
      "epoch": 0.0922020598332516,
      "grad_norm": 2.806358234573578,
      "learning_rate": 4.607843137254902e-06,
      "loss": 0.6303,
      "step": 752
    },
    {
      "epoch": 0.09232466895537028,
      "grad_norm": 2.6654347398511007,
      "learning_rate": 4.6139705882352946e-06,
      "loss": 0.6794,
      "step": 753
    },
    {
      "epoch": 0.09244727807748897,
      "grad_norm": 2.786824377224869,
      "learning_rate": 4.620098039215687e-06,
      "loss": 0.6312,
      "step": 754
    },
    {
      "epoch": 0.09256988719960765,
      "grad_norm": 2.5634626742341005,
      "learning_rate": 4.6262254901960794e-06,
      "loss": 0.6262,
      "step": 755
    },
    {
      "epoch": 0.09269249632172634,
      "grad_norm": 2.7024434854262367,
      "learning_rate": 4.632352941176471e-06,
      "loss": 0.612,
      "step": 756
    },
    {
      "epoch": 0.09281510544384503,
      "grad_norm": 2.68636298270421,
      "learning_rate": 4.6384803921568635e-06,
      "loss": 0.6834,
      "step": 757
    },
    {
      "epoch": 0.09293771456596371,
      "grad_norm": 2.8562383022734172,
      "learning_rate": 4.644607843137255e-06,
      "loss": 0.6095,
      "step": 758
    },
    {
      "epoch": 0.0930603236880824,
      "grad_norm": 2.662901863333469,
      "learning_rate": 4.6507352941176475e-06,
      "loss": 0.5932,
      "step": 759
    },
    {
      "epoch": 0.09318293281020108,
      "grad_norm": 2.632965152742554,
      "learning_rate": 4.65686274509804e-06,
      "loss": 0.6221,
      "step": 760
    },
    {
      "epoch": 0.09330554193231977,
      "grad_norm": 2.5687045435481317,
      "learning_rate": 4.6629901960784315e-06,
      "loss": 0.6182,
      "step": 761
    },
    {
      "epoch": 0.09342815105443845,
      "grad_norm": 2.377039558104293,
      "learning_rate": 4.669117647058824e-06,
      "loss": 0.6325,
      "step": 762
    },
    {
      "epoch": 0.09355076017655714,
      "grad_norm": 2.775086482158373,
      "learning_rate": 4.675245098039216e-06,
      "loss": 0.6217,
      "step": 763
    },
    {
      "epoch": 0.09367336929867583,
      "grad_norm": 2.760262018034615,
      "learning_rate": 4.681372549019608e-06,
      "loss": 0.6161,
      "step": 764
    },
    {
      "epoch": 0.09379597842079451,
      "grad_norm": 2.471262312476265,
      "learning_rate": 4.6875000000000004e-06,
      "loss": 0.5874,
      "step": 765
    },
    {
      "epoch": 0.0939185875429132,
      "grad_norm": 2.774425874744433,
      "learning_rate": 4.693627450980393e-06,
      "loss": 0.6299,
      "step": 766
    },
    {
      "epoch": 0.09404119666503188,
      "grad_norm": 2.4530266442746784,
      "learning_rate": 4.6997549019607845e-06,
      "loss": 0.6791,
      "step": 767
    },
    {
      "epoch": 0.09416380578715057,
      "grad_norm": 2.9522839522451347,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.6416,
      "step": 768
    },
    {
      "epoch": 0.09428641490926926,
      "grad_norm": 2.7879699291078053,
      "learning_rate": 4.712009803921569e-06,
      "loss": 0.6379,
      "step": 769
    },
    {
      "epoch": 0.09440902403138794,
      "grad_norm": 2.799556938991986,
      "learning_rate": 4.718137254901961e-06,
      "loss": 0.6712,
      "step": 770
    },
    {
      "epoch": 0.09453163315350661,
      "grad_norm": 2.781182272292354,
      "learning_rate": 4.724264705882353e-06,
      "loss": 0.6489,
      "step": 771
    },
    {
      "epoch": 0.0946542422756253,
      "grad_norm": 2.3012281029857933,
      "learning_rate": 4.730392156862745e-06,
      "loss": 0.6202,
      "step": 772
    },
    {
      "epoch": 0.09477685139774399,
      "grad_norm": 2.4609446017645484,
      "learning_rate": 4.736519607843137e-06,
      "loss": 0.592,
      "step": 773
    },
    {
      "epoch": 0.09489946051986267,
      "grad_norm": 2.7380876526047238,
      "learning_rate": 4.74264705882353e-06,
      "loss": 0.5571,
      "step": 774
    },
    {
      "epoch": 0.09502206964198136,
      "grad_norm": 3.0342062555074927,
      "learning_rate": 4.7487745098039214e-06,
      "loss": 0.683,
      "step": 775
    },
    {
      "epoch": 0.09514467876410004,
      "grad_norm": 2.382650239164694,
      "learning_rate": 4.754901960784314e-06,
      "loss": 0.6375,
      "step": 776
    },
    {
      "epoch": 0.09526728788621873,
      "grad_norm": 2.53369837352099,
      "learning_rate": 4.761029411764706e-06,
      "loss": 0.6482,
      "step": 777
    },
    {
      "epoch": 0.09538989700833742,
      "grad_norm": 2.641536957331772,
      "learning_rate": 4.767156862745099e-06,
      "loss": 0.5653,
      "step": 778
    },
    {
      "epoch": 0.0955125061304561,
      "grad_norm": 2.742563828522374,
      "learning_rate": 4.773284313725491e-06,
      "loss": 0.6202,
      "step": 779
    },
    {
      "epoch": 0.09563511525257479,
      "grad_norm": 2.824762316370875,
      "learning_rate": 4.779411764705883e-06,
      "loss": 0.6399,
      "step": 780
    },
    {
      "epoch": 0.09575772437469347,
      "grad_norm": 2.4434771789735223,
      "learning_rate": 4.785539215686275e-06,
      "loss": 0.6363,
      "step": 781
    },
    {
      "epoch": 0.09588033349681216,
      "grad_norm": 2.7309270449147514,
      "learning_rate": 4.791666666666668e-06,
      "loss": 0.6331,
      "step": 782
    },
    {
      "epoch": 0.09600294261893084,
      "grad_norm": 2.260018406085542,
      "learning_rate": 4.797794117647059e-06,
      "loss": 0.5503,
      "step": 783
    },
    {
      "epoch": 0.09612555174104953,
      "grad_norm": 2.664127586292755,
      "learning_rate": 4.803921568627452e-06,
      "loss": 0.6661,
      "step": 784
    },
    {
      "epoch": 0.09624816086316822,
      "grad_norm": 2.598435934776765,
      "learning_rate": 4.810049019607843e-06,
      "loss": 0.6647,
      "step": 785
    },
    {
      "epoch": 0.0963707699852869,
      "grad_norm": 2.6519957122260216,
      "learning_rate": 4.816176470588236e-06,
      "loss": 0.6275,
      "step": 786
    },
    {
      "epoch": 0.09649337910740559,
      "grad_norm": 3.0375798222826296,
      "learning_rate": 4.822303921568628e-06,
      "loss": 0.5877,
      "step": 787
    },
    {
      "epoch": 0.09661598822952427,
      "grad_norm": 2.7048552849877434,
      "learning_rate": 4.82843137254902e-06,
      "loss": 0.6328,
      "step": 788
    },
    {
      "epoch": 0.09673859735164296,
      "grad_norm": 2.620526197258887,
      "learning_rate": 4.834558823529412e-06,
      "loss": 0.6044,
      "step": 789
    },
    {
      "epoch": 0.09686120647376165,
      "grad_norm": 3.039813770441064,
      "learning_rate": 4.840686274509805e-06,
      "loss": 0.6544,
      "step": 790
    },
    {
      "epoch": 0.09698381559588033,
      "grad_norm": 2.5019332820593014,
      "learning_rate": 4.846813725490196e-06,
      "loss": 0.6182,
      "step": 791
    },
    {
      "epoch": 0.09710642471799902,
      "grad_norm": 2.821174336890772,
      "learning_rate": 4.852941176470589e-06,
      "loss": 0.5731,
      "step": 792
    },
    {
      "epoch": 0.0972290338401177,
      "grad_norm": 3.1089410026798125,
      "learning_rate": 4.859068627450981e-06,
      "loss": 0.6376,
      "step": 793
    },
    {
      "epoch": 0.09735164296223639,
      "grad_norm": 2.9314060944086426,
      "learning_rate": 4.865196078431373e-06,
      "loss": 0.5697,
      "step": 794
    },
    {
      "epoch": 0.09747425208435508,
      "grad_norm": 2.6758187526161086,
      "learning_rate": 4.871323529411765e-06,
      "loss": 0.6964,
      "step": 795
    },
    {
      "epoch": 0.09759686120647376,
      "grad_norm": 2.4292054934648943,
      "learning_rate": 4.8774509803921576e-06,
      "loss": 0.5883,
      "step": 796
    },
    {
      "epoch": 0.09771947032859245,
      "grad_norm": 2.8660742328929665,
      "learning_rate": 4.883578431372549e-06,
      "loss": 0.6745,
      "step": 797
    },
    {
      "epoch": 0.09784207945071113,
      "grad_norm": 2.720277580852752,
      "learning_rate": 4.889705882352942e-06,
      "loss": 0.6365,
      "step": 798
    },
    {
      "epoch": 0.09796468857282982,
      "grad_norm": 2.4962386314832172,
      "learning_rate": 4.895833333333333e-06,
      "loss": 0.5304,
      "step": 799
    },
    {
      "epoch": 0.0980872976949485,
      "grad_norm": 2.7154371153194057,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.7032,
      "step": 800
    },
    {
      "epoch": 0.09820990681706719,
      "grad_norm": 2.6449418949292407,
      "learning_rate": 4.908088235294118e-06,
      "loss": 0.6186,
      "step": 801
    },
    {
      "epoch": 0.09833251593918588,
      "grad_norm": 3.004045102914244,
      "learning_rate": 4.91421568627451e-06,
      "loss": 0.6239,
      "step": 802
    },
    {
      "epoch": 0.09845512506130456,
      "grad_norm": 2.5278315762755827,
      "learning_rate": 4.920343137254902e-06,
      "loss": 0.5517,
      "step": 803
    },
    {
      "epoch": 0.09857773418342325,
      "grad_norm": 2.5230143217576555,
      "learning_rate": 4.9264705882352945e-06,
      "loss": 0.6179,
      "step": 804
    },
    {
      "epoch": 0.09870034330554193,
      "grad_norm": 2.9715766250690048,
      "learning_rate": 4.932598039215687e-06,
      "loss": 0.6425,
      "step": 805
    },
    {
      "epoch": 0.09882295242766062,
      "grad_norm": 2.5075574414950705,
      "learning_rate": 4.938725490196079e-06,
      "loss": 0.6235,
      "step": 806
    },
    {
      "epoch": 0.0989455615497793,
      "grad_norm": 3.2354591491654863,
      "learning_rate": 4.944852941176471e-06,
      "loss": 0.5782,
      "step": 807
    },
    {
      "epoch": 0.09906817067189799,
      "grad_norm": 2.8766334388092982,
      "learning_rate": 4.9509803921568634e-06,
      "loss": 0.6587,
      "step": 808
    },
    {
      "epoch": 0.09919077979401668,
      "grad_norm": 2.5721463972655165,
      "learning_rate": 4.957107843137256e-06,
      "loss": 0.5753,
      "step": 809
    },
    {
      "epoch": 0.09931338891613536,
      "grad_norm": 2.395225418528663,
      "learning_rate": 4.9632352941176475e-06,
      "loss": 0.6171,
      "step": 810
    },
    {
      "epoch": 0.09943599803825405,
      "grad_norm": 2.5642011171114003,
      "learning_rate": 4.96936274509804e-06,
      "loss": 0.5889,
      "step": 811
    },
    {
      "epoch": 0.09955860716037274,
      "grad_norm": 2.6140492185743107,
      "learning_rate": 4.9754901960784315e-06,
      "loss": 0.6406,
      "step": 812
    },
    {
      "epoch": 0.09968121628249142,
      "grad_norm": 2.8061155261832926,
      "learning_rate": 4.981617647058824e-06,
      "loss": 0.5722,
      "step": 813
    },
    {
      "epoch": 0.09980382540461011,
      "grad_norm": 3.123459695885228,
      "learning_rate": 4.987745098039216e-06,
      "loss": 0.5918,
      "step": 814
    },
    {
      "epoch": 0.0999264345267288,
      "grad_norm": 2.7832897188949417,
      "learning_rate": 4.993872549019608e-06,
      "loss": 0.6197,
      "step": 815
    },
    {
      "epoch": 0.10004904364884748,
      "grad_norm": 2.445976748779052,
      "learning_rate": 5e-06,
      "loss": 0.6646,
      "step": 816
    },
    {
      "epoch": 0.10017165277096617,
      "grad_norm": 2.7724313125987647,
      "learning_rate": 4.9999999486227875e-06,
      "loss": 0.6909,
      "step": 817
    },
    {
      "epoch": 0.10029426189308485,
      "grad_norm": 2.6938352169225825,
      "learning_rate": 4.999999794491151e-06,
      "loss": 0.6246,
      "step": 818
    },
    {
      "epoch": 0.10041687101520354,
      "grad_norm": 2.8300886973049098,
      "learning_rate": 4.9999995376050965e-06,
      "loss": 0.6021,
      "step": 819
    },
    {
      "epoch": 0.10053948013732222,
      "grad_norm": 2.3958297548813876,
      "learning_rate": 4.999999177964635e-06,
      "loss": 0.624,
      "step": 820
    },
    {
      "epoch": 0.1006620892594409,
      "grad_norm": 2.5343907131246204,
      "learning_rate": 4.999998715569782e-06,
      "loss": 0.6244,
      "step": 821
    },
    {
      "epoch": 0.10078469838155958,
      "grad_norm": 2.651175137658381,
      "learning_rate": 4.999998150420556e-06,
      "loss": 0.653,
      "step": 822
    },
    {
      "epoch": 0.10090730750367827,
      "grad_norm": 2.6623761900445273,
      "learning_rate": 4.999997482516979e-06,
      "loss": 0.6814,
      "step": 823
    },
    {
      "epoch": 0.10102991662579695,
      "grad_norm": 2.7796170120061894,
      "learning_rate": 4.999996711859081e-06,
      "loss": 0.623,
      "step": 824
    },
    {
      "epoch": 0.10115252574791564,
      "grad_norm": 2.7432601993715755,
      "learning_rate": 4.999995838446892e-06,
      "loss": 0.6547,
      "step": 825
    },
    {
      "epoch": 0.10127513487003433,
      "grad_norm": 2.831035904292603,
      "learning_rate": 4.999994862280447e-06,
      "loss": 0.6701,
      "step": 826
    },
    {
      "epoch": 0.10139774399215301,
      "grad_norm": 2.407880954339152,
      "learning_rate": 4.999993783359789e-06,
      "loss": 0.5939,
      "step": 827
    },
    {
      "epoch": 0.1015203531142717,
      "grad_norm": 2.556755538914854,
      "learning_rate": 4.999992601684959e-06,
      "loss": 0.5785,
      "step": 828
    },
    {
      "epoch": 0.10164296223639038,
      "grad_norm": 2.386482169112166,
      "learning_rate": 4.999991317256007e-06,
      "loss": 0.6191,
      "step": 829
    },
    {
      "epoch": 0.10176557135850907,
      "grad_norm": 2.6267924504856253,
      "learning_rate": 4.999989930072988e-06,
      "loss": 0.6045,
      "step": 830
    },
    {
      "epoch": 0.10188818048062775,
      "grad_norm": 2.310344426182532,
      "learning_rate": 4.9999884401359555e-06,
      "loss": 0.6187,
      "step": 831
    },
    {
      "epoch": 0.10201078960274644,
      "grad_norm": 2.5712818515691898,
      "learning_rate": 4.9999868474449725e-06,
      "loss": 0.6389,
      "step": 832
    },
    {
      "epoch": 0.10213339872486513,
      "grad_norm": 2.508031766416887,
      "learning_rate": 4.999985152000104e-06,
      "loss": 0.618,
      "step": 833
    },
    {
      "epoch": 0.10225600784698381,
      "grad_norm": 2.9943088286561306,
      "learning_rate": 4.999983353801421e-06,
      "loss": 0.6264,
      "step": 834
    },
    {
      "epoch": 0.1023786169691025,
      "grad_norm": 2.6619067110675445,
      "learning_rate": 4.999981452848995e-06,
      "loss": 0.5792,
      "step": 835
    },
    {
      "epoch": 0.10250122609122118,
      "grad_norm": 2.7054349266257236,
      "learning_rate": 4.999979449142905e-06,
      "loss": 0.6518,
      "step": 836
    },
    {
      "epoch": 0.10262383521333987,
      "grad_norm": 2.5044253866477124,
      "learning_rate": 4.999977342683235e-06,
      "loss": 0.6221,
      "step": 837
    },
    {
      "epoch": 0.10274644433545856,
      "grad_norm": 2.2691200412333337,
      "learning_rate": 4.999975133470069e-06,
      "loss": 0.6107,
      "step": 838
    },
    {
      "epoch": 0.10286905345757724,
      "grad_norm": 2.4777927103505086,
      "learning_rate": 4.999972821503502e-06,
      "loss": 0.568,
      "step": 839
    },
    {
      "epoch": 0.10299166257969593,
      "grad_norm": 2.656724096650443,
      "learning_rate": 4.999970406783623e-06,
      "loss": 0.5723,
      "step": 840
    },
    {
      "epoch": 0.10311427170181461,
      "grad_norm": 2.7363886280206304,
      "learning_rate": 4.999967889310536e-06,
      "loss": 0.6233,
      "step": 841
    },
    {
      "epoch": 0.1032368808239333,
      "grad_norm": 2.5718533265520653,
      "learning_rate": 4.999965269084342e-06,
      "loss": 0.5853,
      "step": 842
    },
    {
      "epoch": 0.10335948994605199,
      "grad_norm": 2.5945028166060564,
      "learning_rate": 4.999962546105151e-06,
      "loss": 0.637,
      "step": 843
    },
    {
      "epoch": 0.10348209906817067,
      "grad_norm": 3.1011665284892067,
      "learning_rate": 4.999959720373072e-06,
      "loss": 0.5715,
      "step": 844
    },
    {
      "epoch": 0.10360470819028936,
      "grad_norm": 2.6154140466887625,
      "learning_rate": 4.999956791888224e-06,
      "loss": 0.5923,
      "step": 845
    },
    {
      "epoch": 0.10372731731240804,
      "grad_norm": 2.7331200808352256,
      "learning_rate": 4.999953760650725e-06,
      "loss": 0.6429,
      "step": 846
    },
    {
      "epoch": 0.10384992643452673,
      "grad_norm": 2.5951171363542556,
      "learning_rate": 4.999950626660702e-06,
      "loss": 0.6013,
      "step": 847
    },
    {
      "epoch": 0.10397253555664542,
      "grad_norm": 2.978426733768955,
      "learning_rate": 4.99994738991828e-06,
      "loss": 0.6372,
      "step": 848
    },
    {
      "epoch": 0.1040951446787641,
      "grad_norm": 2.7757331191203956,
      "learning_rate": 4.999944050423597e-06,
      "loss": 0.6462,
      "step": 849
    },
    {
      "epoch": 0.10421775380088279,
      "grad_norm": 2.765156202827275,
      "learning_rate": 4.999940608176787e-06,
      "loss": 0.629,
      "step": 850
    },
    {
      "epoch": 0.10434036292300147,
      "grad_norm": 2.6552219550511005,
      "learning_rate": 4.999937063177991e-06,
      "loss": 0.615,
      "step": 851
    },
    {
      "epoch": 0.10446297204512016,
      "grad_norm": 2.61121020907975,
      "learning_rate": 4.999933415427357e-06,
      "loss": 0.6451,
      "step": 852
    },
    {
      "epoch": 0.10458558116723884,
      "grad_norm": 2.486168379741056,
      "learning_rate": 4.999929664925034e-06,
      "loss": 0.6401,
      "step": 853
    },
    {
      "epoch": 0.10470819028935753,
      "grad_norm": 2.642024795876191,
      "learning_rate": 4.999925811671175e-06,
      "loss": 0.6448,
      "step": 854
    },
    {
      "epoch": 0.10483079941147622,
      "grad_norm": 2.7672648255963144,
      "learning_rate": 4.99992185566594e-06,
      "loss": 0.6334,
      "step": 855
    },
    {
      "epoch": 0.1049534085335949,
      "grad_norm": 2.6718057509216018,
      "learning_rate": 4.999917796909491e-06,
      "loss": 0.6471,
      "step": 856
    },
    {
      "epoch": 0.10507601765571359,
      "grad_norm": 2.3842896557535855,
      "learning_rate": 4.999913635401995e-06,
      "loss": 0.6182,
      "step": 857
    },
    {
      "epoch": 0.10519862677783227,
      "grad_norm": 2.4113126190485263,
      "learning_rate": 4.999909371143622e-06,
      "loss": 0.6278,
      "step": 858
    },
    {
      "epoch": 0.10532123589995096,
      "grad_norm": 2.978657291483444,
      "learning_rate": 4.999905004134549e-06,
      "loss": 0.6406,
      "step": 859
    },
    {
      "epoch": 0.10544384502206965,
      "grad_norm": 2.403258668880807,
      "learning_rate": 4.999900534374954e-06,
      "loss": 0.6324,
      "step": 860
    },
    {
      "epoch": 0.10556645414418833,
      "grad_norm": 2.6392813631559067,
      "learning_rate": 4.9998959618650225e-06,
      "loss": 0.6058,
      "step": 861
    },
    {
      "epoch": 0.10568906326630702,
      "grad_norm": 2.4620901226484753,
      "learning_rate": 4.99989128660494e-06,
      "loss": 0.6836,
      "step": 862
    },
    {
      "epoch": 0.1058116723884257,
      "grad_norm": 2.6700362384353844,
      "learning_rate": 4.9998865085949e-06,
      "loss": 0.6286,
      "step": 863
    },
    {
      "epoch": 0.10593428151054439,
      "grad_norm": 2.360829582734934,
      "learning_rate": 4.999881627835099e-06,
      "loss": 0.6269,
      "step": 864
    },
    {
      "epoch": 0.10605689063266308,
      "grad_norm": 2.3651379944666395,
      "learning_rate": 4.999876644325737e-06,
      "loss": 0.6602,
      "step": 865
    },
    {
      "epoch": 0.10617949975478176,
      "grad_norm": 2.6485745362410706,
      "learning_rate": 4.9998715580670195e-06,
      "loss": 0.6258,
      "step": 866
    },
    {
      "epoch": 0.10630210887690045,
      "grad_norm": 2.7647902418468813,
      "learning_rate": 4.9998663690591556e-06,
      "loss": 0.6563,
      "step": 867
    },
    {
      "epoch": 0.10642471799901913,
      "grad_norm": 3.159926069947599,
      "learning_rate": 4.999861077302358e-06,
      "loss": 0.6086,
      "step": 868
    },
    {
      "epoch": 0.10654732712113782,
      "grad_norm": 2.740581724102619,
      "learning_rate": 4.999855682796845e-06,
      "loss": 0.5944,
      "step": 869
    },
    {
      "epoch": 0.1066699362432565,
      "grad_norm": 2.336434598539172,
      "learning_rate": 4.999850185542837e-06,
      "loss": 0.6545,
      "step": 870
    },
    {
      "epoch": 0.10679254536537518,
      "grad_norm": 2.3386692368106954,
      "learning_rate": 4.999844585540562e-06,
      "loss": 0.6171,
      "step": 871
    },
    {
      "epoch": 0.10691515448749386,
      "grad_norm": 2.5223152711975754,
      "learning_rate": 4.999838882790248e-06,
      "loss": 0.5942,
      "step": 872
    },
    {
      "epoch": 0.10703776360961255,
      "grad_norm": 2.7864055390293716,
      "learning_rate": 4.99983307729213e-06,
      "loss": 0.5855,
      "step": 873
    },
    {
      "epoch": 0.10716037273173123,
      "grad_norm": 2.7239645072648306,
      "learning_rate": 4.999827169046448e-06,
      "loss": 0.5877,
      "step": 874
    },
    {
      "epoch": 0.10728298185384992,
      "grad_norm": 2.747951110524139,
      "learning_rate": 4.9998211580534425e-06,
      "loss": 0.6173,
      "step": 875
    },
    {
      "epoch": 0.1074055909759686,
      "grad_norm": 2.5885281440192434,
      "learning_rate": 4.999815044313362e-06,
      "loss": 0.6142,
      "step": 876
    },
    {
      "epoch": 0.10752820009808729,
      "grad_norm": 2.4409199257758867,
      "learning_rate": 4.999808827826458e-06,
      "loss": 0.5885,
      "step": 877
    },
    {
      "epoch": 0.10765080922020598,
      "grad_norm": 2.451109698890339,
      "learning_rate": 4.999802508592986e-06,
      "loss": 0.6379,
      "step": 878
    },
    {
      "epoch": 0.10777341834232466,
      "grad_norm": 2.4007873868474974,
      "learning_rate": 4.999796086613205e-06,
      "loss": 0.6472,
      "step": 879
    },
    {
      "epoch": 0.10789602746444335,
      "grad_norm": 2.466273794955739,
      "learning_rate": 4.9997895618873785e-06,
      "loss": 0.5916,
      "step": 880
    },
    {
      "epoch": 0.10801863658656204,
      "grad_norm": 2.7242696608640533,
      "learning_rate": 4.999782934415776e-06,
      "loss": 0.6181,
      "step": 881
    },
    {
      "epoch": 0.10814124570868072,
      "grad_norm": 2.4989566653860558,
      "learning_rate": 4.9997762041986695e-06,
      "loss": 0.6485,
      "step": 882
    },
    {
      "epoch": 0.10826385483079941,
      "grad_norm": 2.5525634870184164,
      "learning_rate": 4.999769371236336e-06,
      "loss": 0.6065,
      "step": 883
    },
    {
      "epoch": 0.1083864639529181,
      "grad_norm": 2.531167715993151,
      "learning_rate": 4.999762435529055e-06,
      "loss": 0.6024,
      "step": 884
    },
    {
      "epoch": 0.10850907307503678,
      "grad_norm": 2.8148508985184124,
      "learning_rate": 4.999755397077113e-06,
      "loss": 0.6223,
      "step": 885
    },
    {
      "epoch": 0.10863168219715547,
      "grad_norm": 2.774976379610041,
      "learning_rate": 4.999748255880798e-06,
      "loss": 0.6497,
      "step": 886
    },
    {
      "epoch": 0.10875429131927415,
      "grad_norm": 2.5873401558526163,
      "learning_rate": 4.999741011940405e-06,
      "loss": 0.5737,
      "step": 887
    },
    {
      "epoch": 0.10887690044139284,
      "grad_norm": 2.899951283236773,
      "learning_rate": 4.999733665256231e-06,
      "loss": 0.6385,
      "step": 888
    },
    {
      "epoch": 0.10899950956351152,
      "grad_norm": 2.798882903160497,
      "learning_rate": 4.9997262158285775e-06,
      "loss": 0.6018,
      "step": 889
    },
    {
      "epoch": 0.10912211868563021,
      "grad_norm": 2.7152452450907427,
      "learning_rate": 4.999718663657751e-06,
      "loss": 0.6468,
      "step": 890
    },
    {
      "epoch": 0.1092447278077489,
      "grad_norm": 2.4448104981940055,
      "learning_rate": 4.999711008744062e-06,
      "loss": 0.6164,
      "step": 891
    },
    {
      "epoch": 0.10936733692986758,
      "grad_norm": 2.5655744800589715,
      "learning_rate": 4.999703251087826e-06,
      "loss": 0.6363,
      "step": 892
    },
    {
      "epoch": 0.10948994605198627,
      "grad_norm": 2.4088385113871724,
      "learning_rate": 4.9996953906893605e-06,
      "loss": 0.6451,
      "step": 893
    },
    {
      "epoch": 0.10961255517410495,
      "grad_norm": 2.570721017711038,
      "learning_rate": 4.999687427548989e-06,
      "loss": 0.6083,
      "step": 894
    },
    {
      "epoch": 0.10973516429622364,
      "grad_norm": 2.5237407152726075,
      "learning_rate": 4.999679361667039e-06,
      "loss": 0.5735,
      "step": 895
    },
    {
      "epoch": 0.10985777341834232,
      "grad_norm": 2.70302544647275,
      "learning_rate": 4.999671193043841e-06,
      "loss": 0.5697,
      "step": 896
    },
    {
      "epoch": 0.10998038254046101,
      "grad_norm": 2.6219468919706044,
      "learning_rate": 4.999662921679733e-06,
      "loss": 0.6729,
      "step": 897
    },
    {
      "epoch": 0.1101029916625797,
      "grad_norm": 2.3018945659526913,
      "learning_rate": 4.999654547575053e-06,
      "loss": 0.5476,
      "step": 898
    },
    {
      "epoch": 0.11022560078469838,
      "grad_norm": 2.495159161611472,
      "learning_rate": 4.999646070730146e-06,
      "loss": 0.6552,
      "step": 899
    },
    {
      "epoch": 0.11034820990681707,
      "grad_norm": 2.432235690425386,
      "learning_rate": 4.99963749114536e-06,
      "loss": 0.6278,
      "step": 900
    },
    {
      "epoch": 0.11047081902893575,
      "grad_norm": 2.5993552466174843,
      "learning_rate": 4.9996288088210475e-06,
      "loss": 0.6335,
      "step": 901
    },
    {
      "epoch": 0.11059342815105444,
      "grad_norm": 2.4345029054001044,
      "learning_rate": 4.999620023757567e-06,
      "loss": 0.6105,
      "step": 902
    },
    {
      "epoch": 0.11071603727317313,
      "grad_norm": 2.3755719596902565,
      "learning_rate": 4.999611135955277e-06,
      "loss": 0.6147,
      "step": 903
    },
    {
      "epoch": 0.11083864639529181,
      "grad_norm": 2.684708130099906,
      "learning_rate": 4.999602145414545e-06,
      "loss": 0.6575,
      "step": 904
    },
    {
      "epoch": 0.1109612555174105,
      "grad_norm": 2.661531408078664,
      "learning_rate": 4.999593052135738e-06,
      "loss": 0.5644,
      "step": 905
    },
    {
      "epoch": 0.11108386463952918,
      "grad_norm": 2.7017137370112896,
      "learning_rate": 4.999583856119234e-06,
      "loss": 0.6554,
      "step": 906
    },
    {
      "epoch": 0.11120647376164787,
      "grad_norm": 2.5293407864649478,
      "learning_rate": 4.999574557365406e-06,
      "loss": 0.6167,
      "step": 907
    },
    {
      "epoch": 0.11132908288376656,
      "grad_norm": 2.9222032584957263,
      "learning_rate": 4.99956515587464e-06,
      "loss": 0.572,
      "step": 908
    },
    {
      "epoch": 0.11145169200588524,
      "grad_norm": 2.609163984728876,
      "learning_rate": 4.99955565164732e-06,
      "loss": 0.6213,
      "step": 909
    },
    {
      "epoch": 0.11157430112800393,
      "grad_norm": 2.5689144902567262,
      "learning_rate": 4.9995460446838375e-06,
      "loss": 0.6637,
      "step": 910
    },
    {
      "epoch": 0.11169691025012261,
      "grad_norm": 2.4877280957424026,
      "learning_rate": 4.999536334984589e-06,
      "loss": 0.6113,
      "step": 911
    },
    {
      "epoch": 0.1118195193722413,
      "grad_norm": 2.4574887461543446,
      "learning_rate": 4.9995265225499705e-06,
      "loss": 0.5885,
      "step": 912
    },
    {
      "epoch": 0.11194212849435999,
      "grad_norm": 2.8188615193029816,
      "learning_rate": 4.999516607380387e-06,
      "loss": 0.637,
      "step": 913
    },
    {
      "epoch": 0.11206473761647867,
      "grad_norm": 2.3857611010488444,
      "learning_rate": 4.999506589476246e-06,
      "loss": 0.6356,
      "step": 914
    },
    {
      "epoch": 0.11218734673859736,
      "grad_norm": 2.472586958667727,
      "learning_rate": 4.99949646883796e-06,
      "loss": 0.5986,
      "step": 915
    },
    {
      "epoch": 0.11230995586071604,
      "grad_norm": 2.8441254346320908,
      "learning_rate": 4.999486245465943e-06,
      "loss": 0.6304,
      "step": 916
    },
    {
      "epoch": 0.11243256498283473,
      "grad_norm": 2.4932354214807795,
      "learning_rate": 4.999475919360617e-06,
      "loss": 0.6489,
      "step": 917
    },
    {
      "epoch": 0.11255517410495341,
      "grad_norm": 2.4413039558789986,
      "learning_rate": 4.999465490522405e-06,
      "loss": 0.6117,
      "step": 918
    },
    {
      "epoch": 0.1126777832270721,
      "grad_norm": 2.5698770013828667,
      "learning_rate": 4.999454958951737e-06,
      "loss": 0.6199,
      "step": 919
    },
    {
      "epoch": 0.11280039234919079,
      "grad_norm": 3.084360422489756,
      "learning_rate": 4.999444324649045e-06,
      "loss": 0.644,
      "step": 920
    },
    {
      "epoch": 0.11292300147130946,
      "grad_norm": 2.513958733592005,
      "learning_rate": 4.999433587614767e-06,
      "loss": 0.596,
      "step": 921
    },
    {
      "epoch": 0.11304561059342814,
      "grad_norm": 2.5778445270707757,
      "learning_rate": 4.999422747849343e-06,
      "loss": 0.6392,
      "step": 922
    },
    {
      "epoch": 0.11316821971554683,
      "grad_norm": 2.802195314473429,
      "learning_rate": 4.99941180535322e-06,
      "loss": 0.5958,
      "step": 923
    },
    {
      "epoch": 0.11329082883766552,
      "grad_norm": 2.6467436597963543,
      "learning_rate": 4.999400760126846e-06,
      "loss": 0.6353,
      "step": 924
    },
    {
      "epoch": 0.1134134379597842,
      "grad_norm": 2.534737389443984,
      "learning_rate": 4.999389612170676e-06,
      "loss": 0.6174,
      "step": 925
    },
    {
      "epoch": 0.11353604708190289,
      "grad_norm": 2.3525936275253594,
      "learning_rate": 4.999378361485169e-06,
      "loss": 0.6228,
      "step": 926
    },
    {
      "epoch": 0.11365865620402157,
      "grad_norm": 2.4098881930198903,
      "learning_rate": 4.999367008070785e-06,
      "loss": 0.6482,
      "step": 927
    },
    {
      "epoch": 0.11378126532614026,
      "grad_norm": 2.7165614599254146,
      "learning_rate": 4.999355551927993e-06,
      "loss": 0.6854,
      "step": 928
    },
    {
      "epoch": 0.11390387444825895,
      "grad_norm": 2.9320054674553004,
      "learning_rate": 4.999343993057264e-06,
      "loss": 0.6604,
      "step": 929
    },
    {
      "epoch": 0.11402648357037763,
      "grad_norm": 2.5774969933429017,
      "learning_rate": 4.999332331459072e-06,
      "loss": 0.6398,
      "step": 930
    },
    {
      "epoch": 0.11414909269249632,
      "grad_norm": 2.659352253832549,
      "learning_rate": 4.999320567133895e-06,
      "loss": 0.5537,
      "step": 931
    },
    {
      "epoch": 0.114271701814615,
      "grad_norm": 2.5875944927869714,
      "learning_rate": 4.9993087000822185e-06,
      "loss": 0.6091,
      "step": 932
    },
    {
      "epoch": 0.11439431093673369,
      "grad_norm": 2.4721406781073045,
      "learning_rate": 4.999296730304531e-06,
      "loss": 0.6373,
      "step": 933
    },
    {
      "epoch": 0.11451692005885238,
      "grad_norm": 2.628479779292773,
      "learning_rate": 4.999284657801321e-06,
      "loss": 0.6649,
      "step": 934
    },
    {
      "epoch": 0.11463952918097106,
      "grad_norm": 2.5675459724235754,
      "learning_rate": 4.999272482573089e-06,
      "loss": 0.5879,
      "step": 935
    },
    {
      "epoch": 0.11476213830308975,
      "grad_norm": 2.357695910845071,
      "learning_rate": 4.999260204620332e-06,
      "loss": 0.6162,
      "step": 936
    },
    {
      "epoch": 0.11488474742520843,
      "grad_norm": 2.463312305997482,
      "learning_rate": 4.999247823943556e-06,
      "loss": 0.6007,
      "step": 937
    },
    {
      "epoch": 0.11500735654732712,
      "grad_norm": 2.3338483481727517,
      "learning_rate": 4.99923534054327e-06,
      "loss": 0.5992,
      "step": 938
    },
    {
      "epoch": 0.1151299656694458,
      "grad_norm": 2.6910982894130884,
      "learning_rate": 4.999222754419988e-06,
      "loss": 0.6106,
      "step": 939
    },
    {
      "epoch": 0.11525257479156449,
      "grad_norm": 2.658364200813562,
      "learning_rate": 4.9992100655742256e-06,
      "loss": 0.6107,
      "step": 940
    },
    {
      "epoch": 0.11537518391368318,
      "grad_norm": 2.6989264337423617,
      "learning_rate": 4.999197274006505e-06,
      "loss": 0.6324,
      "step": 941
    },
    {
      "epoch": 0.11549779303580186,
      "grad_norm": 2.830103937500718,
      "learning_rate": 4.999184379717352e-06,
      "loss": 0.7007,
      "step": 942
    },
    {
      "epoch": 0.11562040215792055,
      "grad_norm": 2.512417110905662,
      "learning_rate": 4.999171382707296e-06,
      "loss": 0.6126,
      "step": 943
    },
    {
      "epoch": 0.11574301128003923,
      "grad_norm": 2.5751788740774346,
      "learning_rate": 4.9991582829768725e-06,
      "loss": 0.6572,
      "step": 944
    },
    {
      "epoch": 0.11586562040215792,
      "grad_norm": 2.545754733883125,
      "learning_rate": 4.999145080526618e-06,
      "loss": 0.5783,
      "step": 945
    },
    {
      "epoch": 0.1159882295242766,
      "grad_norm": 2.799006930241921,
      "learning_rate": 4.999131775357078e-06,
      "loss": 0.6195,
      "step": 946
    },
    {
      "epoch": 0.11611083864639529,
      "grad_norm": 2.3434293766475753,
      "learning_rate": 4.999118367468797e-06,
      "loss": 0.5532,
      "step": 947
    },
    {
      "epoch": 0.11623344776851398,
      "grad_norm": 2.612011132890495,
      "learning_rate": 4.999104856862326e-06,
      "loss": 0.5816,
      "step": 948
    },
    {
      "epoch": 0.11635605689063266,
      "grad_norm": 2.4113996727575544,
      "learning_rate": 4.999091243538222e-06,
      "loss": 0.648,
      "step": 949
    },
    {
      "epoch": 0.11647866601275135,
      "grad_norm": 2.5436743465091087,
      "learning_rate": 4.999077527497044e-06,
      "loss": 0.6067,
      "step": 950
    },
    {
      "epoch": 0.11660127513487004,
      "grad_norm": 2.5092728753041467,
      "learning_rate": 4.999063708739354e-06,
      "loss": 0.6182,
      "step": 951
    },
    {
      "epoch": 0.11672388425698872,
      "grad_norm": 2.5643516702938967,
      "learning_rate": 4.999049787265722e-06,
      "loss": 0.6538,
      "step": 952
    },
    {
      "epoch": 0.11684649337910741,
      "grad_norm": 2.422465102832537,
      "learning_rate": 4.999035763076719e-06,
      "loss": 0.5803,
      "step": 953
    },
    {
      "epoch": 0.1169691025012261,
      "grad_norm": 2.5030228375543504,
      "learning_rate": 4.999021636172923e-06,
      "loss": 0.5474,
      "step": 954
    },
    {
      "epoch": 0.11709171162334478,
      "grad_norm": 2.4126330226100996,
      "learning_rate": 4.999007406554912e-06,
      "loss": 0.6144,
      "step": 955
    },
    {
      "epoch": 0.11721432074546347,
      "grad_norm": 2.2621814346227844,
      "learning_rate": 4.998993074223274e-06,
      "loss": 0.6134,
      "step": 956
    },
    {
      "epoch": 0.11733692986758215,
      "grad_norm": 2.723290527264221,
      "learning_rate": 4.998978639178596e-06,
      "loss": 0.6314,
      "step": 957
    },
    {
      "epoch": 0.11745953898970084,
      "grad_norm": 2.561432382573383,
      "learning_rate": 4.998964101421472e-06,
      "loss": 0.6462,
      "step": 958
    },
    {
      "epoch": 0.11758214811181952,
      "grad_norm": 2.619114091769879,
      "learning_rate": 4.9989494609524995e-06,
      "loss": 0.6417,
      "step": 959
    },
    {
      "epoch": 0.11770475723393821,
      "grad_norm": 2.51737537432777,
      "learning_rate": 4.998934717772279e-06,
      "loss": 0.6247,
      "step": 960
    },
    {
      "epoch": 0.1178273663560569,
      "grad_norm": 2.4892619915119454,
      "learning_rate": 4.9989198718814186e-06,
      "loss": 0.5941,
      "step": 961
    },
    {
      "epoch": 0.11794997547817558,
      "grad_norm": 2.5667510237009306,
      "learning_rate": 4.998904923280527e-06,
      "loss": 0.6565,
      "step": 962
    },
    {
      "epoch": 0.11807258460029427,
      "grad_norm": 2.5369829143848013,
      "learning_rate": 4.998889871970219e-06,
      "loss": 0.6361,
      "step": 963
    },
    {
      "epoch": 0.11819519372241295,
      "grad_norm": 2.465888922315135,
      "learning_rate": 4.998874717951114e-06,
      "loss": 0.6122,
      "step": 964
    },
    {
      "epoch": 0.11831780284453164,
      "grad_norm": 2.276854214611532,
      "learning_rate": 4.998859461223834e-06,
      "loss": 0.6194,
      "step": 965
    },
    {
      "epoch": 0.11844041196665032,
      "grad_norm": 2.63847947846329,
      "learning_rate": 4.998844101789006e-06,
      "loss": 0.6432,
      "step": 966
    },
    {
      "epoch": 0.11856302108876901,
      "grad_norm": 2.6806015447387996,
      "learning_rate": 4.998828639647262e-06,
      "loss": 0.608,
      "step": 967
    },
    {
      "epoch": 0.1186856302108877,
      "grad_norm": 2.5593531864782375,
      "learning_rate": 4.998813074799237e-06,
      "loss": 0.6502,
      "step": 968
    },
    {
      "epoch": 0.11880823933300638,
      "grad_norm": 2.6043736807161046,
      "learning_rate": 4.99879740724557e-06,
      "loss": 0.5866,
      "step": 969
    },
    {
      "epoch": 0.11893084845512505,
      "grad_norm": 2.6181831393172876,
      "learning_rate": 4.998781636986905e-06,
      "loss": 0.5952,
      "step": 970
    },
    {
      "epoch": 0.11905345757724374,
      "grad_norm": 2.543798581722839,
      "learning_rate": 4.998765764023893e-06,
      "loss": 0.6566,
      "step": 971
    },
    {
      "epoch": 0.11917606669936243,
      "grad_norm": 2.6534423332978627,
      "learning_rate": 4.998749788357184e-06,
      "loss": 0.5819,
      "step": 972
    },
    {
      "epoch": 0.11929867582148111,
      "grad_norm": 2.5850370997499743,
      "learning_rate": 4.998733709987434e-06,
      "loss": 0.5475,
      "step": 973
    },
    {
      "epoch": 0.1194212849435998,
      "grad_norm": 2.382773202372347,
      "learning_rate": 4.9987175289153065e-06,
      "loss": 0.6496,
      "step": 974
    },
    {
      "epoch": 0.11954389406571848,
      "grad_norm": 2.616235180895678,
      "learning_rate": 4.998701245141464e-06,
      "loss": 0.5753,
      "step": 975
    },
    {
      "epoch": 0.11966650318783717,
      "grad_norm": 2.5870748328863664,
      "learning_rate": 4.998684858666577e-06,
      "loss": 0.626,
      "step": 976
    },
    {
      "epoch": 0.11978911230995586,
      "grad_norm": 2.471969632017114,
      "learning_rate": 4.998668369491318e-06,
      "loss": 0.5804,
      "step": 977
    },
    {
      "epoch": 0.11991172143207454,
      "grad_norm": 2.5493657092003077,
      "learning_rate": 4.9986517776163655e-06,
      "loss": 0.6124,
      "step": 978
    },
    {
      "epoch": 0.12003433055419323,
      "grad_norm": 2.374863846988026,
      "learning_rate": 4.998635083042403e-06,
      "loss": 0.582,
      "step": 979
    },
    {
      "epoch": 0.12015693967631191,
      "grad_norm": 2.7875810924489985,
      "learning_rate": 4.998618285770113e-06,
      "loss": 0.618,
      "step": 980
    },
    {
      "epoch": 0.1202795487984306,
      "grad_norm": 2.57058026851507,
      "learning_rate": 4.998601385800189e-06,
      "loss": 0.6235,
      "step": 981
    },
    {
      "epoch": 0.12040215792054929,
      "grad_norm": 2.3402773517608826,
      "learning_rate": 4.998584383133324e-06,
      "loss": 0.5981,
      "step": 982
    },
    {
      "epoch": 0.12052476704266797,
      "grad_norm": 2.685504410469991,
      "learning_rate": 4.9985672777702186e-06,
      "loss": 0.6419,
      "step": 983
    },
    {
      "epoch": 0.12064737616478666,
      "grad_norm": 2.717927602980839,
      "learning_rate": 4.998550069711574e-06,
      "loss": 0.6731,
      "step": 984
    },
    {
      "epoch": 0.12076998528690534,
      "grad_norm": 2.559116756879504,
      "learning_rate": 4.9985327589580986e-06,
      "loss": 0.5929,
      "step": 985
    },
    {
      "epoch": 0.12089259440902403,
      "grad_norm": 2.664396960958162,
      "learning_rate": 4.998515345510503e-06,
      "loss": 0.6312,
      "step": 986
    },
    {
      "epoch": 0.12101520353114271,
      "grad_norm": 2.408319253582952,
      "learning_rate": 4.998497829369504e-06,
      "loss": 0.5844,
      "step": 987
    },
    {
      "epoch": 0.1211378126532614,
      "grad_norm": 2.526301143348296,
      "learning_rate": 4.998480210535821e-06,
      "loss": 0.6019,
      "step": 988
    },
    {
      "epoch": 0.12126042177538009,
      "grad_norm": 2.499833691559217,
      "learning_rate": 4.998462489010177e-06,
      "loss": 0.5499,
      "step": 989
    },
    {
      "epoch": 0.12138303089749877,
      "grad_norm": 2.4216806457786006,
      "learning_rate": 4.998444664793303e-06,
      "loss": 0.5446,
      "step": 990
    },
    {
      "epoch": 0.12150564001961746,
      "grad_norm": 2.670301861639063,
      "learning_rate": 4.998426737885929e-06,
      "loss": 0.625,
      "step": 991
    },
    {
      "epoch": 0.12162824914173614,
      "grad_norm": 2.4756013625129762,
      "learning_rate": 4.9984087082887935e-06,
      "loss": 0.6241,
      "step": 992
    },
    {
      "epoch": 0.12175085826385483,
      "grad_norm": 2.3594237373365616,
      "learning_rate": 4.998390576002637e-06,
      "loss": 0.6,
      "step": 993
    },
    {
      "epoch": 0.12187346738597352,
      "grad_norm": 2.56555067466932,
      "learning_rate": 4.998372341028205e-06,
      "loss": 0.6372,
      "step": 994
    },
    {
      "epoch": 0.1219960765080922,
      "grad_norm": 2.66807276377046,
      "learning_rate": 4.998354003366246e-06,
      "loss": 0.5909,
      "step": 995
    },
    {
      "epoch": 0.12211868563021089,
      "grad_norm": 2.5095522170574283,
      "learning_rate": 4.9983355630175145e-06,
      "loss": 0.6032,
      "step": 996
    },
    {
      "epoch": 0.12224129475232957,
      "grad_norm": 2.693801908998886,
      "learning_rate": 4.998317019982769e-06,
      "loss": 0.6037,
      "step": 997
    },
    {
      "epoch": 0.12236390387444826,
      "grad_norm": 2.610680148816832,
      "learning_rate": 4.998298374262771e-06,
      "loss": 0.575,
      "step": 998
    },
    {
      "epoch": 0.12248651299656695,
      "grad_norm": 2.5796526000024183,
      "learning_rate": 4.998279625858287e-06,
      "loss": 0.5822,
      "step": 999
    },
    {
      "epoch": 0.12260912211868563,
      "grad_norm": 2.5052637956132697,
      "learning_rate": 4.998260774770087e-06,
      "loss": 0.624,
      "step": 1000
    },
    {
      "epoch": 0.12273173124080432,
      "grad_norm": 2.4719348079651025,
      "learning_rate": 4.998241820998947e-06,
      "loss": 0.6354,
      "step": 1001
    },
    {
      "epoch": 0.122854340362923,
      "grad_norm": 2.344764839480834,
      "learning_rate": 4.998222764545645e-06,
      "loss": 0.632,
      "step": 1002
    },
    {
      "epoch": 0.12297694948504169,
      "grad_norm": 2.4380081529722557,
      "learning_rate": 4.9982036054109636e-06,
      "loss": 0.5817,
      "step": 1003
    },
    {
      "epoch": 0.12309955860716038,
      "grad_norm": 2.549289604151022,
      "learning_rate": 4.998184343595693e-06,
      "loss": 0.5642,
      "step": 1004
    },
    {
      "epoch": 0.12322216772927906,
      "grad_norm": 2.648869716721626,
      "learning_rate": 4.998164979100622e-06,
      "loss": 0.6604,
      "step": 1005
    },
    {
      "epoch": 0.12334477685139775,
      "grad_norm": 2.824485412167833,
      "learning_rate": 4.998145511926549e-06,
      "loss": 0.6177,
      "step": 1006
    },
    {
      "epoch": 0.12346738597351643,
      "grad_norm": 2.729309403640243,
      "learning_rate": 4.998125942074272e-06,
      "loss": 0.6607,
      "step": 1007
    },
    {
      "epoch": 0.12358999509563512,
      "grad_norm": 2.744697225561119,
      "learning_rate": 4.998106269544596e-06,
      "loss": 0.6098,
      "step": 1008
    },
    {
      "epoch": 0.1237126042177538,
      "grad_norm": 2.624815511897962,
      "learning_rate": 4.99808649433833e-06,
      "loss": 0.5885,
      "step": 1009
    },
    {
      "epoch": 0.12383521333987249,
      "grad_norm": 2.554202784304224,
      "learning_rate": 4.998066616456286e-06,
      "loss": 0.6106,
      "step": 1010
    },
    {
      "epoch": 0.12395782246199118,
      "grad_norm": 2.730513477987121,
      "learning_rate": 4.998046635899283e-06,
      "loss": 0.6715,
      "step": 1011
    },
    {
      "epoch": 0.12408043158410986,
      "grad_norm": 2.528766490350794,
      "learning_rate": 4.99802655266814e-06,
      "loss": 0.5874,
      "step": 1012
    },
    {
      "epoch": 0.12420304070622855,
      "grad_norm": 2.381905900404459,
      "learning_rate": 4.998006366763683e-06,
      "loss": 0.6029,
      "step": 1013
    },
    {
      "epoch": 0.12432564982834723,
      "grad_norm": 2.142428173572504,
      "learning_rate": 4.997986078186742e-06,
      "loss": 0.6016,
      "step": 1014
    },
    {
      "epoch": 0.12444825895046592,
      "grad_norm": 2.4816609524166875,
      "learning_rate": 4.997965686938151e-06,
      "loss": 0.5911,
      "step": 1015
    },
    {
      "epoch": 0.1245708680725846,
      "grad_norm": 2.4221096130861226,
      "learning_rate": 4.997945193018748e-06,
      "loss": 0.6168,
      "step": 1016
    },
    {
      "epoch": 0.12469347719470329,
      "grad_norm": 2.6275816596975003,
      "learning_rate": 4.997924596429375e-06,
      "loss": 0.6273,
      "step": 1017
    },
    {
      "epoch": 0.12481608631682198,
      "grad_norm": 2.418338642003743,
      "learning_rate": 4.99790389717088e-06,
      "loss": 0.614,
      "step": 1018
    },
    {
      "epoch": 0.12493869543894066,
      "grad_norm": 2.3722133555102114,
      "learning_rate": 4.997883095244112e-06,
      "loss": 0.6002,
      "step": 1019
    },
    {
      "epoch": 0.12506130456105935,
      "grad_norm": 2.6149528252103007,
      "learning_rate": 4.997862190649926e-06,
      "loss": 0.6543,
      "step": 1020
    },
    {
      "epoch": 0.12518391368317802,
      "grad_norm": 2.753171102275983,
      "learning_rate": 4.997841183389183e-06,
      "loss": 0.5946,
      "step": 1021
    },
    {
      "epoch": 0.12530652280529672,
      "grad_norm": 2.60471492274827,
      "learning_rate": 4.997820073462744e-06,
      "loss": 0.6266,
      "step": 1022
    },
    {
      "epoch": 0.1254291319274154,
      "grad_norm": 2.4859148144772245,
      "learning_rate": 4.997798860871479e-06,
      "loss": 0.5924,
      "step": 1023
    },
    {
      "epoch": 0.1255517410495341,
      "grad_norm": 2.5125387023182784,
      "learning_rate": 4.997777545616258e-06,
      "loss": 0.6107,
      "step": 1024
    },
    {
      "epoch": 0.12567435017165277,
      "grad_norm": 2.5918981762255906,
      "learning_rate": 4.997756127697958e-06,
      "loss": 0.6334,
      "step": 1025
    },
    {
      "epoch": 0.12579695929377147,
      "grad_norm": 2.476582795947772,
      "learning_rate": 4.99773460711746e-06,
      "loss": 0.5856,
      "step": 1026
    },
    {
      "epoch": 0.12591956841589014,
      "grad_norm": 2.8194599092369725,
      "learning_rate": 4.997712983875647e-06,
      "loss": 0.6602,
      "step": 1027
    },
    {
      "epoch": 0.12604217753800884,
      "grad_norm": 2.696246590594185,
      "learning_rate": 4.997691257973409e-06,
      "loss": 0.6334,
      "step": 1028
    },
    {
      "epoch": 0.1261647866601275,
      "grad_norm": 2.338414557536381,
      "learning_rate": 4.997669429411638e-06,
      "loss": 0.6533,
      "step": 1029
    },
    {
      "epoch": 0.1262873957822462,
      "grad_norm": 2.4478687569822886,
      "learning_rate": 4.9976474981912325e-06,
      "loss": 0.5856,
      "step": 1030
    },
    {
      "epoch": 0.12641000490436488,
      "grad_norm": 2.4109100357572335,
      "learning_rate": 4.997625464313092e-06,
      "loss": 0.6156,
      "step": 1031
    },
    {
      "epoch": 0.12653261402648358,
      "grad_norm": 2.417716016845816,
      "learning_rate": 4.9976033277781236e-06,
      "loss": 0.6287,
      "step": 1032
    },
    {
      "epoch": 0.12665522314860225,
      "grad_norm": 2.7421581394520573,
      "learning_rate": 4.997581088587236e-06,
      "loss": 0.598,
      "step": 1033
    },
    {
      "epoch": 0.12677783227072095,
      "grad_norm": 2.573422492318642,
      "learning_rate": 4.9975587467413456e-06,
      "loss": 0.6389,
      "step": 1034
    },
    {
      "epoch": 0.12690044139283962,
      "grad_norm": 2.365258327430518,
      "learning_rate": 4.997536302241369e-06,
      "loss": 0.6088,
      "step": 1035
    },
    {
      "epoch": 0.12702305051495832,
      "grad_norm": 2.7809415546742686,
      "learning_rate": 4.997513755088228e-06,
      "loss": 0.6086,
      "step": 1036
    },
    {
      "epoch": 0.127145659637077,
      "grad_norm": 2.538585541114903,
      "learning_rate": 4.99749110528285e-06,
      "loss": 0.5987,
      "step": 1037
    },
    {
      "epoch": 0.1272682687591957,
      "grad_norm": 1.9866122853310089,
      "learning_rate": 4.997468352826167e-06,
      "loss": 0.5745,
      "step": 1038
    },
    {
      "epoch": 0.12739087788131437,
      "grad_norm": 2.551466631534638,
      "learning_rate": 4.997445497719113e-06,
      "loss": 0.6211,
      "step": 1039
    },
    {
      "epoch": 0.12751348700343307,
      "grad_norm": 2.504565394337946,
      "learning_rate": 4.997422539962628e-06,
      "loss": 0.6347,
      "step": 1040
    },
    {
      "epoch": 0.12763609612555174,
      "grad_norm": 2.353700364669474,
      "learning_rate": 4.9973994795576544e-06,
      "loss": 0.5349,
      "step": 1041
    },
    {
      "epoch": 0.12775870524767044,
      "grad_norm": 2.2814439266303075,
      "learning_rate": 4.9973763165051415e-06,
      "loss": 0.6,
      "step": 1042
    },
    {
      "epoch": 0.1278813143697891,
      "grad_norm": 2.339228246413955,
      "learning_rate": 4.9973530508060395e-06,
      "loss": 0.6153,
      "step": 1043
    },
    {
      "epoch": 0.1280039234919078,
      "grad_norm": 2.2831434973168614,
      "learning_rate": 4.997329682461307e-06,
      "loss": 0.6575,
      "step": 1044
    },
    {
      "epoch": 0.12812653261402648,
      "grad_norm": 2.314677709708025,
      "learning_rate": 4.997306211471904e-06,
      "loss": 0.5823,
      "step": 1045
    },
    {
      "epoch": 0.12824914173614516,
      "grad_norm": 2.9102011794875464,
      "learning_rate": 4.997282637838794e-06,
      "loss": 0.608,
      "step": 1046
    },
    {
      "epoch": 0.12837175085826386,
      "grad_norm": 2.351885503231902,
      "learning_rate": 4.997258961562947e-06,
      "loss": 0.5819,
      "step": 1047
    },
    {
      "epoch": 0.12849435998038253,
      "grad_norm": 2.4723263223789727,
      "learning_rate": 4.997235182645333e-06,
      "loss": 0.5784,
      "step": 1048
    },
    {
      "epoch": 0.12861696910250123,
      "grad_norm": 2.5240061706545056,
      "learning_rate": 4.997211301086935e-06,
      "loss": 0.5623,
      "step": 1049
    },
    {
      "epoch": 0.1287395782246199,
      "grad_norm": 2.802659543521854,
      "learning_rate": 4.99718731688873e-06,
      "loss": 0.62,
      "step": 1050
    },
    {
      "epoch": 0.1288621873467386,
      "grad_norm": 2.634246350627168,
      "learning_rate": 4.997163230051706e-06,
      "loss": 0.6334,
      "step": 1051
    },
    {
      "epoch": 0.12898479646885727,
      "grad_norm": 2.6911663043015284,
      "learning_rate": 4.9971390405768524e-06,
      "loss": 0.6787,
      "step": 1052
    },
    {
      "epoch": 0.12910740559097597,
      "grad_norm": 2.722581925659892,
      "learning_rate": 4.997114748465163e-06,
      "loss": 0.6081,
      "step": 1053
    },
    {
      "epoch": 0.12923001471309464,
      "grad_norm": 2.6160489383435737,
      "learning_rate": 4.997090353717638e-06,
      "loss": 0.642,
      "step": 1054
    },
    {
      "epoch": 0.12935262383521334,
      "grad_norm": 2.550995078277415,
      "learning_rate": 4.997065856335277e-06,
      "loss": 0.6383,
      "step": 1055
    },
    {
      "epoch": 0.12947523295733201,
      "grad_norm": 2.7604144753376647,
      "learning_rate": 4.997041256319089e-06,
      "loss": 0.6263,
      "step": 1056
    },
    {
      "epoch": 0.12959784207945071,
      "grad_norm": 2.428374896353805,
      "learning_rate": 4.997016553670085e-06,
      "loss": 0.5616,
      "step": 1057
    },
    {
      "epoch": 0.1297204512015694,
      "grad_norm": 2.558358840237365,
      "learning_rate": 4.99699174838928e-06,
      "loss": 0.5944,
      "step": 1058
    },
    {
      "epoch": 0.1298430603236881,
      "grad_norm": 2.4411383038595567,
      "learning_rate": 4.996966840477693e-06,
      "loss": 0.5974,
      "step": 1059
    },
    {
      "epoch": 0.12996566944580676,
      "grad_norm": 2.5053414321537892,
      "learning_rate": 4.99694182993635e-06,
      "loss": 0.5914,
      "step": 1060
    },
    {
      "epoch": 0.13008827856792546,
      "grad_norm": 2.322253130756463,
      "learning_rate": 4.996916716766276e-06,
      "loss": 0.566,
      "step": 1061
    },
    {
      "epoch": 0.13021088769004413,
      "grad_norm": 2.7878116943920195,
      "learning_rate": 4.996891500968504e-06,
      "loss": 0.6156,
      "step": 1062
    },
    {
      "epoch": 0.13033349681216283,
      "grad_norm": 2.7092914611116306,
      "learning_rate": 4.996866182544071e-06,
      "loss": 0.6391,
      "step": 1063
    },
    {
      "epoch": 0.1304561059342815,
      "grad_norm": 2.5890134379396232,
      "learning_rate": 4.9968407614940185e-06,
      "loss": 0.5957,
      "step": 1064
    },
    {
      "epoch": 0.1305787150564002,
      "grad_norm": 2.8421644871912757,
      "learning_rate": 4.99681523781939e-06,
      "loss": 0.6152,
      "step": 1065
    },
    {
      "epoch": 0.13070132417851887,
      "grad_norm": 2.425982774282431,
      "learning_rate": 4.996789611521234e-06,
      "loss": 0.5648,
      "step": 1066
    },
    {
      "epoch": 0.13082393330063757,
      "grad_norm": 2.6422130826267116,
      "learning_rate": 4.996763882600606e-06,
      "loss": 0.6147,
      "step": 1067
    },
    {
      "epoch": 0.13094654242275625,
      "grad_norm": 2.5615810100014347,
      "learning_rate": 4.996738051058562e-06,
      "loss": 0.5785,
      "step": 1068
    },
    {
      "epoch": 0.13106915154487495,
      "grad_norm": 2.2911325804001033,
      "learning_rate": 4.996712116896164e-06,
      "loss": 0.6015,
      "step": 1069
    },
    {
      "epoch": 0.13119176066699362,
      "grad_norm": 2.576800981350883,
      "learning_rate": 4.996686080114478e-06,
      "loss": 0.5581,
      "step": 1070
    },
    {
      "epoch": 0.13131436978911232,
      "grad_norm": 2.599944544318757,
      "learning_rate": 4.996659940714574e-06,
      "loss": 0.6693,
      "step": 1071
    },
    {
      "epoch": 0.131436978911231,
      "grad_norm": 2.707298904142651,
      "learning_rate": 4.996633698697526e-06,
      "loss": 0.6003,
      "step": 1072
    },
    {
      "epoch": 0.1315595880333497,
      "grad_norm": 2.7727833529145243,
      "learning_rate": 4.996607354064413e-06,
      "loss": 0.6066,
      "step": 1073
    },
    {
      "epoch": 0.13168219715546836,
      "grad_norm": 2.8511317548634643,
      "learning_rate": 4.996580906816318e-06,
      "loss": 0.6289,
      "step": 1074
    },
    {
      "epoch": 0.13180480627758706,
      "grad_norm": 2.358205540471096,
      "learning_rate": 4.996554356954328e-06,
      "loss": 0.6197,
      "step": 1075
    },
    {
      "epoch": 0.13192741539970573,
      "grad_norm": 2.3344957170790455,
      "learning_rate": 4.996527704479535e-06,
      "loss": 0.6272,
      "step": 1076
    },
    {
      "epoch": 0.13205002452182443,
      "grad_norm": 2.401080485587354,
      "learning_rate": 4.996500949393033e-06,
      "loss": 0.6003,
      "step": 1077
    },
    {
      "epoch": 0.1321726336439431,
      "grad_norm": 2.2146645037981156,
      "learning_rate": 4.996474091695922e-06,
      "loss": 0.6165,
      "step": 1078
    },
    {
      "epoch": 0.1322952427660618,
      "grad_norm": 2.6349509742268937,
      "learning_rate": 4.996447131389306e-06,
      "loss": 0.6611,
      "step": 1079
    },
    {
      "epoch": 0.13241785188818048,
      "grad_norm": 2.487882680233149,
      "learning_rate": 4.996420068474293e-06,
      "loss": 0.6162,
      "step": 1080
    },
    {
      "epoch": 0.13254046101029918,
      "grad_norm": 2.368054537850012,
      "learning_rate": 4.996392902951997e-06,
      "loss": 0.5956,
      "step": 1081
    },
    {
      "epoch": 0.13266307013241785,
      "grad_norm": 2.7100570489801266,
      "learning_rate": 4.996365634823533e-06,
      "loss": 0.5899,
      "step": 1082
    },
    {
      "epoch": 0.13278567925453655,
      "grad_norm": 2.533709396024415,
      "learning_rate": 4.996338264090021e-06,
      "loss": 0.5866,
      "step": 1083
    },
    {
      "epoch": 0.13290828837665522,
      "grad_norm": 2.7871196563945237,
      "learning_rate": 4.9963107907525865e-06,
      "loss": 0.6308,
      "step": 1084
    },
    {
      "epoch": 0.13303089749877392,
      "grad_norm": 2.5745526963967444,
      "learning_rate": 4.99628321481236e-06,
      "loss": 0.6193,
      "step": 1085
    },
    {
      "epoch": 0.1331535066208926,
      "grad_norm": 2.65119921462841,
      "learning_rate": 4.996255536270474e-06,
      "loss": 0.6307,
      "step": 1086
    },
    {
      "epoch": 0.1332761157430113,
      "grad_norm": 2.5068308699745443,
      "learning_rate": 4.996227755128066e-06,
      "loss": 0.6001,
      "step": 1087
    },
    {
      "epoch": 0.13339872486512996,
      "grad_norm": 2.698097722571427,
      "learning_rate": 4.996199871386278e-06,
      "loss": 0.5831,
      "step": 1088
    },
    {
      "epoch": 0.13352133398724866,
      "grad_norm": 2.6529049140546577,
      "learning_rate": 4.9961718850462566e-06,
      "loss": 0.6085,
      "step": 1089
    },
    {
      "epoch": 0.13364394310936734,
      "grad_norm": 2.556059970062385,
      "learning_rate": 4.996143796109151e-06,
      "loss": 0.6505,
      "step": 1090
    },
    {
      "epoch": 0.13376655223148604,
      "grad_norm": 2.496300791206765,
      "learning_rate": 4.996115604576117e-06,
      "loss": 0.6413,
      "step": 1091
    },
    {
      "epoch": 0.1338891613536047,
      "grad_norm": 2.4314581756456586,
      "learning_rate": 4.996087310448312e-06,
      "loss": 0.5627,
      "step": 1092
    },
    {
      "epoch": 0.1340117704757234,
      "grad_norm": 2.553585038534869,
      "learning_rate": 4.9960589137269e-06,
      "loss": 0.6157,
      "step": 1093
    },
    {
      "epoch": 0.13413437959784208,
      "grad_norm": 2.462850928985416,
      "learning_rate": 4.996030414413047e-06,
      "loss": 0.5886,
      "step": 1094
    },
    {
      "epoch": 0.13425698871996075,
      "grad_norm": 2.5816483335748845,
      "learning_rate": 4.996001812507925e-06,
      "loss": 0.617,
      "step": 1095
    },
    {
      "epoch": 0.13437959784207945,
      "grad_norm": 2.9189687283049617,
      "learning_rate": 4.99597310801271e-06,
      "loss": 0.6193,
      "step": 1096
    },
    {
      "epoch": 0.13450220696419812,
      "grad_norm": 2.4622302253257606,
      "learning_rate": 4.995944300928583e-06,
      "loss": 0.6041,
      "step": 1097
    },
    {
      "epoch": 0.13462481608631682,
      "grad_norm": 2.497158115731768,
      "learning_rate": 4.995915391256725e-06,
      "loss": 0.5976,
      "step": 1098
    },
    {
      "epoch": 0.1347474252084355,
      "grad_norm": 2.5454986353237015,
      "learning_rate": 4.995886378998326e-06,
      "loss": 0.6019,
      "step": 1099
    },
    {
      "epoch": 0.1348700343305542,
      "grad_norm": 2.265022998016344,
      "learning_rate": 4.9958572641545775e-06,
      "loss": 0.5781,
      "step": 1100
    },
    {
      "epoch": 0.13499264345267287,
      "grad_norm": 2.64200931691629,
      "learning_rate": 4.995828046726677e-06,
      "loss": 0.6455,
      "step": 1101
    },
    {
      "epoch": 0.13511525257479157,
      "grad_norm": 2.377570190685651,
      "learning_rate": 4.995798726715826e-06,
      "loss": 0.6362,
      "step": 1102
    },
    {
      "epoch": 0.13523786169691024,
      "grad_norm": 2.9000734347545354,
      "learning_rate": 4.995769304123229e-06,
      "loss": 0.5988,
      "step": 1103
    },
    {
      "epoch": 0.13536047081902894,
      "grad_norm": 2.7829862646673065,
      "learning_rate": 4.995739778950095e-06,
      "loss": 0.6299,
      "step": 1104
    },
    {
      "epoch": 0.1354830799411476,
      "grad_norm": 2.5313675570948706,
      "learning_rate": 4.995710151197638e-06,
      "loss": 0.6692,
      "step": 1105
    },
    {
      "epoch": 0.1356056890632663,
      "grad_norm": 2.388383235054107,
      "learning_rate": 4.9956804208670754e-06,
      "loss": 0.6163,
      "step": 1106
    },
    {
      "epoch": 0.13572829818538498,
      "grad_norm": 2.404945556777026,
      "learning_rate": 4.995650587959628e-06,
      "loss": 0.5459,
      "step": 1107
    },
    {
      "epoch": 0.13585090730750368,
      "grad_norm": 2.461343807511971,
      "learning_rate": 4.995620652476525e-06,
      "loss": 0.614,
      "step": 1108
    },
    {
      "epoch": 0.13597351642962235,
      "grad_norm": 2.358511010514895,
      "learning_rate": 4.9955906144189934e-06,
      "loss": 0.5946,
      "step": 1109
    },
    {
      "epoch": 0.13609612555174105,
      "grad_norm": 2.5493365162229327,
      "learning_rate": 4.99556047378827e-06,
      "loss": 0.6148,
      "step": 1110
    },
    {
      "epoch": 0.13621873467385973,
      "grad_norm": 2.609755173369649,
      "learning_rate": 4.995530230585593e-06,
      "loss": 0.6299,
      "step": 1111
    },
    {
      "epoch": 0.13634134379597843,
      "grad_norm": 2.6782815480764746,
      "learning_rate": 4.995499884812206e-06,
      "loss": 0.6181,
      "step": 1112
    },
    {
      "epoch": 0.1364639529180971,
      "grad_norm": 2.3014284691199878,
      "learning_rate": 4.9954694364693566e-06,
      "loss": 0.5693,
      "step": 1113
    },
    {
      "epoch": 0.1365865620402158,
      "grad_norm": 2.2801041129051605,
      "learning_rate": 4.995438885558294e-06,
      "loss": 0.6104,
      "step": 1114
    },
    {
      "epoch": 0.13670917116233447,
      "grad_norm": 2.3382344573309757,
      "learning_rate": 4.995408232080276e-06,
      "loss": 0.5641,
      "step": 1115
    },
    {
      "epoch": 0.13683178028445317,
      "grad_norm": 2.2627765660557584,
      "learning_rate": 4.995377476036561e-06,
      "loss": 0.6092,
      "step": 1116
    },
    {
      "epoch": 0.13695438940657184,
      "grad_norm": 2.691108164581231,
      "learning_rate": 4.995346617428416e-06,
      "loss": 0.6085,
      "step": 1117
    },
    {
      "epoch": 0.13707699852869054,
      "grad_norm": 2.333831120244916,
      "learning_rate": 4.995315656257106e-06,
      "loss": 0.6035,
      "step": 1118
    },
    {
      "epoch": 0.1371996076508092,
      "grad_norm": 2.1518757484464754,
      "learning_rate": 4.9952845925239046e-06,
      "loss": 0.5975,
      "step": 1119
    },
    {
      "epoch": 0.1373222167729279,
      "grad_norm": 2.484656705913248,
      "learning_rate": 4.99525342623009e-06,
      "loss": 0.6213,
      "step": 1120
    },
    {
      "epoch": 0.13744482589504659,
      "grad_norm": 2.385416715247598,
      "learning_rate": 4.995222157376941e-06,
      "loss": 0.5681,
      "step": 1121
    },
    {
      "epoch": 0.13756743501716528,
      "grad_norm": 2.3939113224208093,
      "learning_rate": 4.995190785965745e-06,
      "loss": 0.5995,
      "step": 1122
    },
    {
      "epoch": 0.13769004413928396,
      "grad_norm": 2.506206001517721,
      "learning_rate": 4.995159311997789e-06,
      "loss": 0.558,
      "step": 1123
    },
    {
      "epoch": 0.13781265326140266,
      "grad_norm": 2.588047800070632,
      "learning_rate": 4.995127735474369e-06,
      "loss": 0.6102,
      "step": 1124
    },
    {
      "epoch": 0.13793526238352133,
      "grad_norm": 2.1961704253811,
      "learning_rate": 4.995096056396781e-06,
      "loss": 0.5535,
      "step": 1125
    },
    {
      "epoch": 0.13805787150564003,
      "grad_norm": 2.5100676425717414,
      "learning_rate": 4.995064274766328e-06,
      "loss": 0.6026,
      "step": 1126
    },
    {
      "epoch": 0.1381804806277587,
      "grad_norm": 2.23204258494174,
      "learning_rate": 4.995032390584317e-06,
      "loss": 0.5705,
      "step": 1127
    },
    {
      "epoch": 0.1383030897498774,
      "grad_norm": 2.739905934297388,
      "learning_rate": 4.995000403852057e-06,
      "loss": 0.595,
      "step": 1128
    },
    {
      "epoch": 0.13842569887199607,
      "grad_norm": 2.5032628198379356,
      "learning_rate": 4.9949683145708634e-06,
      "loss": 0.6034,
      "step": 1129
    },
    {
      "epoch": 0.13854830799411477,
      "grad_norm": 2.5896075360835558,
      "learning_rate": 4.994936122742055e-06,
      "loss": 0.5616,
      "step": 1130
    },
    {
      "epoch": 0.13867091711623344,
      "grad_norm": 2.3749207500958933,
      "learning_rate": 4.994903828366955e-06,
      "loss": 0.6211,
      "step": 1131
    },
    {
      "epoch": 0.13879352623835214,
      "grad_norm": 2.522501451087393,
      "learning_rate": 4.994871431446891e-06,
      "loss": 0.6396,
      "step": 1132
    },
    {
      "epoch": 0.13891613536047082,
      "grad_norm": 2.5211708010823033,
      "learning_rate": 4.994838931983194e-06,
      "loss": 0.5602,
      "step": 1133
    },
    {
      "epoch": 0.13903874448258952,
      "grad_norm": 2.7737034010544495,
      "learning_rate": 4.9948063299772e-06,
      "loss": 0.6285,
      "step": 1134
    },
    {
      "epoch": 0.1391613536047082,
      "grad_norm": 2.2059630880868673,
      "learning_rate": 4.99477362543025e-06,
      "loss": 0.5801,
      "step": 1135
    },
    {
      "epoch": 0.1392839627268269,
      "grad_norm": 2.4398316635084165,
      "learning_rate": 4.9947408183436864e-06,
      "loss": 0.6052,
      "step": 1136
    },
    {
      "epoch": 0.13940657184894556,
      "grad_norm": 2.4088758121423313,
      "learning_rate": 4.994707908718859e-06,
      "loss": 0.6039,
      "step": 1137
    },
    {
      "epoch": 0.13952918097106426,
      "grad_norm": 2.4002947959790326,
      "learning_rate": 4.99467489655712e-06,
      "loss": 0.5861,
      "step": 1138
    },
    {
      "epoch": 0.13965179009318293,
      "grad_norm": 2.5528368727038577,
      "learning_rate": 4.994641781859827e-06,
      "loss": 0.5865,
      "step": 1139
    },
    {
      "epoch": 0.13977439921530163,
      "grad_norm": 2.6269559667511553,
      "learning_rate": 4.994608564628339e-06,
      "loss": 0.677,
      "step": 1140
    },
    {
      "epoch": 0.1398970083374203,
      "grad_norm": 2.685515144039435,
      "learning_rate": 4.9945752448640236e-06,
      "loss": 0.6102,
      "step": 1141
    },
    {
      "epoch": 0.140019617459539,
      "grad_norm": 2.501571194710523,
      "learning_rate": 4.994541822568249e-06,
      "loss": 0.6536,
      "step": 1142
    },
    {
      "epoch": 0.14014222658165768,
      "grad_norm": 2.1849307927224584,
      "learning_rate": 4.994508297742389e-06,
      "loss": 0.5979,
      "step": 1143
    },
    {
      "epoch": 0.14026483570377635,
      "grad_norm": 2.7257074833326365,
      "learning_rate": 4.9944746703878225e-06,
      "loss": 0.6078,
      "step": 1144
    },
    {
      "epoch": 0.14038744482589505,
      "grad_norm": 2.606342358624001,
      "learning_rate": 4.99444094050593e-06,
      "loss": 0.6253,
      "step": 1145
    },
    {
      "epoch": 0.14051005394801372,
      "grad_norm": 2.348392342423329,
      "learning_rate": 4.994407108098099e-06,
      "loss": 0.5589,
      "step": 1146
    },
    {
      "epoch": 0.14063266307013242,
      "grad_norm": 2.6472948981462996,
      "learning_rate": 4.994373173165721e-06,
      "loss": 0.5828,
      "step": 1147
    },
    {
      "epoch": 0.1407552721922511,
      "grad_norm": 2.3184169411931213,
      "learning_rate": 4.994339135710188e-06,
      "loss": 0.5616,
      "step": 1148
    },
    {
      "epoch": 0.1408778813143698,
      "grad_norm": 2.8000382283626433,
      "learning_rate": 4.994304995732901e-06,
      "loss": 0.6053,
      "step": 1149
    },
    {
      "epoch": 0.14100049043648846,
      "grad_norm": 2.719056271850528,
      "learning_rate": 4.994270753235264e-06,
      "loss": 0.5552,
      "step": 1150
    },
    {
      "epoch": 0.14112309955860716,
      "grad_norm": 2.439183056712879,
      "learning_rate": 4.994236408218682e-06,
      "loss": 0.5567,
      "step": 1151
    },
    {
      "epoch": 0.14124570868072583,
      "grad_norm": 2.445559007751108,
      "learning_rate": 4.994201960684569e-06,
      "loss": 0.6494,
      "step": 1152
    },
    {
      "epoch": 0.14136831780284453,
      "grad_norm": 2.8798154679758245,
      "learning_rate": 4.994167410634339e-06,
      "loss": 0.6366,
      "step": 1153
    },
    {
      "epoch": 0.1414909269249632,
      "grad_norm": 2.7318813271999978,
      "learning_rate": 4.994132758069413e-06,
      "loss": 0.5698,
      "step": 1154
    },
    {
      "epoch": 0.1416135360470819,
      "grad_norm": 2.4775717778318764,
      "learning_rate": 4.994098002991215e-06,
      "loss": 0.6214,
      "step": 1155
    },
    {
      "epoch": 0.14173614516920058,
      "grad_norm": 2.6366359046195265,
      "learning_rate": 4.994063145401175e-06,
      "loss": 0.7065,
      "step": 1156
    },
    {
      "epoch": 0.14185875429131928,
      "grad_norm": 2.722163288810636,
      "learning_rate": 4.994028185300723e-06,
      "loss": 0.6174,
      "step": 1157
    },
    {
      "epoch": 0.14198136341343795,
      "grad_norm": 2.5032874733328896,
      "learning_rate": 4.993993122691298e-06,
      "loss": 0.6564,
      "step": 1158
    },
    {
      "epoch": 0.14210397253555665,
      "grad_norm": 2.2764324364577933,
      "learning_rate": 4.993957957574339e-06,
      "loss": 0.5731,
      "step": 1159
    },
    {
      "epoch": 0.14222658165767532,
      "grad_norm": 2.480119242961524,
      "learning_rate": 4.993922689951294e-06,
      "loss": 0.5652,
      "step": 1160
    },
    {
      "epoch": 0.14234919077979402,
      "grad_norm": 2.672370136140155,
      "learning_rate": 4.993887319823611e-06,
      "loss": 0.5657,
      "step": 1161
    },
    {
      "epoch": 0.1424717999019127,
      "grad_norm": 2.4178169424537823,
      "learning_rate": 4.993851847192743e-06,
      "loss": 0.5551,
      "step": 1162
    },
    {
      "epoch": 0.1425944090240314,
      "grad_norm": 2.671195529012794,
      "learning_rate": 4.9938162720601505e-06,
      "loss": 0.602,
      "step": 1163
    },
    {
      "epoch": 0.14271701814615007,
      "grad_norm": 2.477657969574087,
      "learning_rate": 4.9937805944272935e-06,
      "loss": 0.6209,
      "step": 1164
    },
    {
      "epoch": 0.14283962726826877,
      "grad_norm": 2.6231866475091734,
      "learning_rate": 4.993744814295639e-06,
      "loss": 0.6431,
      "step": 1165
    },
    {
      "epoch": 0.14296223639038744,
      "grad_norm": 2.411502355333302,
      "learning_rate": 4.993708931666659e-06,
      "loss": 0.6022,
      "step": 1166
    },
    {
      "epoch": 0.14308484551250614,
      "grad_norm": 2.5916806525609806,
      "learning_rate": 4.993672946541826e-06,
      "loss": 0.6451,
      "step": 1167
    },
    {
      "epoch": 0.1432074546346248,
      "grad_norm": 2.4350651037634,
      "learning_rate": 4.99363685892262e-06,
      "loss": 0.6258,
      "step": 1168
    },
    {
      "epoch": 0.1433300637567435,
      "grad_norm": 2.468446532811296,
      "learning_rate": 4.993600668810525e-06,
      "loss": 0.5632,
      "step": 1169
    },
    {
      "epoch": 0.14345267287886218,
      "grad_norm": 2.576737179769868,
      "learning_rate": 4.993564376207029e-06,
      "loss": 0.5951,
      "step": 1170
    },
    {
      "epoch": 0.14357528200098088,
      "grad_norm": 2.8349758862675416,
      "learning_rate": 4.993527981113621e-06,
      "loss": 0.579,
      "step": 1171
    },
    {
      "epoch": 0.14369789112309955,
      "grad_norm": 2.4046103893251143,
      "learning_rate": 4.9934914835318e-06,
      "loss": 0.5912,
      "step": 1172
    },
    {
      "epoch": 0.14382050024521825,
      "grad_norm": 2.5804230339726284,
      "learning_rate": 4.993454883463063e-06,
      "loss": 0.6051,
      "step": 1173
    },
    {
      "epoch": 0.14394310936733692,
      "grad_norm": 2.262600051982013,
      "learning_rate": 4.993418180908917e-06,
      "loss": 0.5873,
      "step": 1174
    },
    {
      "epoch": 0.14406571848945562,
      "grad_norm": 2.4067664466840224,
      "learning_rate": 4.9933813758708695e-06,
      "loss": 0.5594,
      "step": 1175
    },
    {
      "epoch": 0.1441883276115743,
      "grad_norm": 2.499748092528448,
      "learning_rate": 4.993344468350433e-06,
      "loss": 0.5636,
      "step": 1176
    },
    {
      "epoch": 0.144310936733693,
      "grad_norm": 2.8605538162835664,
      "learning_rate": 4.993307458349125e-06,
      "loss": 0.6596,
      "step": 1177
    },
    {
      "epoch": 0.14443354585581167,
      "grad_norm": 2.540222038269817,
      "learning_rate": 4.993270345868466e-06,
      "loss": 0.5556,
      "step": 1178
    },
    {
      "epoch": 0.14455615497793037,
      "grad_norm": 2.3964428711939405,
      "learning_rate": 4.9932331309099815e-06,
      "loss": 0.6868,
      "step": 1179
    },
    {
      "epoch": 0.14467876410004904,
      "grad_norm": 2.441112649556213,
      "learning_rate": 4.993195813475202e-06,
      "loss": 0.6457,
      "step": 1180
    },
    {
      "epoch": 0.14480137322216774,
      "grad_norm": 2.4408009175714938,
      "learning_rate": 4.9931583935656605e-06,
      "loss": 0.5576,
      "step": 1181
    },
    {
      "epoch": 0.1449239823442864,
      "grad_norm": 2.8678728216386493,
      "learning_rate": 4.993120871182895e-06,
      "loss": 0.6379,
      "step": 1182
    },
    {
      "epoch": 0.1450465914664051,
      "grad_norm": 2.255832834759684,
      "learning_rate": 4.993083246328448e-06,
      "loss": 0.6096,
      "step": 1183
    },
    {
      "epoch": 0.14516920058852378,
      "grad_norm": 2.5177568401130137,
      "learning_rate": 4.9930455190038675e-06,
      "loss": 0.6026,
      "step": 1184
    },
    {
      "epoch": 0.14529180971064248,
      "grad_norm": 2.3325912232733246,
      "learning_rate": 4.993007689210701e-06,
      "loss": 0.6419,
      "step": 1185
    },
    {
      "epoch": 0.14541441883276116,
      "grad_norm": 2.7079133756318057,
      "learning_rate": 4.992969756950505e-06,
      "loss": 0.6335,
      "step": 1186
    },
    {
      "epoch": 0.14553702795487986,
      "grad_norm": 2.707252329224019,
      "learning_rate": 4.992931722224839e-06,
      "loss": 0.637,
      "step": 1187
    },
    {
      "epoch": 0.14565963707699853,
      "grad_norm": 2.5125117320844486,
      "learning_rate": 4.992893585035265e-06,
      "loss": 0.6029,
      "step": 1188
    },
    {
      "epoch": 0.14578224619911723,
      "grad_norm": 2.398758343303042,
      "learning_rate": 4.992855345383353e-06,
      "loss": 0.6288,
      "step": 1189
    },
    {
      "epoch": 0.1459048553212359,
      "grad_norm": 2.553972550849822,
      "learning_rate": 4.992817003270671e-06,
      "loss": 0.63,
      "step": 1190
    },
    {
      "epoch": 0.1460274644433546,
      "grad_norm": 2.6198048291455795,
      "learning_rate": 4.992778558698799e-06,
      "loss": 0.6399,
      "step": 1191
    },
    {
      "epoch": 0.14615007356547327,
      "grad_norm": 2.5793260012030106,
      "learning_rate": 4.992740011669314e-06,
      "loss": 0.6207,
      "step": 1192
    },
    {
      "epoch": 0.14627268268759197,
      "grad_norm": 2.6758381685438684,
      "learning_rate": 4.992701362183801e-06,
      "loss": 0.5418,
      "step": 1193
    },
    {
      "epoch": 0.14639529180971064,
      "grad_norm": 2.343866533153608,
      "learning_rate": 4.99266261024385e-06,
      "loss": 0.6216,
      "step": 1194
    },
    {
      "epoch": 0.14651790093182931,
      "grad_norm": 2.5625568325326635,
      "learning_rate": 4.992623755851052e-06,
      "loss": 0.6099,
      "step": 1195
    },
    {
      "epoch": 0.14664051005394801,
      "grad_norm": 2.3715547343686336,
      "learning_rate": 4.992584799007006e-06,
      "loss": 0.5928,
      "step": 1196
    },
    {
      "epoch": 0.1467631191760667,
      "grad_norm": 2.3985246012335266,
      "learning_rate": 4.99254573971331e-06,
      "loss": 0.5068,
      "step": 1197
    },
    {
      "epoch": 0.1468857282981854,
      "grad_norm": 2.6773938613643504,
      "learning_rate": 4.992506577971573e-06,
      "loss": 0.5544,
      "step": 1198
    },
    {
      "epoch": 0.14700833742030406,
      "grad_norm": 2.8517391414688973,
      "learning_rate": 4.9924673137834025e-06,
      "loss": 0.6281,
      "step": 1199
    },
    {
      "epoch": 0.14713094654242276,
      "grad_norm": 2.6937499811316816,
      "learning_rate": 4.992427947150413e-06,
      "loss": 0.665,
      "step": 1200
    },
    {
      "epoch": 0.14725355566454143,
      "grad_norm": 2.359784669015537,
      "learning_rate": 4.992388478074223e-06,
      "loss": 0.6077,
      "step": 1201
    },
    {
      "epoch": 0.14737616478666013,
      "grad_norm": 2.6107687686037666,
      "learning_rate": 4.992348906556452e-06,
      "loss": 0.6045,
      "step": 1202
    },
    {
      "epoch": 0.1474987739087788,
      "grad_norm": 2.8444433361325085,
      "learning_rate": 4.992309232598731e-06,
      "loss": 0.6402,
      "step": 1203
    },
    {
      "epoch": 0.1476213830308975,
      "grad_norm": 2.1795421009908575,
      "learning_rate": 4.992269456202687e-06,
      "loss": 0.5911,
      "step": 1204
    },
    {
      "epoch": 0.14774399215301617,
      "grad_norm": 2.585076753640136,
      "learning_rate": 4.992229577369956e-06,
      "loss": 0.627,
      "step": 1205
    },
    {
      "epoch": 0.14786660127513487,
      "grad_norm": 2.307149077435508,
      "learning_rate": 4.992189596102179e-06,
      "loss": 0.6256,
      "step": 1206
    },
    {
      "epoch": 0.14798921039725355,
      "grad_norm": 2.422458547550545,
      "learning_rate": 4.992149512400995e-06,
      "loss": 0.5648,
      "step": 1207
    },
    {
      "epoch": 0.14811181951937225,
      "grad_norm": 2.5699288740347663,
      "learning_rate": 4.992109326268056e-06,
      "loss": 0.5948,
      "step": 1208
    },
    {
      "epoch": 0.14823442864149092,
      "grad_norm": 2.536301172181773,
      "learning_rate": 4.99206903770501e-06,
      "loss": 0.5807,
      "step": 1209
    },
    {
      "epoch": 0.14835703776360962,
      "grad_norm": 2.492090297228697,
      "learning_rate": 4.992028646713516e-06,
      "loss": 0.5566,
      "step": 1210
    },
    {
      "epoch": 0.1484796468857283,
      "grad_norm": 2.598036997030354,
      "learning_rate": 4.991988153295232e-06,
      "loss": 0.5814,
      "step": 1211
    },
    {
      "epoch": 0.148602256007847,
      "grad_norm": 2.2317903119218907,
      "learning_rate": 4.9919475574518235e-06,
      "loss": 0.5856,
      "step": 1212
    },
    {
      "epoch": 0.14872486512996566,
      "grad_norm": 2.338588479927466,
      "learning_rate": 4.99190685918496e-06,
      "loss": 0.6146,
      "step": 1213
    },
    {
      "epoch": 0.14884747425208436,
      "grad_norm": 2.56440232215564,
      "learning_rate": 4.99186605849631e-06,
      "loss": 0.5965,
      "step": 1214
    },
    {
      "epoch": 0.14897008337420303,
      "grad_norm": 2.510772226682131,
      "learning_rate": 4.991825155387557e-06,
      "loss": 0.5455,
      "step": 1215
    },
    {
      "epoch": 0.14909269249632173,
      "grad_norm": 2.25685934876696,
      "learning_rate": 4.991784149860377e-06,
      "loss": 0.5696,
      "step": 1216
    },
    {
      "epoch": 0.1492153016184404,
      "grad_norm": 2.6345774748489474,
      "learning_rate": 4.991743041916457e-06,
      "loss": 0.6357,
      "step": 1217
    },
    {
      "epoch": 0.1493379107405591,
      "grad_norm": 2.601993623699854,
      "learning_rate": 4.991701831557487e-06,
      "loss": 0.5673,
      "step": 1218
    },
    {
      "epoch": 0.14946051986267778,
      "grad_norm": 2.424256362136007,
      "learning_rate": 4.99166051878516e-06,
      "loss": 0.6575,
      "step": 1219
    },
    {
      "epoch": 0.14958312898479648,
      "grad_norm": 2.5182851633762198,
      "learning_rate": 4.991619103601175e-06,
      "loss": 0.5783,
      "step": 1220
    },
    {
      "epoch": 0.14970573810691515,
      "grad_norm": 2.450470351655214,
      "learning_rate": 4.991577586007233e-06,
      "loss": 0.6475,
      "step": 1221
    },
    {
      "epoch": 0.14982834722903385,
      "grad_norm": 2.5352481301205363,
      "learning_rate": 4.991535966005042e-06,
      "loss": 0.6266,
      "step": 1222
    },
    {
      "epoch": 0.14995095635115252,
      "grad_norm": 2.578953654802776,
      "learning_rate": 4.991494243596312e-06,
      "loss": 0.6287,
      "step": 1223
    },
    {
      "epoch": 0.15007356547327122,
      "grad_norm": 2.518130108323663,
      "learning_rate": 4.991452418782758e-06,
      "loss": 0.5884,
      "step": 1224
    },
    {
      "epoch": 0.1501961745953899,
      "grad_norm": 2.416430692113595,
      "learning_rate": 4.991410491566099e-06,
      "loss": 0.5762,
      "step": 1225
    },
    {
      "epoch": 0.1503187837175086,
      "grad_norm": 2.4148754486502777,
      "learning_rate": 4.991368461948056e-06,
      "loss": 0.6033,
      "step": 1226
    },
    {
      "epoch": 0.15044139283962726,
      "grad_norm": 2.3658895469401875,
      "learning_rate": 4.991326329930361e-06,
      "loss": 0.6772,
      "step": 1227
    },
    {
      "epoch": 0.15056400196174596,
      "grad_norm": 2.7572471752228425,
      "learning_rate": 4.9912840955147415e-06,
      "loss": 0.6068,
      "step": 1228
    },
    {
      "epoch": 0.15068661108386464,
      "grad_norm": 2.7196769301731014,
      "learning_rate": 4.991241758702936e-06,
      "loss": 0.6432,
      "step": 1229
    },
    {
      "epoch": 0.15080922020598334,
      "grad_norm": 2.4372764630700905,
      "learning_rate": 4.991199319496684e-06,
      "loss": 0.5805,
      "step": 1230
    },
    {
      "epoch": 0.150931829328102,
      "grad_norm": 2.330115477893155,
      "learning_rate": 4.9911567778977295e-06,
      "loss": 0.6508,
      "step": 1231
    },
    {
      "epoch": 0.1510544384502207,
      "grad_norm": 2.5460706500196695,
      "learning_rate": 4.991114133907822e-06,
      "loss": 0.6418,
      "step": 1232
    },
    {
      "epoch": 0.15117704757233938,
      "grad_norm": 2.342122443241136,
      "learning_rate": 4.991071387528712e-06,
      "loss": 0.6011,
      "step": 1233
    },
    {
      "epoch": 0.15129965669445808,
      "grad_norm": 2.6078224382095514,
      "learning_rate": 4.991028538762158e-06,
      "loss": 0.5762,
      "step": 1234
    },
    {
      "epoch": 0.15142226581657675,
      "grad_norm": 2.4973463764716377,
      "learning_rate": 4.990985587609923e-06,
      "loss": 0.6241,
      "step": 1235
    },
    {
      "epoch": 0.15154487493869545,
      "grad_norm": 2.4141029689760796,
      "learning_rate": 4.990942534073768e-06,
      "loss": 0.5804,
      "step": 1236
    },
    {
      "epoch": 0.15166748406081412,
      "grad_norm": 2.693439062951733,
      "learning_rate": 4.990899378155466e-06,
      "loss": 0.6712,
      "step": 1237
    },
    {
      "epoch": 0.15179009318293282,
      "grad_norm": 2.40033964926411,
      "learning_rate": 4.99085611985679e-06,
      "loss": 0.5631,
      "step": 1238
    },
    {
      "epoch": 0.1519127023050515,
      "grad_norm": 2.479010994878223,
      "learning_rate": 4.9908127591795176e-06,
      "loss": 0.6119,
      "step": 1239
    },
    {
      "epoch": 0.1520353114271702,
      "grad_norm": 2.686906649213728,
      "learning_rate": 4.9907692961254305e-06,
      "loss": 0.5949,
      "step": 1240
    },
    {
      "epoch": 0.15215792054928887,
      "grad_norm": 2.1455088557960442,
      "learning_rate": 4.9907257306963164e-06,
      "loss": 0.591,
      "step": 1241
    },
    {
      "epoch": 0.15228052967140757,
      "grad_norm": 2.1462488037071976,
      "learning_rate": 4.990682062893965e-06,
      "loss": 0.5638,
      "step": 1242
    },
    {
      "epoch": 0.15240313879352624,
      "grad_norm": 2.3976779974957063,
      "learning_rate": 4.990638292720172e-06,
      "loss": 0.5485,
      "step": 1243
    },
    {
      "epoch": 0.1525257479156449,
      "grad_norm": 2.21742639269759,
      "learning_rate": 4.990594420176736e-06,
      "loss": 0.5843,
      "step": 1244
    },
    {
      "epoch": 0.1526483570377636,
      "grad_norm": 2.714520545978396,
      "learning_rate": 4.990550445265458e-06,
      "loss": 0.648,
      "step": 1245
    },
    {
      "epoch": 0.15277096615988228,
      "grad_norm": 2.4407652721659048,
      "learning_rate": 4.990506367988149e-06,
      "loss": 0.6124,
      "step": 1246
    },
    {
      "epoch": 0.15289357528200098,
      "grad_norm": 2.3166413672567936,
      "learning_rate": 4.99046218834662e-06,
      "loss": 0.5969,
      "step": 1247
    },
    {
      "epoch": 0.15301618440411965,
      "grad_norm": 2.449288337663798,
      "learning_rate": 4.990417906342685e-06,
      "loss": 0.6436,
      "step": 1248
    },
    {
      "epoch": 0.15313879352623835,
      "grad_norm": 2.7279465164026897,
      "learning_rate": 4.990373521978165e-06,
      "loss": 0.5873,
      "step": 1249
    },
    {
      "epoch": 0.15326140264835703,
      "grad_norm": 2.43343434171029,
      "learning_rate": 4.990329035254885e-06,
      "loss": 0.5753,
      "step": 1250
    },
    {
      "epoch": 0.15338401177047573,
      "grad_norm": 2.281191311421567,
      "learning_rate": 4.990284446174673e-06,
      "loss": 0.5861,
      "step": 1251
    },
    {
      "epoch": 0.1535066208925944,
      "grad_norm": 2.390572709227834,
      "learning_rate": 4.990239754739361e-06,
      "loss": 0.6403,
      "step": 1252
    },
    {
      "epoch": 0.1536292300147131,
      "grad_norm": 2.477412208592153,
      "learning_rate": 4.9901949609507865e-06,
      "loss": 0.5986,
      "step": 1253
    },
    {
      "epoch": 0.15375183913683177,
      "grad_norm": 2.2512684025525718,
      "learning_rate": 4.99015006481079e-06,
      "loss": 0.5807,
      "step": 1254
    },
    {
      "epoch": 0.15387444825895047,
      "grad_norm": 2.719110080114102,
      "learning_rate": 4.990105066321219e-06,
      "loss": 0.6413,
      "step": 1255
    },
    {
      "epoch": 0.15399705738106914,
      "grad_norm": 2.2459896408100493,
      "learning_rate": 4.99005996548392e-06,
      "loss": 0.6001,
      "step": 1256
    },
    {
      "epoch": 0.15411966650318784,
      "grad_norm": 2.3818632319417996,
      "learning_rate": 4.990014762300749e-06,
      "loss": 0.5697,
      "step": 1257
    },
    {
      "epoch": 0.1542422756253065,
      "grad_norm": 2.4729476088638926,
      "learning_rate": 4.989969456773562e-06,
      "loss": 0.6124,
      "step": 1258
    },
    {
      "epoch": 0.1543648847474252,
      "grad_norm": 2.3755551953588285,
      "learning_rate": 4.989924048904223e-06,
      "loss": 0.5651,
      "step": 1259
    },
    {
      "epoch": 0.15448749386954388,
      "grad_norm": 2.382609308691054,
      "learning_rate": 4.989878538694597e-06,
      "loss": 0.5696,
      "step": 1260
    },
    {
      "epoch": 0.15461010299166258,
      "grad_norm": 2.5057157179714222,
      "learning_rate": 4.989832926146556e-06,
      "loss": 0.5961,
      "step": 1261
    },
    {
      "epoch": 0.15473271211378126,
      "grad_norm": 2.7090002076451256,
      "learning_rate": 4.989787211261972e-06,
      "loss": 0.5467,
      "step": 1262
    },
    {
      "epoch": 0.15485532123589996,
      "grad_norm": 2.5775417039140187,
      "learning_rate": 4.989741394042728e-06,
      "loss": 0.6153,
      "step": 1263
    },
    {
      "epoch": 0.15497793035801863,
      "grad_norm": 2.433190484513577,
      "learning_rate": 4.989695474490704e-06,
      "loss": 0.6307,
      "step": 1264
    },
    {
      "epoch": 0.15510053948013733,
      "grad_norm": 2.633880757821134,
      "learning_rate": 4.989649452607788e-06,
      "loss": 0.5776,
      "step": 1265
    },
    {
      "epoch": 0.155223148602256,
      "grad_norm": 2.314049991388817,
      "learning_rate": 4.9896033283958725e-06,
      "loss": 0.5857,
      "step": 1266
    },
    {
      "epoch": 0.1553457577243747,
      "grad_norm": 2.7201626823056255,
      "learning_rate": 4.989557101856852e-06,
      "loss": 0.5998,
      "step": 1267
    },
    {
      "epoch": 0.15546836684649337,
      "grad_norm": 2.503886298375222,
      "learning_rate": 4.989510772992628e-06,
      "loss": 0.6795,
      "step": 1268
    },
    {
      "epoch": 0.15559097596861207,
      "grad_norm": 2.23063831106253,
      "learning_rate": 4.9894643418051035e-06,
      "loss": 0.6145,
      "step": 1269
    },
    {
      "epoch": 0.15571358509073074,
      "grad_norm": 2.431441915858224,
      "learning_rate": 4.989417808296187e-06,
      "loss": 0.6032,
      "step": 1270
    },
    {
      "epoch": 0.15583619421284944,
      "grad_norm": 2.5340258552103982,
      "learning_rate": 4.989371172467792e-06,
      "loss": 0.6447,
      "step": 1271
    },
    {
      "epoch": 0.15595880333496812,
      "grad_norm": 2.544239180106043,
      "learning_rate": 4.9893244343218346e-06,
      "loss": 0.6001,
      "step": 1272
    },
    {
      "epoch": 0.15608141245708682,
      "grad_norm": 2.236832377294676,
      "learning_rate": 4.989277593860236e-06,
      "loss": 0.6024,
      "step": 1273
    },
    {
      "epoch": 0.1562040215792055,
      "grad_norm": 2.341207201307628,
      "learning_rate": 4.989230651084921e-06,
      "loss": 0.6152,
      "step": 1274
    },
    {
      "epoch": 0.1563266307013242,
      "grad_norm": 2.462192723389152,
      "learning_rate": 4.989183605997819e-06,
      "loss": 0.595,
      "step": 1275
    },
    {
      "epoch": 0.15644923982344286,
      "grad_norm": 2.4084022513144956,
      "learning_rate": 4.989136458600865e-06,
      "loss": 0.6632,
      "step": 1276
    },
    {
      "epoch": 0.15657184894556156,
      "grad_norm": 2.183704317685215,
      "learning_rate": 4.989089208895996e-06,
      "loss": 0.6007,
      "step": 1277
    },
    {
      "epoch": 0.15669445806768023,
      "grad_norm": 2.474493747946202,
      "learning_rate": 4.989041856885153e-06,
      "loss": 0.5903,
      "step": 1278
    },
    {
      "epoch": 0.15681706718979893,
      "grad_norm": 2.0061573017035053,
      "learning_rate": 4.988994402570284e-06,
      "loss": 0.558,
      "step": 1279
    },
    {
      "epoch": 0.1569396763119176,
      "grad_norm": 2.254080166289536,
      "learning_rate": 4.988946845953338e-06,
      "loss": 0.6348,
      "step": 1280
    },
    {
      "epoch": 0.1570622854340363,
      "grad_norm": 2.545315165713949,
      "learning_rate": 4.988899187036271e-06,
      "loss": 0.5811,
      "step": 1281
    },
    {
      "epoch": 0.15718489455615497,
      "grad_norm": 2.3516962295262047,
      "learning_rate": 4.988851425821041e-06,
      "loss": 0.5936,
      "step": 1282
    },
    {
      "epoch": 0.15730750367827367,
      "grad_norm": 2.599019807540701,
      "learning_rate": 4.9888035623096104e-06,
      "loss": 0.5678,
      "step": 1283
    },
    {
      "epoch": 0.15743011280039235,
      "grad_norm": 2.296941392225935,
      "learning_rate": 4.988755596503948e-06,
      "loss": 0.5882,
      "step": 1284
    },
    {
      "epoch": 0.15755272192251105,
      "grad_norm": 2.479663869096416,
      "learning_rate": 4.988707528406024e-06,
      "loss": 0.6005,
      "step": 1285
    },
    {
      "epoch": 0.15767533104462972,
      "grad_norm": 2.133970827433344,
      "learning_rate": 4.988659358017815e-06,
      "loss": 0.542,
      "step": 1286
    },
    {
      "epoch": 0.15779794016674842,
      "grad_norm": 2.2165446727046283,
      "learning_rate": 4.988611085341301e-06,
      "loss": 0.5671,
      "step": 1287
    },
    {
      "epoch": 0.1579205492888671,
      "grad_norm": 2.9259085719943103,
      "learning_rate": 4.988562710378466e-06,
      "loss": 0.639,
      "step": 1288
    },
    {
      "epoch": 0.1580431584109858,
      "grad_norm": 2.431006925800926,
      "learning_rate": 4.988514233131297e-06,
      "loss": 0.6002,
      "step": 1289
    },
    {
      "epoch": 0.15816576753310446,
      "grad_norm": 2.408535388378083,
      "learning_rate": 4.988465653601787e-06,
      "loss": 0.6417,
      "step": 1290
    },
    {
      "epoch": 0.15828837665522316,
      "grad_norm": 2.480674761659977,
      "learning_rate": 4.9884169717919345e-06,
      "loss": 0.5413,
      "step": 1291
    },
    {
      "epoch": 0.15841098577734183,
      "grad_norm": 2.265034513687183,
      "learning_rate": 4.988368187703738e-06,
      "loss": 0.6862,
      "step": 1292
    },
    {
      "epoch": 0.15853359489946053,
      "grad_norm": 2.564697380484386,
      "learning_rate": 4.988319301339204e-06,
      "loss": 0.654,
      "step": 1293
    },
    {
      "epoch": 0.1586562040215792,
      "grad_norm": 2.3352412336729413,
      "learning_rate": 4.988270312700342e-06,
      "loss": 0.5818,
      "step": 1294
    },
    {
      "epoch": 0.15877881314369788,
      "grad_norm": 2.6175936010443555,
      "learning_rate": 4.988221221789164e-06,
      "loss": 0.6175,
      "step": 1295
    },
    {
      "epoch": 0.15890142226581658,
      "grad_norm": 2.564140704119794,
      "learning_rate": 4.98817202860769e-06,
      "loss": 0.5591,
      "step": 1296
    },
    {
      "epoch": 0.15902403138793525,
      "grad_norm": 2.376151620092119,
      "learning_rate": 4.98812273315794e-06,
      "loss": 0.5582,
      "step": 1297
    },
    {
      "epoch": 0.15914664051005395,
      "grad_norm": 2.52620654464509,
      "learning_rate": 4.988073335441941e-06,
      "loss": 0.578,
      "step": 1298
    },
    {
      "epoch": 0.15926924963217262,
      "grad_norm": 2.31695411165148,
      "learning_rate": 4.988023835461722e-06,
      "loss": 0.6099,
      "step": 1299
    },
    {
      "epoch": 0.15939185875429132,
      "grad_norm": 2.3354801861989376,
      "learning_rate": 4.98797423321932e-06,
      "loss": 0.5881,
      "step": 1300
    },
    {
      "epoch": 0.15951446787641,
      "grad_norm": 2.407175793360601,
      "learning_rate": 4.987924528716773e-06,
      "loss": 0.5594,
      "step": 1301
    },
    {
      "epoch": 0.1596370769985287,
      "grad_norm": 2.3183730108991742,
      "learning_rate": 4.987874721956122e-06,
      "loss": 0.6326,
      "step": 1302
    },
    {
      "epoch": 0.15975968612064737,
      "grad_norm": 2.3464439294582564,
      "learning_rate": 4.987824812939416e-06,
      "loss": 0.533,
      "step": 1303
    },
    {
      "epoch": 0.15988229524276606,
      "grad_norm": 2.461996984498769,
      "learning_rate": 4.987774801668706e-06,
      "loss": 0.5666,
      "step": 1304
    },
    {
      "epoch": 0.16000490436488474,
      "grad_norm": 2.378721271413104,
      "learning_rate": 4.987724688146047e-06,
      "loss": 0.5787,
      "step": 1305
    },
    {
      "epoch": 0.16012751348700344,
      "grad_norm": 2.446782769152718,
      "learning_rate": 4.9876744723734994e-06,
      "loss": 0.562,
      "step": 1306
    },
    {
      "epoch": 0.1602501226091221,
      "grad_norm": 2.2152278423591727,
      "learning_rate": 4.987624154353127e-06,
      "loss": 0.6223,
      "step": 1307
    },
    {
      "epoch": 0.1603727317312408,
      "grad_norm": 2.1825919949100587,
      "learning_rate": 4.987573734086996e-06,
      "loss": 0.6088,
      "step": 1308
    },
    {
      "epoch": 0.16049534085335948,
      "grad_norm": 2.7461007599893015,
      "learning_rate": 4.987523211577183e-06,
      "loss": 0.6162,
      "step": 1309
    },
    {
      "epoch": 0.16061794997547818,
      "grad_norm": 2.025640944879796,
      "learning_rate": 4.987472586825762e-06,
      "loss": 0.5567,
      "step": 1310
    },
    {
      "epoch": 0.16074055909759685,
      "grad_norm": 2.4511207143387552,
      "learning_rate": 4.987421859834813e-06,
      "loss": 0.6186,
      "step": 1311
    },
    {
      "epoch": 0.16086316821971555,
      "grad_norm": 2.4398099585506454,
      "learning_rate": 4.987371030606423e-06,
      "loss": 0.6155,
      "step": 1312
    },
    {
      "epoch": 0.16098577734183422,
      "grad_norm": 2.417632595072215,
      "learning_rate": 4.987320099142678e-06,
      "loss": 0.5993,
      "step": 1313
    },
    {
      "epoch": 0.16110838646395292,
      "grad_norm": 2.516684892493449,
      "learning_rate": 4.987269065445675e-06,
      "loss": 0.5927,
      "step": 1314
    },
    {
      "epoch": 0.1612309955860716,
      "grad_norm": 2.4299039814259786,
      "learning_rate": 4.9872179295175105e-06,
      "loss": 0.6322,
      "step": 1315
    },
    {
      "epoch": 0.1613536047081903,
      "grad_norm": 2.3477559035819597,
      "learning_rate": 4.987166691360285e-06,
      "loss": 0.6567,
      "step": 1316
    },
    {
      "epoch": 0.16147621383030897,
      "grad_norm": 2.121865624447416,
      "learning_rate": 4.9871153509761064e-06,
      "loss": 0.5965,
      "step": 1317
    },
    {
      "epoch": 0.16159882295242767,
      "grad_norm": 2.2327351003301446,
      "learning_rate": 4.987063908367082e-06,
      "loss": 0.6259,
      "step": 1318
    },
    {
      "epoch": 0.16172143207454634,
      "grad_norm": 2.317161243587114,
      "learning_rate": 4.98701236353533e-06,
      "loss": 0.6508,
      "step": 1319
    },
    {
      "epoch": 0.16184404119666504,
      "grad_norm": 2.302129701870567,
      "learning_rate": 4.986960716482965e-06,
      "loss": 0.6184,
      "step": 1320
    },
    {
      "epoch": 0.1619666503187837,
      "grad_norm": 2.4163166116658723,
      "learning_rate": 4.986908967212113e-06,
      "loss": 0.6526,
      "step": 1321
    },
    {
      "epoch": 0.1620892594409024,
      "grad_norm": 2.6108548437202646,
      "learning_rate": 4.986857115724899e-06,
      "loss": 0.6244,
      "step": 1322
    },
    {
      "epoch": 0.16221186856302108,
      "grad_norm": 2.408957268439648,
      "learning_rate": 4.986805162023455e-06,
      "loss": 0.5837,
      "step": 1323
    },
    {
      "epoch": 0.16233447768513978,
      "grad_norm": 2.326338325916279,
      "learning_rate": 4.986753106109916e-06,
      "loss": 0.5921,
      "step": 1324
    },
    {
      "epoch": 0.16245708680725846,
      "grad_norm": 2.372825409452153,
      "learning_rate": 4.9867009479864234e-06,
      "loss": 0.6139,
      "step": 1325
    },
    {
      "epoch": 0.16257969592937715,
      "grad_norm": 2.4377365950979963,
      "learning_rate": 4.986648687655118e-06,
      "loss": 0.6289,
      "step": 1326
    },
    {
      "epoch": 0.16270230505149583,
      "grad_norm": 2.4053126952201564,
      "learning_rate": 4.98659632511815e-06,
      "loss": 0.6306,
      "step": 1327
    },
    {
      "epoch": 0.16282491417361453,
      "grad_norm": 2.466521702511417,
      "learning_rate": 4.986543860377671e-06,
      "loss": 0.5845,
      "step": 1328
    },
    {
      "epoch": 0.1629475232957332,
      "grad_norm": 2.489247669949294,
      "learning_rate": 4.986491293435837e-06,
      "loss": 0.6869,
      "step": 1329
    },
    {
      "epoch": 0.1630701324178519,
      "grad_norm": 2.381249408011184,
      "learning_rate": 4.986438624294808e-06,
      "loss": 0.637,
      "step": 1330
    },
    {
      "epoch": 0.16319274153997057,
      "grad_norm": 2.6998198900633463,
      "learning_rate": 4.986385852956751e-06,
      "loss": 0.565,
      "step": 1331
    },
    {
      "epoch": 0.16331535066208927,
      "grad_norm": 2.4373364566014026,
      "learning_rate": 4.986332979423833e-06,
      "loss": 0.6419,
      "step": 1332
    },
    {
      "epoch": 0.16343795978420794,
      "grad_norm": 2.5688188683270354,
      "learning_rate": 4.9862800036982275e-06,
      "loss": 0.6047,
      "step": 1333
    },
    {
      "epoch": 0.16356056890632664,
      "grad_norm": 2.2442525227226984,
      "learning_rate": 4.986226925782113e-06,
      "loss": 0.5656,
      "step": 1334
    },
    {
      "epoch": 0.16368317802844531,
      "grad_norm": 2.213940706338336,
      "learning_rate": 4.986173745677669e-06,
      "loss": 0.622,
      "step": 1335
    },
    {
      "epoch": 0.16380578715056401,
      "grad_norm": 2.235629862587952,
      "learning_rate": 4.986120463387084e-06,
      "loss": 0.579,
      "step": 1336
    },
    {
      "epoch": 0.16392839627268269,
      "grad_norm": 2.1902806980585305,
      "learning_rate": 4.9860670789125465e-06,
      "loss": 0.5833,
      "step": 1337
    },
    {
      "epoch": 0.16405100539480139,
      "grad_norm": 2.5821037893170384,
      "learning_rate": 4.98601359225625e-06,
      "loss": 0.5954,
      "step": 1338
    },
    {
      "epoch": 0.16417361451692006,
      "grad_norm": 2.1319740673061407,
      "learning_rate": 4.985960003420394e-06,
      "loss": 0.5782,
      "step": 1339
    },
    {
      "epoch": 0.16429622363903876,
      "grad_norm": 2.262880242183668,
      "learning_rate": 4.9859063124071815e-06,
      "loss": 0.613,
      "step": 1340
    },
    {
      "epoch": 0.16441883276115743,
      "grad_norm": 2.520017124186216,
      "learning_rate": 4.985852519218818e-06,
      "loss": 0.6025,
      "step": 1341
    },
    {
      "epoch": 0.16454144188327613,
      "grad_norm": 2.4066854642248514,
      "learning_rate": 4.985798623857515e-06,
      "loss": 0.583,
      "step": 1342
    },
    {
      "epoch": 0.1646640510053948,
      "grad_norm": 2.460133585935448,
      "learning_rate": 4.985744626325488e-06,
      "loss": 0.6095,
      "step": 1343
    },
    {
      "epoch": 0.16478666012751347,
      "grad_norm": 2.562531300127705,
      "learning_rate": 4.985690526624956e-06,
      "loss": 0.6196,
      "step": 1344
    },
    {
      "epoch": 0.16490926924963217,
      "grad_norm": 2.423196701831532,
      "learning_rate": 4.985636324758144e-06,
      "loss": 0.6201,
      "step": 1345
    },
    {
      "epoch": 0.16503187837175085,
      "grad_norm": 2.2973208250153,
      "learning_rate": 4.9855820207272775e-06,
      "loss": 0.5874,
      "step": 1346
    },
    {
      "epoch": 0.16515448749386955,
      "grad_norm": 2.334547149977255,
      "learning_rate": 4.98552761453459e-06,
      "loss": 0.6103,
      "step": 1347
    },
    {
      "epoch": 0.16527709661598822,
      "grad_norm": 2.672238940521008,
      "learning_rate": 4.985473106182317e-06,
      "loss": 0.6006,
      "step": 1348
    },
    {
      "epoch": 0.16539970573810692,
      "grad_norm": 2.3367287087269983,
      "learning_rate": 4.985418495672699e-06,
      "loss": 0.5972,
      "step": 1349
    },
    {
      "epoch": 0.1655223148602256,
      "grad_norm": 2.476656240097179,
      "learning_rate": 4.98536378300798e-06,
      "loss": 0.6163,
      "step": 1350
    },
    {
      "epoch": 0.1656449239823443,
      "grad_norm": 2.26138682091372,
      "learning_rate": 4.985308968190411e-06,
      "loss": 0.5765,
      "step": 1351
    },
    {
      "epoch": 0.16576753310446296,
      "grad_norm": 2.4725345599779174,
      "learning_rate": 4.985254051222243e-06,
      "loss": 0.5617,
      "step": 1352
    },
    {
      "epoch": 0.16589014222658166,
      "grad_norm": 2.345400992547203,
      "learning_rate": 4.9851990321057334e-06,
      "loss": 0.6127,
      "step": 1353
    },
    {
      "epoch": 0.16601275134870033,
      "grad_norm": 2.282149687748166,
      "learning_rate": 4.985143910843144e-06,
      "loss": 0.551,
      "step": 1354
    },
    {
      "epoch": 0.16613536047081903,
      "grad_norm": 2.3219058721896944,
      "learning_rate": 4.98508868743674e-06,
      "loss": 0.529,
      "step": 1355
    },
    {
      "epoch": 0.1662579695929377,
      "grad_norm": 2.3448661259439243,
      "learning_rate": 4.985033361888792e-06,
      "loss": 0.5621,
      "step": 1356
    },
    {
      "epoch": 0.1663805787150564,
      "grad_norm": 2.339777766667465,
      "learning_rate": 4.984977934201574e-06,
      "loss": 0.6173,
      "step": 1357
    },
    {
      "epoch": 0.16650318783717508,
      "grad_norm": 2.365188229680647,
      "learning_rate": 4.984922404377362e-06,
      "loss": 0.6153,
      "step": 1358
    },
    {
      "epoch": 0.16662579695929378,
      "grad_norm": 2.412659319501109,
      "learning_rate": 4.984866772418441e-06,
      "loss": 0.6331,
      "step": 1359
    },
    {
      "epoch": 0.16674840608141245,
      "grad_norm": 2.382372530013475,
      "learning_rate": 4.984811038327097e-06,
      "loss": 0.5692,
      "step": 1360
    },
    {
      "epoch": 0.16687101520353115,
      "grad_norm": 2.3426822492408568,
      "learning_rate": 4.984755202105619e-06,
      "loss": 0.57,
      "step": 1361
    },
    {
      "epoch": 0.16699362432564982,
      "grad_norm": 2.3403601395756573,
      "learning_rate": 4.984699263756303e-06,
      "loss": 0.609,
      "step": 1362
    },
    {
      "epoch": 0.16711623344776852,
      "grad_norm": 2.530557745189394,
      "learning_rate": 4.98464322328145e-06,
      "loss": 0.5418,
      "step": 1363
    },
    {
      "epoch": 0.1672388425698872,
      "grad_norm": 2.3244383065416274,
      "learning_rate": 4.984587080683361e-06,
      "loss": 0.555,
      "step": 1364
    },
    {
      "epoch": 0.1673614516920059,
      "grad_norm": 2.5910098437016416,
      "learning_rate": 4.984530835964345e-06,
      "loss": 0.5908,
      "step": 1365
    },
    {
      "epoch": 0.16748406081412456,
      "grad_norm": 2.4253046184682834,
      "learning_rate": 4.984474489126712e-06,
      "loss": 0.5899,
      "step": 1366
    },
    {
      "epoch": 0.16760666993624326,
      "grad_norm": 2.543763981890287,
      "learning_rate": 4.98441804017278e-06,
      "loss": 0.5692,
      "step": 1367
    },
    {
      "epoch": 0.16772927905836194,
      "grad_norm": 2.321548646207187,
      "learning_rate": 4.984361489104867e-06,
      "loss": 0.5821,
      "step": 1368
    },
    {
      "epoch": 0.16785188818048064,
      "grad_norm": 2.4859268364698233,
      "learning_rate": 4.9843048359253e-06,
      "loss": 0.5905,
      "step": 1369
    },
    {
      "epoch": 0.1679744973025993,
      "grad_norm": 2.611180661202308,
      "learning_rate": 4.984248080636406e-06,
      "loss": 0.6163,
      "step": 1370
    },
    {
      "epoch": 0.168097106424718,
      "grad_norm": 2.166812404292956,
      "learning_rate": 4.984191223240517e-06,
      "loss": 0.5893,
      "step": 1371
    },
    {
      "epoch": 0.16821971554683668,
      "grad_norm": 2.3186716151682334,
      "learning_rate": 4.984134263739971e-06,
      "loss": 0.6132,
      "step": 1372
    },
    {
      "epoch": 0.16834232466895538,
      "grad_norm": 2.530501938915876,
      "learning_rate": 4.98407720213711e-06,
      "loss": 0.5947,
      "step": 1373
    },
    {
      "epoch": 0.16846493379107405,
      "grad_norm": 2.365624254999791,
      "learning_rate": 4.984020038434277e-06,
      "loss": 0.6038,
      "step": 1374
    },
    {
      "epoch": 0.16858754291319275,
      "grad_norm": 2.180996155230622,
      "learning_rate": 4.983962772633824e-06,
      "loss": 0.5992,
      "step": 1375
    },
    {
      "epoch": 0.16871015203531142,
      "grad_norm": 2.587977123450828,
      "learning_rate": 4.983905404738103e-06,
      "loss": 0.5553,
      "step": 1376
    },
    {
      "epoch": 0.16883276115743012,
      "grad_norm": 2.479538580216306,
      "learning_rate": 4.983847934749473e-06,
      "loss": 0.5801,
      "step": 1377
    },
    {
      "epoch": 0.1689553702795488,
      "grad_norm": 2.6585605117179947,
      "learning_rate": 4.9837903626702945e-06,
      "loss": 0.6626,
      "step": 1378
    },
    {
      "epoch": 0.1690779794016675,
      "grad_norm": 2.2312262587657705,
      "learning_rate": 4.983732688502935e-06,
      "loss": 0.5825,
      "step": 1379
    },
    {
      "epoch": 0.16920058852378617,
      "grad_norm": 2.2254302368691294,
      "learning_rate": 4.983674912249765e-06,
      "loss": 0.6334,
      "step": 1380
    },
    {
      "epoch": 0.16932319764590487,
      "grad_norm": 2.3928620917589436,
      "learning_rate": 4.983617033913159e-06,
      "loss": 0.5874,
      "step": 1381
    },
    {
      "epoch": 0.16944580676802354,
      "grad_norm": 2.26746698380376,
      "learning_rate": 4.983559053495497e-06,
      "loss": 0.587,
      "step": 1382
    },
    {
      "epoch": 0.16956841589014224,
      "grad_norm": 2.2277779911070894,
      "learning_rate": 4.983500970999161e-06,
      "loss": 0.6237,
      "step": 1383
    },
    {
      "epoch": 0.1696910250122609,
      "grad_norm": 2.389682279948373,
      "learning_rate": 4.983442786426537e-06,
      "loss": 0.6731,
      "step": 1384
    },
    {
      "epoch": 0.1698136341343796,
      "grad_norm": 2.1960601431574434,
      "learning_rate": 4.983384499780019e-06,
      "loss": 0.5394,
      "step": 1385
    },
    {
      "epoch": 0.16993624325649828,
      "grad_norm": 2.4165295695126776,
      "learning_rate": 4.983326111062001e-06,
      "loss": 0.5789,
      "step": 1386
    },
    {
      "epoch": 0.17005885237861698,
      "grad_norm": 2.4160974376793063,
      "learning_rate": 4.983267620274884e-06,
      "loss": 0.6106,
      "step": 1387
    },
    {
      "epoch": 0.17018146150073565,
      "grad_norm": 2.247538084634193,
      "learning_rate": 4.983209027421072e-06,
      "loss": 0.6204,
      "step": 1388
    },
    {
      "epoch": 0.17030407062285435,
      "grad_norm": 2.136456918820717,
      "learning_rate": 4.983150332502971e-06,
      "loss": 0.5905,
      "step": 1389
    },
    {
      "epoch": 0.17042667974497303,
      "grad_norm": 2.3341410310926785,
      "learning_rate": 4.983091535522997e-06,
      "loss": 0.6268,
      "step": 1390
    },
    {
      "epoch": 0.17054928886709173,
      "grad_norm": 2.2001857865460024,
      "learning_rate": 4.983032636483564e-06,
      "loss": 0.5715,
      "step": 1391
    },
    {
      "epoch": 0.1706718979892104,
      "grad_norm": 2.231176650474856,
      "learning_rate": 4.982973635387094e-06,
      "loss": 0.6142,
      "step": 1392
    },
    {
      "epoch": 0.17079450711132907,
      "grad_norm": 2.6264272052762934,
      "learning_rate": 4.982914532236012e-06,
      "loss": 0.595,
      "step": 1393
    },
    {
      "epoch": 0.17091711623344777,
      "grad_norm": 2.54071051441323,
      "learning_rate": 4.982855327032747e-06,
      "loss": 0.6066,
      "step": 1394
    },
    {
      "epoch": 0.17103972535556644,
      "grad_norm": 2.581335975056246,
      "learning_rate": 4.982796019779732e-06,
      "loss": 0.6391,
      "step": 1395
    },
    {
      "epoch": 0.17116233447768514,
      "grad_norm": 2.5787849398342426,
      "learning_rate": 4.9827366104794064e-06,
      "loss": 0.6112,
      "step": 1396
    },
    {
      "epoch": 0.1712849435998038,
      "grad_norm": 2.4549783554811735,
      "learning_rate": 4.98267709913421e-06,
      "loss": 0.5941,
      "step": 1397
    },
    {
      "epoch": 0.1714075527219225,
      "grad_norm": 2.447996870647247,
      "learning_rate": 4.982617485746589e-06,
      "loss": 0.5818,
      "step": 1398
    },
    {
      "epoch": 0.17153016184404118,
      "grad_norm": 2.0490623787461724,
      "learning_rate": 4.982557770318995e-06,
      "loss": 0.5763,
      "step": 1399
    },
    {
      "epoch": 0.17165277096615988,
      "grad_norm": 2.381722485033396,
      "learning_rate": 4.982497952853881e-06,
      "loss": 0.6017,
      "step": 1400
    },
    {
      "epoch": 0.17177538008827856,
      "grad_norm": 2.2537843244944256,
      "learning_rate": 4.982438033353707e-06,
      "loss": 0.5862,
      "step": 1401
    },
    {
      "epoch": 0.17189798921039726,
      "grad_norm": 2.3433098391690446,
      "learning_rate": 4.982378011820934e-06,
      "loss": 0.6154,
      "step": 1402
    },
    {
      "epoch": 0.17202059833251593,
      "grad_norm": 2.260484538286892,
      "learning_rate": 4.982317888258031e-06,
      "loss": 0.5707,
      "step": 1403
    },
    {
      "epoch": 0.17214320745463463,
      "grad_norm": 2.507938270274404,
      "learning_rate": 4.9822576626674676e-06,
      "loss": 0.6459,
      "step": 1404
    },
    {
      "epoch": 0.1722658165767533,
      "grad_norm": 2.484070753314079,
      "learning_rate": 4.98219733505172e-06,
      "loss": 0.612,
      "step": 1405
    },
    {
      "epoch": 0.172388425698872,
      "grad_norm": 2.200918702164268,
      "learning_rate": 4.982136905413268e-06,
      "loss": 0.599,
      "step": 1406
    },
    {
      "epoch": 0.17251103482099067,
      "grad_norm": 2.36685579512844,
      "learning_rate": 4.982076373754594e-06,
      "loss": 0.5595,
      "step": 1407
    },
    {
      "epoch": 0.17263364394310937,
      "grad_norm": 2.3491492511703513,
      "learning_rate": 4.982015740078188e-06,
      "loss": 0.6033,
      "step": 1408
    },
    {
      "epoch": 0.17275625306522804,
      "grad_norm": 2.1405722659053494,
      "learning_rate": 4.98195500438654e-06,
      "loss": 0.5733,
      "step": 1409
    },
    {
      "epoch": 0.17287886218734674,
      "grad_norm": 2.476955013887793,
      "learning_rate": 4.981894166682148e-06,
      "loss": 0.6062,
      "step": 1410
    },
    {
      "epoch": 0.17300147130946542,
      "grad_norm": 2.2499730608901136,
      "learning_rate": 4.981833226967511e-06,
      "loss": 0.5972,
      "step": 1411
    },
    {
      "epoch": 0.17312408043158412,
      "grad_norm": 2.5228921538654014,
      "learning_rate": 4.981772185245135e-06,
      "loss": 0.5722,
      "step": 1412
    },
    {
      "epoch": 0.1732466895537028,
      "grad_norm": 2.4762080362348926,
      "learning_rate": 4.981711041517529e-06,
      "loss": 0.6508,
      "step": 1413
    },
    {
      "epoch": 0.1733692986758215,
      "grad_norm": 2.6727720603590255,
      "learning_rate": 4.9816497957872055e-06,
      "loss": 0.6183,
      "step": 1414
    },
    {
      "epoch": 0.17349190779794016,
      "grad_norm": 2.374137415237587,
      "learning_rate": 4.981588448056682e-06,
      "loss": 0.5537,
      "step": 1415
    },
    {
      "epoch": 0.17361451692005886,
      "grad_norm": 2.2497590411275294,
      "learning_rate": 4.98152699832848e-06,
      "loss": 0.6483,
      "step": 1416
    },
    {
      "epoch": 0.17373712604217753,
      "grad_norm": 2.3924002118520735,
      "learning_rate": 4.9814654466051245e-06,
      "loss": 0.5908,
      "step": 1417
    },
    {
      "epoch": 0.17385973516429623,
      "grad_norm": 2.2379136402963367,
      "learning_rate": 4.981403792889147e-06,
      "loss": 0.5633,
      "step": 1418
    },
    {
      "epoch": 0.1739823442864149,
      "grad_norm": 2.4635868081534102,
      "learning_rate": 4.98134203718308e-06,
      "loss": 0.6043,
      "step": 1419
    },
    {
      "epoch": 0.1741049534085336,
      "grad_norm": 2.3521072714846185,
      "learning_rate": 4.9812801794894625e-06,
      "loss": 0.619,
      "step": 1420
    },
    {
      "epoch": 0.17422756253065227,
      "grad_norm": 2.428511068061862,
      "learning_rate": 4.9812182198108375e-06,
      "loss": 0.574,
      "step": 1421
    },
    {
      "epoch": 0.17435017165277097,
      "grad_norm": 2.294571171352187,
      "learning_rate": 4.981156158149751e-06,
      "loss": 0.6402,
      "step": 1422
    },
    {
      "epoch": 0.17447278077488965,
      "grad_norm": 2.3097691967768483,
      "learning_rate": 4.981093994508753e-06,
      "loss": 0.6081,
      "step": 1423
    },
    {
      "epoch": 0.17459538989700835,
      "grad_norm": 2.6224655253328675,
      "learning_rate": 4.9810317288903995e-06,
      "loss": 0.6064,
      "step": 1424
    },
    {
      "epoch": 0.17471799901912702,
      "grad_norm": 2.606563732381545,
      "learning_rate": 4.9809693612972505e-06,
      "loss": 0.5902,
      "step": 1425
    },
    {
      "epoch": 0.17484060814124572,
      "grad_norm": 2.417860919281375,
      "learning_rate": 4.980906891731868e-06,
      "loss": 0.5799,
      "step": 1426
    },
    {
      "epoch": 0.1749632172633644,
      "grad_norm": 2.1010237064025707,
      "learning_rate": 4.98084432019682e-06,
      "loss": 0.6667,
      "step": 1427
    },
    {
      "epoch": 0.1750858263854831,
      "grad_norm": 2.3544948153534566,
      "learning_rate": 4.980781646694678e-06,
      "loss": 0.6126,
      "step": 1428
    },
    {
      "epoch": 0.17520843550760176,
      "grad_norm": 2.6662624732601543,
      "learning_rate": 4.9807188712280195e-06,
      "loss": 0.6133,
      "step": 1429
    },
    {
      "epoch": 0.17533104462972046,
      "grad_norm": 2.2271563469811397,
      "learning_rate": 4.980655993799423e-06,
      "loss": 0.5793,
      "step": 1430
    },
    {
      "epoch": 0.17545365375183913,
      "grad_norm": 2.558982794145673,
      "learning_rate": 4.980593014411473e-06,
      "loss": 0.5872,
      "step": 1431
    },
    {
      "epoch": 0.17557626287395783,
      "grad_norm": 2.3213480202355488,
      "learning_rate": 4.98052993306676e-06,
      "loss": 0.5844,
      "step": 1432
    },
    {
      "epoch": 0.1756988719960765,
      "grad_norm": 2.2966343708248456,
      "learning_rate": 4.980466749767875e-06,
      "loss": 0.5983,
      "step": 1433
    },
    {
      "epoch": 0.1758214811181952,
      "grad_norm": 2.492630278514891,
      "learning_rate": 4.980403464517414e-06,
      "loss": 0.6553,
      "step": 1434
    },
    {
      "epoch": 0.17594409024031388,
      "grad_norm": 2.510121823185505,
      "learning_rate": 4.9803400773179796e-06,
      "loss": 0.595,
      "step": 1435
    },
    {
      "epoch": 0.17606669936243258,
      "grad_norm": 2.4710015502151896,
      "learning_rate": 4.980276588172178e-06,
      "loss": 0.5977,
      "step": 1436
    },
    {
      "epoch": 0.17618930848455125,
      "grad_norm": 2.6700881039958513,
      "learning_rate": 4.980212997082618e-06,
      "loss": 0.6078,
      "step": 1437
    },
    {
      "epoch": 0.17631191760666995,
      "grad_norm": 2.3183587315772605,
      "learning_rate": 4.980149304051911e-06,
      "loss": 0.6136,
      "step": 1438
    },
    {
      "epoch": 0.17643452672878862,
      "grad_norm": 2.182955702493363,
      "learning_rate": 4.980085509082679e-06,
      "loss": 0.6112,
      "step": 1439
    },
    {
      "epoch": 0.17655713585090732,
      "grad_norm": 2.300140975310315,
      "learning_rate": 4.9800216121775404e-06,
      "loss": 0.6137,
      "step": 1440
    },
    {
      "epoch": 0.176679744973026,
      "grad_norm": 2.589972457910982,
      "learning_rate": 4.979957613339123e-06,
      "loss": 0.6525,
      "step": 1441
    },
    {
      "epoch": 0.1768023540951447,
      "grad_norm": 2.418179709867177,
      "learning_rate": 4.979893512570058e-06,
      "loss": 0.6434,
      "step": 1442
    },
    {
      "epoch": 0.17692496321726336,
      "grad_norm": 2.3244154327113673,
      "learning_rate": 4.979829309872979e-06,
      "loss": 0.6044,
      "step": 1443
    },
    {
      "epoch": 0.17704757233938204,
      "grad_norm": 2.236791861608018,
      "learning_rate": 4.979765005250525e-06,
      "loss": 0.6217,
      "step": 1444
    },
    {
      "epoch": 0.17717018146150074,
      "grad_norm": 2.067163118152957,
      "learning_rate": 4.97970059870534e-06,
      "loss": 0.5652,
      "step": 1445
    },
    {
      "epoch": 0.1772927905836194,
      "grad_norm": 2.4031328781705614,
      "learning_rate": 4.9796360902400695e-06,
      "loss": 0.5604,
      "step": 1446
    },
    {
      "epoch": 0.1774153997057381,
      "grad_norm": 2.2163855846644887,
      "learning_rate": 4.979571479857366e-06,
      "loss": 0.5471,
      "step": 1447
    },
    {
      "epoch": 0.17753800882785678,
      "grad_norm": 2.3082800697780055,
      "learning_rate": 4.979506767559885e-06,
      "loss": 0.5598,
      "step": 1448
    },
    {
      "epoch": 0.17766061794997548,
      "grad_norm": 2.2929018230313063,
      "learning_rate": 4.979441953350287e-06,
      "loss": 0.589,
      "step": 1449
    },
    {
      "epoch": 0.17778322707209415,
      "grad_norm": 2.5897463925802473,
      "learning_rate": 4.979377037231235e-06,
      "loss": 0.6531,
      "step": 1450
    },
    {
      "epoch": 0.17790583619421285,
      "grad_norm": 2.549478816883973,
      "learning_rate": 4.979312019205397e-06,
      "loss": 0.5924,
      "step": 1451
    },
    {
      "epoch": 0.17802844531633152,
      "grad_norm": 2.3019239695567855,
      "learning_rate": 4.979246899275445e-06,
      "loss": 0.6238,
      "step": 1452
    },
    {
      "epoch": 0.17815105443845022,
      "grad_norm": 2.208229090443596,
      "learning_rate": 4.979181677444057e-06,
      "loss": 0.5629,
      "step": 1453
    },
    {
      "epoch": 0.1782736635605689,
      "grad_norm": 2.414555799323564,
      "learning_rate": 4.979116353713913e-06,
      "loss": 0.605,
      "step": 1454
    },
    {
      "epoch": 0.1783962726826876,
      "grad_norm": 2.0591105485158248,
      "learning_rate": 4.979050928087698e-06,
      "loss": 0.5443,
      "step": 1455
    },
    {
      "epoch": 0.17851888180480627,
      "grad_norm": 2.4002487091912217,
      "learning_rate": 4.978985400568101e-06,
      "loss": 0.582,
      "step": 1456
    },
    {
      "epoch": 0.17864149092692497,
      "grad_norm": 2.40286035303665,
      "learning_rate": 4.9789197711578155e-06,
      "loss": 0.6441,
      "step": 1457
    },
    {
      "epoch": 0.17876410004904364,
      "grad_norm": 1.941789187800893,
      "learning_rate": 4.978854039859538e-06,
      "loss": 0.5687,
      "step": 1458
    },
    {
      "epoch": 0.17888670917116234,
      "grad_norm": 2.530143406818697,
      "learning_rate": 4.9787882066759715e-06,
      "loss": 0.6416,
      "step": 1459
    },
    {
      "epoch": 0.179009318293281,
      "grad_norm": 2.5245502859405207,
      "learning_rate": 4.9787222716098225e-06,
      "loss": 0.5717,
      "step": 1460
    },
    {
      "epoch": 0.1791319274153997,
      "grad_norm": 2.2040425219743027,
      "learning_rate": 4.978656234663798e-06,
      "loss": 0.539,
      "step": 1461
    },
    {
      "epoch": 0.17925453653751838,
      "grad_norm": 2.3817532428016444,
      "learning_rate": 4.978590095840615e-06,
      "loss": 0.625,
      "step": 1462
    },
    {
      "epoch": 0.17937714565963708,
      "grad_norm": 2.2890019603413743,
      "learning_rate": 4.9785238551429915e-06,
      "loss": 0.5527,
      "step": 1463
    },
    {
      "epoch": 0.17949975478175575,
      "grad_norm": 2.1735121640886046,
      "learning_rate": 4.978457512573649e-06,
      "loss": 0.5983,
      "step": 1464
    },
    {
      "epoch": 0.17962236390387445,
      "grad_norm": 2.3532141953575447,
      "learning_rate": 4.978391068135315e-06,
      "loss": 0.6135,
      "step": 1465
    },
    {
      "epoch": 0.17974497302599313,
      "grad_norm": 2.5255159024337503,
      "learning_rate": 4.978324521830721e-06,
      "loss": 0.5887,
      "step": 1466
    },
    {
      "epoch": 0.17986758214811183,
      "grad_norm": 2.3118160484528185,
      "learning_rate": 4.978257873662601e-06,
      "loss": 0.5961,
      "step": 1467
    },
    {
      "epoch": 0.1799901912702305,
      "grad_norm": 2.3295625150725097,
      "learning_rate": 4.978191123633695e-06,
      "loss": 0.6035,
      "step": 1468
    },
    {
      "epoch": 0.1801128003923492,
      "grad_norm": 2.2628187255432395,
      "learning_rate": 4.978124271746748e-06,
      "loss": 0.5595,
      "step": 1469
    },
    {
      "epoch": 0.18023540951446787,
      "grad_norm": 2.353114481582541,
      "learning_rate": 4.978057318004505e-06,
      "loss": 0.6379,
      "step": 1470
    },
    {
      "epoch": 0.18035801863658657,
      "grad_norm": 2.3243275448481024,
      "learning_rate": 4.977990262409718e-06,
      "loss": 0.6263,
      "step": 1471
    },
    {
      "epoch": 0.18048062775870524,
      "grad_norm": 2.0952223960267826,
      "learning_rate": 4.977923104965147e-06,
      "loss": 0.6271,
      "step": 1472
    },
    {
      "epoch": 0.18060323688082394,
      "grad_norm": 2.212982914716202,
      "learning_rate": 4.977855845673548e-06,
      "loss": 0.5636,
      "step": 1473
    },
    {
      "epoch": 0.18072584600294261,
      "grad_norm": 2.4042317780291045,
      "learning_rate": 4.9777884845376865e-06,
      "loss": 0.5483,
      "step": 1474
    },
    {
      "epoch": 0.1808484551250613,
      "grad_norm": 2.3903512060202345,
      "learning_rate": 4.977721021560332e-06,
      "loss": 0.6217,
      "step": 1475
    },
    {
      "epoch": 0.18097106424717999,
      "grad_norm": 2.2542438977693857,
      "learning_rate": 4.977653456744258e-06,
      "loss": 0.611,
      "step": 1476
    },
    {
      "epoch": 0.18109367336929869,
      "grad_norm": 2.3596873484836425,
      "learning_rate": 4.97758579009224e-06,
      "loss": 0.5701,
      "step": 1477
    },
    {
      "epoch": 0.18121628249141736,
      "grad_norm": 2.283262517543709,
      "learning_rate": 4.9775180216070594e-06,
      "loss": 0.627,
      "step": 1478
    },
    {
      "epoch": 0.18133889161353606,
      "grad_norm": 2.5428122434746014,
      "learning_rate": 4.977450151291503e-06,
      "loss": 0.601,
      "step": 1479
    },
    {
      "epoch": 0.18146150073565473,
      "grad_norm": 2.1961933518777923,
      "learning_rate": 4.9773821791483585e-06,
      "loss": 0.5663,
      "step": 1480
    },
    {
      "epoch": 0.18158410985777343,
      "grad_norm": 2.398820353813704,
      "learning_rate": 4.977314105180421e-06,
      "loss": 0.5489,
      "step": 1481
    },
    {
      "epoch": 0.1817067189798921,
      "grad_norm": 2.1126387526823525,
      "learning_rate": 4.977245929390488e-06,
      "loss": 0.5832,
      "step": 1482
    },
    {
      "epoch": 0.1818293281020108,
      "grad_norm": 2.170306440735523,
      "learning_rate": 4.9771776517813615e-06,
      "loss": 0.6008,
      "step": 1483
    },
    {
      "epoch": 0.18195193722412947,
      "grad_norm": 2.511919099597879,
      "learning_rate": 4.977109272355848e-06,
      "loss": 0.5701,
      "step": 1484
    },
    {
      "epoch": 0.18207454634624817,
      "grad_norm": 2.6356538049732934,
      "learning_rate": 4.977040791116759e-06,
      "loss": 0.612,
      "step": 1485
    },
    {
      "epoch": 0.18219715546836684,
      "grad_norm": 2.079928363230789,
      "learning_rate": 4.976972208066906e-06,
      "loss": 0.5985,
      "step": 1486
    },
    {
      "epoch": 0.18231976459048554,
      "grad_norm": 2.2735428154430566,
      "learning_rate": 4.976903523209112e-06,
      "loss": 0.5738,
      "step": 1487
    },
    {
      "epoch": 0.18244237371260422,
      "grad_norm": 2.217039575651153,
      "learning_rate": 4.9768347365461975e-06,
      "loss": 0.594,
      "step": 1488
    },
    {
      "epoch": 0.18256498283472292,
      "grad_norm": 2.099299337782455,
      "learning_rate": 4.976765848080991e-06,
      "loss": 0.5794,
      "step": 1489
    },
    {
      "epoch": 0.1826875919568416,
      "grad_norm": 2.6832983230572616,
      "learning_rate": 4.976696857816323e-06,
      "loss": 0.616,
      "step": 1490
    },
    {
      "epoch": 0.1828102010789603,
      "grad_norm": 2.30318567180271,
      "learning_rate": 4.976627765755029e-06,
      "loss": 0.5629,
      "step": 1491
    },
    {
      "epoch": 0.18293281020107896,
      "grad_norm": 2.164157466035948,
      "learning_rate": 4.97655857189995e-06,
      "loss": 0.5561,
      "step": 1492
    },
    {
      "epoch": 0.18305541932319763,
      "grad_norm": 2.4079490626681594,
      "learning_rate": 4.976489276253929e-06,
      "loss": 0.5898,
      "step": 1493
    },
    {
      "epoch": 0.18317802844531633,
      "grad_norm": 2.4563354185617667,
      "learning_rate": 4.976419878819815e-06,
      "loss": 0.5929,
      "step": 1494
    },
    {
      "epoch": 0.183300637567435,
      "grad_norm": 2.1718613802734574,
      "learning_rate": 4.976350379600458e-06,
      "loss": 0.5873,
      "step": 1495
    },
    {
      "epoch": 0.1834232466895537,
      "grad_norm": 2.4502877935325076,
      "learning_rate": 4.976280778598719e-06,
      "loss": 0.5516,
      "step": 1496
    },
    {
      "epoch": 0.18354585581167238,
      "grad_norm": 2.1928907380517364,
      "learning_rate": 4.976211075817454e-06,
      "loss": 0.5599,
      "step": 1497
    },
    {
      "epoch": 0.18366846493379108,
      "grad_norm": 2.1572097027085664,
      "learning_rate": 4.976141271259532e-06,
      "loss": 0.5898,
      "step": 1498
    },
    {
      "epoch": 0.18379107405590975,
      "grad_norm": 2.3151100431067375,
      "learning_rate": 4.976071364927818e-06,
      "loss": 0.5693,
      "step": 1499
    },
    {
      "epoch": 0.18391368317802845,
      "grad_norm": 2.626029131800204,
      "learning_rate": 4.9760013568251885e-06,
      "loss": 0.5829,
      "step": 1500
    },
    {
      "epoch": 0.18403629230014712,
      "grad_norm": 2.39422170683204,
      "learning_rate": 4.97593124695452e-06,
      "loss": 0.553,
      "step": 1501
    },
    {
      "epoch": 0.18415890142226582,
      "grad_norm": 2.3092232966814765,
      "learning_rate": 4.975861035318693e-06,
      "loss": 0.5799,
      "step": 1502
    },
    {
      "epoch": 0.1842815105443845,
      "grad_norm": 2.4973241852364865,
      "learning_rate": 4.975790721920595e-06,
      "loss": 0.6722,
      "step": 1503
    },
    {
      "epoch": 0.1844041196665032,
      "grad_norm": 2.172944961845063,
      "learning_rate": 4.975720306763115e-06,
      "loss": 0.5377,
      "step": 1504
    },
    {
      "epoch": 0.18452672878862186,
      "grad_norm": 2.4467824494735733,
      "learning_rate": 4.9756497898491475e-06,
      "loss": 0.5778,
      "step": 1505
    },
    {
      "epoch": 0.18464933791074056,
      "grad_norm": 2.50816284860462,
      "learning_rate": 4.975579171181591e-06,
      "loss": 0.5615,
      "step": 1506
    },
    {
      "epoch": 0.18477194703285924,
      "grad_norm": 2.39018048851264,
      "learning_rate": 4.975508450763348e-06,
      "loss": 0.6311,
      "step": 1507
    },
    {
      "epoch": 0.18489455615497793,
      "grad_norm": 2.522045130538179,
      "learning_rate": 4.975437628597325e-06,
      "loss": 0.5952,
      "step": 1508
    },
    {
      "epoch": 0.1850171652770966,
      "grad_norm": 2.404956169974717,
      "learning_rate": 4.975366704686433e-06,
      "loss": 0.6251,
      "step": 1509
    },
    {
      "epoch": 0.1851397743992153,
      "grad_norm": 2.525846041501543,
      "learning_rate": 4.975295679033587e-06,
      "loss": 0.6119,
      "step": 1510
    },
    {
      "epoch": 0.18526238352133398,
      "grad_norm": 2.5349209927234235,
      "learning_rate": 4.975224551641706e-06,
      "loss": 0.5778,
      "step": 1511
    },
    {
      "epoch": 0.18538499264345268,
      "grad_norm": 2.5383322588847386,
      "learning_rate": 4.975153322513715e-06,
      "loss": 0.6,
      "step": 1512
    },
    {
      "epoch": 0.18550760176557135,
      "grad_norm": 2.190367910458738,
      "learning_rate": 4.97508199165254e-06,
      "loss": 0.58,
      "step": 1513
    },
    {
      "epoch": 0.18563021088769005,
      "grad_norm": 2.3252004411197875,
      "learning_rate": 4.975010559061113e-06,
      "loss": 0.6314,
      "step": 1514
    },
    {
      "epoch": 0.18575282000980872,
      "grad_norm": 2.542089985205235,
      "learning_rate": 4.9749390247423715e-06,
      "loss": 0.5674,
      "step": 1515
    },
    {
      "epoch": 0.18587542913192742,
      "grad_norm": 2.0933973421893244,
      "learning_rate": 4.974867388699253e-06,
      "loss": 0.5378,
      "step": 1516
    },
    {
      "epoch": 0.1859980382540461,
      "grad_norm": 2.1680145618121167,
      "learning_rate": 4.9747956509347046e-06,
      "loss": 0.5732,
      "step": 1517
    },
    {
      "epoch": 0.1861206473761648,
      "grad_norm": 2.4109752680430283,
      "learning_rate": 4.974723811451673e-06,
      "loss": 0.6177,
      "step": 1518
    },
    {
      "epoch": 0.18624325649828347,
      "grad_norm": 2.2648647630987817,
      "learning_rate": 4.974651870253112e-06,
      "loss": 0.5521,
      "step": 1519
    },
    {
      "epoch": 0.18636586562040217,
      "grad_norm": 2.383208559617692,
      "learning_rate": 4.974579827341977e-06,
      "loss": 0.5362,
      "step": 1520
    },
    {
      "epoch": 0.18648847474252084,
      "grad_norm": 2.5119979016765286,
      "learning_rate": 4.974507682721232e-06,
      "loss": 0.5396,
      "step": 1521
    },
    {
      "epoch": 0.18661108386463954,
      "grad_norm": 2.230345698139807,
      "learning_rate": 4.974435436393839e-06,
      "loss": 0.5915,
      "step": 1522
    },
    {
      "epoch": 0.1867336929867582,
      "grad_norm": 2.3459551766941438,
      "learning_rate": 4.97436308836277e-06,
      "loss": 0.5419,
      "step": 1523
    },
    {
      "epoch": 0.1868563021088769,
      "grad_norm": 2.4303947165154733,
      "learning_rate": 4.974290638630997e-06,
      "loss": 0.6084,
      "step": 1524
    },
    {
      "epoch": 0.18697891123099558,
      "grad_norm": 2.323500218487808,
      "learning_rate": 4.974218087201499e-06,
      "loss": 0.5823,
      "step": 1525
    },
    {
      "epoch": 0.18710152035311428,
      "grad_norm": 2.1753766428459684,
      "learning_rate": 4.974145434077257e-06,
      "loss": 0.5255,
      "step": 1526
    },
    {
      "epoch": 0.18722412947523295,
      "grad_norm": 2.0943511027484387,
      "learning_rate": 4.974072679261257e-06,
      "loss": 0.5663,
      "step": 1527
    },
    {
      "epoch": 0.18734673859735165,
      "grad_norm": 2.3976738409419633,
      "learning_rate": 4.97399982275649e-06,
      "loss": 0.66,
      "step": 1528
    },
    {
      "epoch": 0.18746934771947033,
      "grad_norm": 2.4965082138683865,
      "learning_rate": 4.97392686456595e-06,
      "loss": 0.5953,
      "step": 1529
    },
    {
      "epoch": 0.18759195684158902,
      "grad_norm": 2.421873637664199,
      "learning_rate": 4.973853804692637e-06,
      "loss": 0.596,
      "step": 1530
    },
    {
      "epoch": 0.1877145659637077,
      "grad_norm": 2.2474999557387627,
      "learning_rate": 4.973780643139553e-06,
      "loss": 0.5806,
      "step": 1531
    },
    {
      "epoch": 0.1878371750858264,
      "grad_norm": 2.269393577350137,
      "learning_rate": 4.973707379909705e-06,
      "loss": 0.5684,
      "step": 1532
    },
    {
      "epoch": 0.18795978420794507,
      "grad_norm": 2.528163219431363,
      "learning_rate": 4.973634015006104e-06,
      "loss": 0.6495,
      "step": 1533
    },
    {
      "epoch": 0.18808239333006377,
      "grad_norm": 2.194302380942769,
      "learning_rate": 4.973560548431766e-06,
      "loss": 0.6139,
      "step": 1534
    },
    {
      "epoch": 0.18820500245218244,
      "grad_norm": 2.3609711196803853,
      "learning_rate": 4.9734869801897105e-06,
      "loss": 0.6487,
      "step": 1535
    },
    {
      "epoch": 0.18832761157430114,
      "grad_norm": 2.3542328412977414,
      "learning_rate": 4.9734133102829606e-06,
      "loss": 0.6217,
      "step": 1536
    },
    {
      "epoch": 0.1884502206964198,
      "grad_norm": 2.24460358485131,
      "learning_rate": 4.9733395387145455e-06,
      "loss": 0.5929,
      "step": 1537
    },
    {
      "epoch": 0.1885728298185385,
      "grad_norm": 2.6642540580149436,
      "learning_rate": 4.973265665487496e-06,
      "loss": 0.607,
      "step": 1538
    },
    {
      "epoch": 0.18869543894065718,
      "grad_norm": 2.419430908459478,
      "learning_rate": 4.9731916906048495e-06,
      "loss": 0.5921,
      "step": 1539
    },
    {
      "epoch": 0.18881804806277588,
      "grad_norm": 2.5503773942057357,
      "learning_rate": 4.973117614069646e-06,
      "loss": 0.5996,
      "step": 1540
    },
    {
      "epoch": 0.18894065718489456,
      "grad_norm": 2.8841553818859316,
      "learning_rate": 4.97304343588493e-06,
      "loss": 0.6287,
      "step": 1541
    },
    {
      "epoch": 0.18906326630701323,
      "grad_norm": 2.277300052507531,
      "learning_rate": 4.972969156053751e-06,
      "loss": 0.5875,
      "step": 1542
    },
    {
      "epoch": 0.18918587542913193,
      "grad_norm": 2.421483132338099,
      "learning_rate": 4.9728947745791615e-06,
      "loss": 0.5625,
      "step": 1543
    },
    {
      "epoch": 0.1893084845512506,
      "grad_norm": 2.3003640201827014,
      "learning_rate": 4.972820291464219e-06,
      "loss": 0.5605,
      "step": 1544
    },
    {
      "epoch": 0.1894310936733693,
      "grad_norm": 2.3216604161508645,
      "learning_rate": 4.972745706711985e-06,
      "loss": 0.5516,
      "step": 1545
    },
    {
      "epoch": 0.18955370279548797,
      "grad_norm": 2.012142594081799,
      "learning_rate": 4.972671020325523e-06,
      "loss": 0.6333,
      "step": 1546
    },
    {
      "epoch": 0.18967631191760667,
      "grad_norm": 2.3006794955145313,
      "learning_rate": 4.972596232307906e-06,
      "loss": 0.6076,
      "step": 1547
    },
    {
      "epoch": 0.18979892103972534,
      "grad_norm": 2.539157694851527,
      "learning_rate": 4.972521342662207e-06,
      "loss": 0.6447,
      "step": 1548
    },
    {
      "epoch": 0.18992153016184404,
      "grad_norm": 2.2452095388601907,
      "learning_rate": 4.972446351391503e-06,
      "loss": 0.5835,
      "step": 1549
    },
    {
      "epoch": 0.19004413928396272,
      "grad_norm": 2.6820768528014107,
      "learning_rate": 4.972371258498877e-06,
      "loss": 0.6353,
      "step": 1550
    },
    {
      "epoch": 0.19016674840608142,
      "grad_norm": 2.2779669133661664,
      "learning_rate": 4.972296063987415e-06,
      "loss": 0.5938,
      "step": 1551
    },
    {
      "epoch": 0.1902893575282001,
      "grad_norm": 2.21776894214848,
      "learning_rate": 4.972220767860208e-06,
      "loss": 0.6175,
      "step": 1552
    },
    {
      "epoch": 0.1904119666503188,
      "grad_norm": 2.342977386370764,
      "learning_rate": 4.97214537012035e-06,
      "loss": 0.5571,
      "step": 1553
    },
    {
      "epoch": 0.19053457577243746,
      "grad_norm": 2.2426367398277067,
      "learning_rate": 4.972069870770941e-06,
      "loss": 0.5521,
      "step": 1554
    },
    {
      "epoch": 0.19065718489455616,
      "grad_norm": 2.7424911591613492,
      "learning_rate": 4.971994269815085e-06,
      "loss": 0.5866,
      "step": 1555
    },
    {
      "epoch": 0.19077979401667483,
      "grad_norm": 2.435358837533763,
      "learning_rate": 4.971918567255887e-06,
      "loss": 0.6025,
      "step": 1556
    },
    {
      "epoch": 0.19090240313879353,
      "grad_norm": 2.4066063566011455,
      "learning_rate": 4.971842763096461e-06,
      "loss": 0.5797,
      "step": 1557
    },
    {
      "epoch": 0.1910250122609122,
      "grad_norm": 2.2955593814956297,
      "learning_rate": 4.971766857339919e-06,
      "loss": 0.5719,
      "step": 1558
    },
    {
      "epoch": 0.1911476213830309,
      "grad_norm": 2.464714267826973,
      "learning_rate": 4.971690849989386e-06,
      "loss": 0.6015,
      "step": 1559
    },
    {
      "epoch": 0.19127023050514957,
      "grad_norm": 2.3466695712540835,
      "learning_rate": 4.971614741047982e-06,
      "loss": 0.6008,
      "step": 1560
    },
    {
      "epoch": 0.19139283962726827,
      "grad_norm": 2.4847797592697214,
      "learning_rate": 4.971538530518836e-06,
      "loss": 0.5784,
      "step": 1561
    },
    {
      "epoch": 0.19151544874938695,
      "grad_norm": 2.357590285828855,
      "learning_rate": 4.971462218405081e-06,
      "loss": 0.6005,
      "step": 1562
    },
    {
      "epoch": 0.19163805787150565,
      "grad_norm": 2.241248585296013,
      "learning_rate": 4.971385804709854e-06,
      "loss": 0.5884,
      "step": 1563
    },
    {
      "epoch": 0.19176066699362432,
      "grad_norm": 2.1200253761258185,
      "learning_rate": 4.971309289436295e-06,
      "loss": 0.5485,
      "step": 1564
    },
    {
      "epoch": 0.19188327611574302,
      "grad_norm": 2.317656809702682,
      "learning_rate": 4.971232672587549e-06,
      "loss": 0.5895,
      "step": 1565
    },
    {
      "epoch": 0.1920058852378617,
      "grad_norm": 2.3713326107541506,
      "learning_rate": 4.971155954166766e-06,
      "loss": 0.5813,
      "step": 1566
    },
    {
      "epoch": 0.1921284943599804,
      "grad_norm": 2.265994339935075,
      "learning_rate": 4.971079134177097e-06,
      "loss": 0.5891,
      "step": 1567
    },
    {
      "epoch": 0.19225110348209906,
      "grad_norm": 2.2922045549085728,
      "learning_rate": 4.971002212621701e-06,
      "loss": 0.615,
      "step": 1568
    },
    {
      "epoch": 0.19237371260421776,
      "grad_norm": 2.270398432959497,
      "learning_rate": 4.970925189503741e-06,
      "loss": 0.5823,
      "step": 1569
    },
    {
      "epoch": 0.19249632172633643,
      "grad_norm": 2.1527682539092057,
      "learning_rate": 4.97084806482638e-06,
      "loss": 0.5362,
      "step": 1570
    },
    {
      "epoch": 0.19261893084845513,
      "grad_norm": 2.4479255338127777,
      "learning_rate": 4.97077083859279e-06,
      "loss": 0.6151,
      "step": 1571
    },
    {
      "epoch": 0.1927415399705738,
      "grad_norm": 2.4233503745327383,
      "learning_rate": 4.970693510806144e-06,
      "loss": 0.5803,
      "step": 1572
    },
    {
      "epoch": 0.1928641490926925,
      "grad_norm": 2.2785192531640197,
      "learning_rate": 4.970616081469621e-06,
      "loss": 0.6213,
      "step": 1573
    },
    {
      "epoch": 0.19298675821481118,
      "grad_norm": 2.621134325651386,
      "learning_rate": 4.970538550586403e-06,
      "loss": 0.617,
      "step": 1574
    },
    {
      "epoch": 0.19310936733692988,
      "grad_norm": 2.2720434597221537,
      "learning_rate": 4.970460918159677e-06,
      "loss": 0.5494,
      "step": 1575
    },
    {
      "epoch": 0.19323197645904855,
      "grad_norm": 2.742047839317978,
      "learning_rate": 4.970383184192633e-06,
      "loss": 0.6268,
      "step": 1576
    },
    {
      "epoch": 0.19335458558116725,
      "grad_norm": 2.4501021711826025,
      "learning_rate": 4.970305348688467e-06,
      "loss": 0.6303,
      "step": 1577
    },
    {
      "epoch": 0.19347719470328592,
      "grad_norm": 2.0576693329258213,
      "learning_rate": 4.970227411650378e-06,
      "loss": 0.605,
      "step": 1578
    },
    {
      "epoch": 0.19359980382540462,
      "grad_norm": 2.263131920580064,
      "learning_rate": 4.970149373081569e-06,
      "loss": 0.5764,
      "step": 1579
    },
    {
      "epoch": 0.1937224129475233,
      "grad_norm": 2.821398795267253,
      "learning_rate": 4.9700712329852474e-06,
      "loss": 0.6908,
      "step": 1580
    },
    {
      "epoch": 0.193845022069642,
      "grad_norm": 2.525297788162533,
      "learning_rate": 4.969992991364626e-06,
      "loss": 0.5703,
      "step": 1581
    },
    {
      "epoch": 0.19396763119176066,
      "grad_norm": 2.451703138178556,
      "learning_rate": 4.969914648222919e-06,
      "loss": 0.5811,
      "step": 1582
    },
    {
      "epoch": 0.19409024031387936,
      "grad_norm": 2.326759341858628,
      "learning_rate": 4.969836203563348e-06,
      "loss": 0.6306,
      "step": 1583
    },
    {
      "epoch": 0.19421284943599804,
      "grad_norm": 2.1785000491672593,
      "learning_rate": 4.969757657389136e-06,
      "loss": 0.6264,
      "step": 1584
    },
    {
      "epoch": 0.19433545855811674,
      "grad_norm": 2.3524195624073396,
      "learning_rate": 4.969679009703512e-06,
      "loss": 0.5809,
      "step": 1585
    },
    {
      "epoch": 0.1944580676802354,
      "grad_norm": 2.2406915402385383,
      "learning_rate": 4.969600260509709e-06,
      "loss": 0.6576,
      "step": 1586
    },
    {
      "epoch": 0.1945806768023541,
      "grad_norm": 2.248689488903278,
      "learning_rate": 4.969521409810963e-06,
      "loss": 0.5641,
      "step": 1587
    },
    {
      "epoch": 0.19470328592447278,
      "grad_norm": 2.4199757637492114,
      "learning_rate": 4.969442457610515e-06,
      "loss": 0.5943,
      "step": 1588
    },
    {
      "epoch": 0.19482589504659148,
      "grad_norm": 2.143300400050352,
      "learning_rate": 4.9693634039116105e-06,
      "loss": 0.5403,
      "step": 1589
    },
    {
      "epoch": 0.19494850416871015,
      "grad_norm": 2.2634875040366755,
      "learning_rate": 4.969284248717498e-06,
      "loss": 0.5778,
      "step": 1590
    },
    {
      "epoch": 0.19507111329082885,
      "grad_norm": 2.345017862884853,
      "learning_rate": 4.969204992031431e-06,
      "loss": 0.6291,
      "step": 1591
    },
    {
      "epoch": 0.19519372241294752,
      "grad_norm": 2.375245866566709,
      "learning_rate": 4.9691256338566685e-06,
      "loss": 0.6404,
      "step": 1592
    },
    {
      "epoch": 0.1953163315350662,
      "grad_norm": 2.219644936279012,
      "learning_rate": 4.96904617419647e-06,
      "loss": 0.5856,
      "step": 1593
    },
    {
      "epoch": 0.1954389406571849,
      "grad_norm": 2.4729079625175494,
      "learning_rate": 4.9689666130541035e-06,
      "loss": 0.5515,
      "step": 1594
    },
    {
      "epoch": 0.19556154977930357,
      "grad_norm": 2.3548262319063165,
      "learning_rate": 4.968886950432838e-06,
      "loss": 0.6102,
      "step": 1595
    },
    {
      "epoch": 0.19568415890142227,
      "grad_norm": 2.803337411142021,
      "learning_rate": 4.968807186335948e-06,
      "loss": 0.5717,
      "step": 1596
    },
    {
      "epoch": 0.19580676802354094,
      "grad_norm": 2.499472866010737,
      "learning_rate": 4.968727320766712e-06,
      "loss": 0.6032,
      "step": 1597
    },
    {
      "epoch": 0.19592937714565964,
      "grad_norm": 2.3672487137238716,
      "learning_rate": 4.968647353728413e-06,
      "loss": 0.5814,
      "step": 1598
    },
    {
      "epoch": 0.1960519862677783,
      "grad_norm": 2.4093706906923074,
      "learning_rate": 4.9685672852243375e-06,
      "loss": 0.61,
      "step": 1599
    },
    {
      "epoch": 0.196174595389897,
      "grad_norm": 2.5142742064290498,
      "learning_rate": 4.968487115257775e-06,
      "loss": 0.6214,
      "step": 1600
    },
    {
      "epoch": 0.19629720451201568,
      "grad_norm": 2.6522721952020145,
      "learning_rate": 4.968406843832024e-06,
      "loss": 0.6287,
      "step": 1601
    },
    {
      "epoch": 0.19641981363413438,
      "grad_norm": 2.434732120200167,
      "learning_rate": 4.9683264709503795e-06,
      "loss": 0.6499,
      "step": 1602
    },
    {
      "epoch": 0.19654242275625305,
      "grad_norm": 2.33559678840712,
      "learning_rate": 4.9682459966161476e-06,
      "loss": 0.5945,
      "step": 1603
    },
    {
      "epoch": 0.19666503187837175,
      "grad_norm": 2.2017695724937627,
      "learning_rate": 4.968165420832637e-06,
      "loss": 0.5891,
      "step": 1604
    },
    {
      "epoch": 0.19678764100049043,
      "grad_norm": 2.456201515549834,
      "learning_rate": 4.968084743603156e-06,
      "loss": 0.6292,
      "step": 1605
    },
    {
      "epoch": 0.19691025012260913,
      "grad_norm": 2.1883131482422318,
      "learning_rate": 4.968003964931024e-06,
      "loss": 0.5735,
      "step": 1606
    },
    {
      "epoch": 0.1970328592447278,
      "grad_norm": 2.2736739378312314,
      "learning_rate": 4.96792308481956e-06,
      "loss": 0.5811,
      "step": 1607
    },
    {
      "epoch": 0.1971554683668465,
      "grad_norm": 2.269005647018254,
      "learning_rate": 4.967842103272088e-06,
      "loss": 0.551,
      "step": 1608
    },
    {
      "epoch": 0.19727807748896517,
      "grad_norm": 2.1826020650683815,
      "learning_rate": 4.9677610202919356e-06,
      "loss": 0.6027,
      "step": 1609
    },
    {
      "epoch": 0.19740068661108387,
      "grad_norm": 2.2191892254400454,
      "learning_rate": 4.967679835882438e-06,
      "loss": 0.5747,
      "step": 1610
    },
    {
      "epoch": 0.19752329573320254,
      "grad_norm": 2.468654290632266,
      "learning_rate": 4.9675985500469296e-06,
      "loss": 0.5925,
      "step": 1611
    },
    {
      "epoch": 0.19764590485532124,
      "grad_norm": 2.4095122382606715,
      "learning_rate": 4.967517162788752e-06,
      "loss": 0.5855,
      "step": 1612
    },
    {
      "epoch": 0.1977685139774399,
      "grad_norm": 2.0960667211683552,
      "learning_rate": 4.967435674111251e-06,
      "loss": 0.6046,
      "step": 1613
    },
    {
      "epoch": 0.1978911230995586,
      "grad_norm": 2.2990628340479264,
      "learning_rate": 4.967354084017775e-06,
      "loss": 0.6482,
      "step": 1614
    },
    {
      "epoch": 0.19801373222167729,
      "grad_norm": 2.040328066294717,
      "learning_rate": 4.967272392511679e-06,
      "loss": 0.5956,
      "step": 1615
    },
    {
      "epoch": 0.19813634134379599,
      "grad_norm": 2.420864119099402,
      "learning_rate": 4.967190599596319e-06,
      "loss": 0.5801,
      "step": 1616
    },
    {
      "epoch": 0.19825895046591466,
      "grad_norm": 2.188855938139264,
      "learning_rate": 4.967108705275058e-06,
      "loss": 0.6106,
      "step": 1617
    },
    {
      "epoch": 0.19838155958803336,
      "grad_norm": 2.2855861059905327,
      "learning_rate": 4.96702670955126e-06,
      "loss": 0.5493,
      "step": 1618
    },
    {
      "epoch": 0.19850416871015203,
      "grad_norm": 2.2205420200011847,
      "learning_rate": 4.966944612428298e-06,
      "loss": 0.6206,
      "step": 1619
    },
    {
      "epoch": 0.19862677783227073,
      "grad_norm": 2.321276184144881,
      "learning_rate": 4.966862413909545e-06,
      "loss": 0.583,
      "step": 1620
    },
    {
      "epoch": 0.1987493869543894,
      "grad_norm": 2.416099602742343,
      "learning_rate": 4.96678011399838e-06,
      "loss": 0.6085,
      "step": 1621
    },
    {
      "epoch": 0.1988719960765081,
      "grad_norm": 2.2673007105883753,
      "learning_rate": 4.966697712698185e-06,
      "loss": 0.59,
      "step": 1622
    },
    {
      "epoch": 0.19899460519862677,
      "grad_norm": 2.389279459269057,
      "learning_rate": 4.966615210012346e-06,
      "loss": 0.5844,
      "step": 1623
    },
    {
      "epoch": 0.19911721432074547,
      "grad_norm": 2.2629170220700265,
      "learning_rate": 4.9665326059442565e-06,
      "loss": 0.5732,
      "step": 1624
    },
    {
      "epoch": 0.19923982344286414,
      "grad_norm": 2.185552095227119,
      "learning_rate": 4.96644990049731e-06,
      "loss": 0.5984,
      "step": 1625
    },
    {
      "epoch": 0.19936243256498284,
      "grad_norm": 2.1764759892285594,
      "learning_rate": 4.966367093674905e-06,
      "loss": 0.5355,
      "step": 1626
    },
    {
      "epoch": 0.19948504168710152,
      "grad_norm": 2.0992513609774472,
      "learning_rate": 4.966284185480447e-06,
      "loss": 0.5618,
      "step": 1627
    },
    {
      "epoch": 0.19960765080922022,
      "grad_norm": 2.1862666789653296,
      "learning_rate": 4.966201175917343e-06,
      "loss": 0.5685,
      "step": 1628
    },
    {
      "epoch": 0.1997302599313389,
      "grad_norm": 2.182507277537254,
      "learning_rate": 4.966118064989004e-06,
      "loss": 0.5967,
      "step": 1629
    },
    {
      "epoch": 0.1998528690534576,
      "grad_norm": 2.283920269675738,
      "learning_rate": 4.966034852698846e-06,
      "loss": 0.5939,
      "step": 1630
    },
    {
      "epoch": 0.19997547817557626,
      "grad_norm": 2.507990394192595,
      "learning_rate": 4.965951539050292e-06,
      "loss": 0.663,
      "step": 1631
    },
    {
      "epoch": 0.20009808729769496,
      "grad_norm": 2.122126910610123,
      "learning_rate": 4.965868124046762e-06,
      "loss": 0.5527,
      "step": 1632
    },
    {
      "epoch": 0.20022069641981363,
      "grad_norm": 2.366139436464341,
      "learning_rate": 4.965784607691687e-06,
      "loss": 0.6089,
      "step": 1633
    },
    {
      "epoch": 0.20034330554193233,
      "grad_norm": 2.4020118979263536,
      "learning_rate": 4.9657009899885e-06,
      "loss": 0.5539,
      "step": 1634
    },
    {
      "epoch": 0.200465914664051,
      "grad_norm": 2.2830507484982454,
      "learning_rate": 4.9656172709406366e-06,
      "loss": 0.5663,
      "step": 1635
    },
    {
      "epoch": 0.2005885237861697,
      "grad_norm": 2.320106950444327,
      "learning_rate": 4.965533450551539e-06,
      "loss": 0.5963,
      "step": 1636
    },
    {
      "epoch": 0.20071113290828838,
      "grad_norm": 2.7493835679839504,
      "learning_rate": 4.965449528824651e-06,
      "loss": 0.6431,
      "step": 1637
    },
    {
      "epoch": 0.20083374203040708,
      "grad_norm": 2.2489186063312916,
      "learning_rate": 4.965365505763423e-06,
      "loss": 0.595,
      "step": 1638
    },
    {
      "epoch": 0.20095635115252575,
      "grad_norm": 2.3495474431910037,
      "learning_rate": 4.965281381371308e-06,
      "loss": 0.635,
      "step": 1639
    },
    {
      "epoch": 0.20107896027464445,
      "grad_norm": 2.2176861727723685,
      "learning_rate": 4.965197155651763e-06,
      "loss": 0.6497,
      "step": 1640
    },
    {
      "epoch": 0.20120156939676312,
      "grad_norm": 2.295951721211305,
      "learning_rate": 4.9651128286082515e-06,
      "loss": 0.5583,
      "step": 1641
    },
    {
      "epoch": 0.2013241785188818,
      "grad_norm": 2.1487126291384433,
      "learning_rate": 4.965028400244238e-06,
      "loss": 0.555,
      "step": 1642
    },
    {
      "epoch": 0.2014467876410005,
      "grad_norm": 2.3010956498132322,
      "learning_rate": 4.9649438705631945e-06,
      "loss": 0.5715,
      "step": 1643
    },
    {
      "epoch": 0.20156939676311916,
      "grad_norm": 2.4113723353627408,
      "learning_rate": 4.9648592395685935e-06,
      "loss": 0.5698,
      "step": 1644
    },
    {
      "epoch": 0.20169200588523786,
      "grad_norm": 2.4964108282249584,
      "learning_rate": 4.964774507263914e-06,
      "loss": 0.6784,
      "step": 1645
    },
    {
      "epoch": 0.20181461500735653,
      "grad_norm": 2.4425866098962277,
      "learning_rate": 4.964689673652638e-06,
      "loss": 0.5829,
      "step": 1646
    },
    {
      "epoch": 0.20193722412947523,
      "grad_norm": 2.2854156662923124,
      "learning_rate": 4.9646047387382535e-06,
      "loss": 0.5865,
      "step": 1647
    },
    {
      "epoch": 0.2020598332515939,
      "grad_norm": 2.233200840617831,
      "learning_rate": 4.964519702524251e-06,
      "loss": 0.6072,
      "step": 1648
    },
    {
      "epoch": 0.2021824423737126,
      "grad_norm": 2.3784297131283325,
      "learning_rate": 4.964434565014125e-06,
      "loss": 0.5831,
      "step": 1649
    },
    {
      "epoch": 0.20230505149583128,
      "grad_norm": 2.2802247136173577,
      "learning_rate": 4.9643493262113775e-06,
      "loss": 0.5789,
      "step": 1650
    },
    {
      "epoch": 0.20242766061794998,
      "grad_norm": 2.6296036764753206,
      "learning_rate": 4.964263986119508e-06,
      "loss": 0.5888,
      "step": 1651
    },
    {
      "epoch": 0.20255026974006865,
      "grad_norm": 2.2025845850235295,
      "learning_rate": 4.964178544742026e-06,
      "loss": 0.5689,
      "step": 1652
    },
    {
      "epoch": 0.20267287886218735,
      "grad_norm": 2.571364157837625,
      "learning_rate": 4.9640930020824445e-06,
      "loss": 0.5918,
      "step": 1653
    },
    {
      "epoch": 0.20279548798430602,
      "grad_norm": 2.4521198138957847,
      "learning_rate": 4.9640073581442786e-06,
      "loss": 0.5991,
      "step": 1654
    },
    {
      "epoch": 0.20291809710642472,
      "grad_norm": 2.2315556827165532,
      "learning_rate": 4.963921612931046e-06,
      "loss": 0.5464,
      "step": 1655
    },
    {
      "epoch": 0.2030407062285434,
      "grad_norm": 2.4069680401624205,
      "learning_rate": 4.963835766446275e-06,
      "loss": 0.6323,
      "step": 1656
    },
    {
      "epoch": 0.2031633153506621,
      "grad_norm": 2.4243761345788744,
      "learning_rate": 4.963749818693491e-06,
      "loss": 0.5528,
      "step": 1657
    },
    {
      "epoch": 0.20328592447278077,
      "grad_norm": 2.4577965475832237,
      "learning_rate": 4.963663769676228e-06,
      "loss": 0.5663,
      "step": 1658
    },
    {
      "epoch": 0.20340853359489947,
      "grad_norm": 2.254234627590077,
      "learning_rate": 4.963577619398023e-06,
      "loss": 0.5371,
      "step": 1659
    },
    {
      "epoch": 0.20353114271701814,
      "grad_norm": 2.496318636417951,
      "learning_rate": 4.9634913678624165e-06,
      "loss": 0.5363,
      "step": 1660
    },
    {
      "epoch": 0.20365375183913684,
      "grad_norm": 2.514444364188334,
      "learning_rate": 4.963405015072953e-06,
      "loss": 0.5339,
      "step": 1661
    },
    {
      "epoch": 0.2037763609612555,
      "grad_norm": 2.2342419268007854,
      "learning_rate": 4.963318561033182e-06,
      "loss": 0.5849,
      "step": 1662
    },
    {
      "epoch": 0.2038989700833742,
      "grad_norm": 2.581005255402,
      "learning_rate": 4.963232005746658e-06,
      "loss": 0.6138,
      "step": 1663
    },
    {
      "epoch": 0.20402157920549288,
      "grad_norm": 2.2357422351870824,
      "learning_rate": 4.963145349216937e-06,
      "loss": 0.5475,
      "step": 1664
    },
    {
      "epoch": 0.20414418832761158,
      "grad_norm": 2.4781161930145514,
      "learning_rate": 4.963058591447582e-06,
      "loss": 0.5778,
      "step": 1665
    },
    {
      "epoch": 0.20426679744973025,
      "grad_norm": 2.4180869856292304,
      "learning_rate": 4.962971732442158e-06,
      "loss": 0.6092,
      "step": 1666
    },
    {
      "epoch": 0.20438940657184895,
      "grad_norm": 2.478748345203845,
      "learning_rate": 4.962884772204235e-06,
      "loss": 0.5999,
      "step": 1667
    },
    {
      "epoch": 0.20451201569396762,
      "grad_norm": 2.228364278149826,
      "learning_rate": 4.9627977107373885e-06,
      "loss": 0.5816,
      "step": 1668
    },
    {
      "epoch": 0.20463462481608632,
      "grad_norm": 2.421009761784144,
      "learning_rate": 4.962710548045196e-06,
      "loss": 0.5839,
      "step": 1669
    },
    {
      "epoch": 0.204757233938205,
      "grad_norm": 2.243105676603552,
      "learning_rate": 4.962623284131239e-06,
      "loss": 0.6046,
      "step": 1670
    },
    {
      "epoch": 0.2048798430603237,
      "grad_norm": 2.317369776674691,
      "learning_rate": 4.962535918999106e-06,
      "loss": 0.637,
      "step": 1671
    },
    {
      "epoch": 0.20500245218244237,
      "grad_norm": 2.2831060657738416,
      "learning_rate": 4.962448452652388e-06,
      "loss": 0.5664,
      "step": 1672
    },
    {
      "epoch": 0.20512506130456107,
      "grad_norm": 2.5241604122133494,
      "learning_rate": 4.962360885094678e-06,
      "loss": 0.5643,
      "step": 1673
    },
    {
      "epoch": 0.20524767042667974,
      "grad_norm": 2.4758851990413877,
      "learning_rate": 4.962273216329577e-06,
      "loss": 0.625,
      "step": 1674
    },
    {
      "epoch": 0.20537027954879844,
      "grad_norm": 2.6769386458009903,
      "learning_rate": 4.9621854463606874e-06,
      "loss": 0.5811,
      "step": 1675
    },
    {
      "epoch": 0.2054928886709171,
      "grad_norm": 2.470043669269327,
      "learning_rate": 4.962097575191618e-06,
      "loss": 0.6479,
      "step": 1676
    },
    {
      "epoch": 0.2056154977930358,
      "grad_norm": 2.014226227160287,
      "learning_rate": 4.962009602825978e-06,
      "loss": 0.5644,
      "step": 1677
    },
    {
      "epoch": 0.20573810691515448,
      "grad_norm": 2.4062417577147324,
      "learning_rate": 4.961921529267386e-06,
      "loss": 0.5494,
      "step": 1678
    },
    {
      "epoch": 0.20586071603727318,
      "grad_norm": 2.4278140351730464,
      "learning_rate": 4.96183335451946e-06,
      "loss": 0.5811,
      "step": 1679
    },
    {
      "epoch": 0.20598332515939186,
      "grad_norm": 2.3913023920366623,
      "learning_rate": 4.961745078585825e-06,
      "loss": 0.5585,
      "step": 1680
    },
    {
      "epoch": 0.20610593428151056,
      "grad_norm": 2.3270912898596103,
      "learning_rate": 4.961656701470108e-06,
      "loss": 0.5832,
      "step": 1681
    },
    {
      "epoch": 0.20622854340362923,
      "grad_norm": 2.067268347180257,
      "learning_rate": 4.961568223175943e-06,
      "loss": 0.5756,
      "step": 1682
    },
    {
      "epoch": 0.20635115252574793,
      "grad_norm": 2.2948794828182493,
      "learning_rate": 4.9614796437069665e-06,
      "loss": 0.6079,
      "step": 1683
    },
    {
      "epoch": 0.2064737616478666,
      "grad_norm": 2.4191936419483513,
      "learning_rate": 4.961390963066819e-06,
      "loss": 0.6388,
      "step": 1684
    },
    {
      "epoch": 0.2065963707699853,
      "grad_norm": 2.3665344039859324,
      "learning_rate": 4.961302181259146e-06,
      "loss": 0.567,
      "step": 1685
    },
    {
      "epoch": 0.20671897989210397,
      "grad_norm": 2.2477400256737448,
      "learning_rate": 4.9612132982875946e-06,
      "loss": 0.6024,
      "step": 1686
    },
    {
      "epoch": 0.20684158901422267,
      "grad_norm": 2.403131242098018,
      "learning_rate": 4.961124314155821e-06,
      "loss": 0.6679,
      "step": 1687
    },
    {
      "epoch": 0.20696419813634134,
      "grad_norm": 2.1774363504676577,
      "learning_rate": 4.96103522886748e-06,
      "loss": 0.5644,
      "step": 1688
    },
    {
      "epoch": 0.20708680725846004,
      "grad_norm": 2.261497783400117,
      "learning_rate": 4.960946042426234e-06,
      "loss": 0.5748,
      "step": 1689
    },
    {
      "epoch": 0.20720941638057871,
      "grad_norm": 2.624966689270812,
      "learning_rate": 4.960856754835749e-06,
      "loss": 0.5929,
      "step": 1690
    },
    {
      "epoch": 0.2073320255026974,
      "grad_norm": 2.2782730188736076,
      "learning_rate": 4.960767366099695e-06,
      "loss": 0.5591,
      "step": 1691
    },
    {
      "epoch": 0.2074546346248161,
      "grad_norm": 2.3428787996246165,
      "learning_rate": 4.960677876221746e-06,
      "loss": 0.6393,
      "step": 1692
    },
    {
      "epoch": 0.20757724374693476,
      "grad_norm": 2.2363249574388897,
      "learning_rate": 4.96058828520558e-06,
      "loss": 0.5681,
      "step": 1693
    },
    {
      "epoch": 0.20769985286905346,
      "grad_norm": 2.2935556881528165,
      "learning_rate": 4.960498593054879e-06,
      "loss": 0.5978,
      "step": 1694
    },
    {
      "epoch": 0.20782246199117213,
      "grad_norm": 2.360295252131974,
      "learning_rate": 4.9604087997733295e-06,
      "loss": 0.6253,
      "step": 1695
    },
    {
      "epoch": 0.20794507111329083,
      "grad_norm": 2.193520876677897,
      "learning_rate": 4.960318905364623e-06,
      "loss": 0.5579,
      "step": 1696
    },
    {
      "epoch": 0.2080676802354095,
      "grad_norm": 2.3610967854728226,
      "learning_rate": 4.960228909832454e-06,
      "loss": 0.5976,
      "step": 1697
    },
    {
      "epoch": 0.2081902893575282,
      "grad_norm": 2.2906424725745866,
      "learning_rate": 4.960138813180522e-06,
      "loss": 0.5607,
      "step": 1698
    },
    {
      "epoch": 0.20831289847964687,
      "grad_norm": 2.2698099670693277,
      "learning_rate": 4.9600486154125286e-06,
      "loss": 0.6057,
      "step": 1699
    },
    {
      "epoch": 0.20843550760176557,
      "grad_norm": 2.521912627862457,
      "learning_rate": 4.959958316532181e-06,
      "loss": 0.6526,
      "step": 1700
    },
    {
      "epoch": 0.20855811672388425,
      "grad_norm": 2.183681702616677,
      "learning_rate": 4.959867916543193e-06,
      "loss": 0.6257,
      "step": 1701
    },
    {
      "epoch": 0.20868072584600295,
      "grad_norm": 2.1033976461704054,
      "learning_rate": 4.959777415449278e-06,
      "loss": 0.6242,
      "step": 1702
    },
    {
      "epoch": 0.20880333496812162,
      "grad_norm": 2.274700900825532,
      "learning_rate": 4.959686813254156e-06,
      "loss": 0.5663,
      "step": 1703
    },
    {
      "epoch": 0.20892594409024032,
      "grad_norm": 2.540985196360038,
      "learning_rate": 4.959596109961553e-06,
      "loss": 0.6227,
      "step": 1704
    },
    {
      "epoch": 0.209048553212359,
      "grad_norm": 2.1467957391821026,
      "learning_rate": 4.959505305575195e-06,
      "loss": 0.5859,
      "step": 1705
    },
    {
      "epoch": 0.2091711623344777,
      "grad_norm": 3.065055682471649,
      "learning_rate": 4.959414400098814e-06,
      "loss": 0.6393,
      "step": 1706
    },
    {
      "epoch": 0.20929377145659636,
      "grad_norm": 2.2604202378225224,
      "learning_rate": 4.959323393536148e-06,
      "loss": 0.6103,
      "step": 1707
    },
    {
      "epoch": 0.20941638057871506,
      "grad_norm": 2.2828133410148186,
      "learning_rate": 4.9592322858909355e-06,
      "loss": 0.574,
      "step": 1708
    },
    {
      "epoch": 0.20953898970083373,
      "grad_norm": 2.3114051778307405,
      "learning_rate": 4.9591410771669234e-06,
      "loss": 0.5767,
      "step": 1709
    },
    {
      "epoch": 0.20966159882295243,
      "grad_norm": 2.1634462016338736,
      "learning_rate": 4.95904976736786e-06,
      "loss": 0.5706,
      "step": 1710
    },
    {
      "epoch": 0.2097842079450711,
      "grad_norm": 2.5820550549924106,
      "learning_rate": 4.958958356497497e-06,
      "loss": 0.6932,
      "step": 1711
    },
    {
      "epoch": 0.2099068170671898,
      "grad_norm": 2.328932449788501,
      "learning_rate": 4.958866844559591e-06,
      "loss": 0.6407,
      "step": 1712
    },
    {
      "epoch": 0.21002942618930848,
      "grad_norm": 2.242337851623521,
      "learning_rate": 4.958775231557906e-06,
      "loss": 0.6061,
      "step": 1713
    },
    {
      "epoch": 0.21015203531142718,
      "grad_norm": 2.1842706250776356,
      "learning_rate": 4.958683517496206e-06,
      "loss": 0.6002,
      "step": 1714
    },
    {
      "epoch": 0.21027464443354585,
      "grad_norm": 2.3434794347510017,
      "learning_rate": 4.95859170237826e-06,
      "loss": 0.6042,
      "step": 1715
    },
    {
      "epoch": 0.21039725355566455,
      "grad_norm": 2.271942908927076,
      "learning_rate": 4.958499786207843e-06,
      "loss": 0.5745,
      "step": 1716
    },
    {
      "epoch": 0.21051986267778322,
      "grad_norm": 2.3755628608571744,
      "learning_rate": 4.958407768988733e-06,
      "loss": 0.6437,
      "step": 1717
    },
    {
      "epoch": 0.21064247179990192,
      "grad_norm": 2.4777972464060767,
      "learning_rate": 4.958315650724711e-06,
      "loss": 0.6272,
      "step": 1718
    },
    {
      "epoch": 0.2107650809220206,
      "grad_norm": 2.2056178695979325,
      "learning_rate": 4.958223431419564e-06,
      "loss": 0.5851,
      "step": 1719
    },
    {
      "epoch": 0.2108876900441393,
      "grad_norm": 2.435177310927417,
      "learning_rate": 4.95813111107708e-06,
      "loss": 0.5797,
      "step": 1720
    },
    {
      "epoch": 0.21101029916625796,
      "grad_norm": 2.0839966233822724,
      "learning_rate": 4.958038689701058e-06,
      "loss": 0.6094,
      "step": 1721
    },
    {
      "epoch": 0.21113290828837666,
      "grad_norm": 2.062995964847084,
      "learning_rate": 4.9579461672952925e-06,
      "loss": 0.5625,
      "step": 1722
    },
    {
      "epoch": 0.21125551741049534,
      "grad_norm": 2.2340532306532874,
      "learning_rate": 4.957853543863589e-06,
      "loss": 0.5927,
      "step": 1723
    },
    {
      "epoch": 0.21137812653261404,
      "grad_norm": 2.26711091744546,
      "learning_rate": 4.9577608194097535e-06,
      "loss": 0.509,
      "step": 1724
    },
    {
      "epoch": 0.2115007356547327,
      "grad_norm": 2.2157083423785173,
      "learning_rate": 4.957667993937596e-06,
      "loss": 0.6113,
      "step": 1725
    },
    {
      "epoch": 0.2116233447768514,
      "grad_norm": 2.3763035265718524,
      "learning_rate": 4.957575067450935e-06,
      "loss": 0.6535,
      "step": 1726
    },
    {
      "epoch": 0.21174595389897008,
      "grad_norm": 2.463021134371631,
      "learning_rate": 4.9574820399535864e-06,
      "loss": 0.6248,
      "step": 1727
    },
    {
      "epoch": 0.21186856302108878,
      "grad_norm": 2.103661106802358,
      "learning_rate": 4.957388911449376e-06,
      "loss": 0.5579,
      "step": 1728
    },
    {
      "epoch": 0.21199117214320745,
      "grad_norm": 2.1047936581915176,
      "learning_rate": 4.957295681942131e-06,
      "loss": 0.551,
      "step": 1729
    },
    {
      "epoch": 0.21211378126532615,
      "grad_norm": 2.44865318009651,
      "learning_rate": 4.957202351435683e-06,
      "loss": 0.6067,
      "step": 1730
    },
    {
      "epoch": 0.21223639038744482,
      "grad_norm": 2.23560047203879,
      "learning_rate": 4.957108919933868e-06,
      "loss": 0.5623,
      "step": 1731
    },
    {
      "epoch": 0.21235899950956352,
      "grad_norm": 2.3001241024523273,
      "learning_rate": 4.957015387440527e-06,
      "loss": 0.6235,
      "step": 1732
    },
    {
      "epoch": 0.2124816086316822,
      "grad_norm": 2.3417014002918184,
      "learning_rate": 4.956921753959502e-06,
      "loss": 0.5917,
      "step": 1733
    },
    {
      "epoch": 0.2126042177538009,
      "grad_norm": 2.1993973570799397,
      "learning_rate": 4.956828019494646e-06,
      "loss": 0.6061,
      "step": 1734
    },
    {
      "epoch": 0.21272682687591957,
      "grad_norm": 2.531103651102391,
      "learning_rate": 4.956734184049807e-06,
      "loss": 0.5725,
      "step": 1735
    },
    {
      "epoch": 0.21284943599803827,
      "grad_norm": 2.3237895441814738,
      "learning_rate": 4.956640247628844e-06,
      "loss": 0.6363,
      "step": 1736
    },
    {
      "epoch": 0.21297204512015694,
      "grad_norm": 2.2019715028445805,
      "learning_rate": 4.956546210235618e-06,
      "loss": 0.5585,
      "step": 1737
    },
    {
      "epoch": 0.21309465424227564,
      "grad_norm": 2.2198551415021224,
      "learning_rate": 4.956452071873994e-06,
      "loss": 0.5938,
      "step": 1738
    },
    {
      "epoch": 0.2132172633643943,
      "grad_norm": 2.406006204722173,
      "learning_rate": 4.9563578325478415e-06,
      "loss": 0.6129,
      "step": 1739
    },
    {
      "epoch": 0.213339872486513,
      "grad_norm": 2.3751241049684335,
      "learning_rate": 4.956263492261032e-06,
      "loss": 0.6776,
      "step": 1740
    },
    {
      "epoch": 0.21346248160863168,
      "grad_norm": 2.263856458247828,
      "learning_rate": 4.956169051017445e-06,
      "loss": 0.5827,
      "step": 1741
    },
    {
      "epoch": 0.21358509073075035,
      "grad_norm": 2.1794284803895234,
      "learning_rate": 4.956074508820962e-06,
      "loss": 0.5873,
      "step": 1742
    },
    {
      "epoch": 0.21370769985286905,
      "grad_norm": 2.23829302147125,
      "learning_rate": 4.955979865675469e-06,
      "loss": 0.5651,
      "step": 1743
    },
    {
      "epoch": 0.21383030897498773,
      "grad_norm": 2.2431759995772693,
      "learning_rate": 4.955885121584855e-06,
      "loss": 0.6045,
      "step": 1744
    },
    {
      "epoch": 0.21395291809710643,
      "grad_norm": 2.387891474529674,
      "learning_rate": 4.955790276553014e-06,
      "loss": 0.6076,
      "step": 1745
    },
    {
      "epoch": 0.2140755272192251,
      "grad_norm": 2.3575853279100345,
      "learning_rate": 4.955695330583846e-06,
      "loss": 0.5819,
      "step": 1746
    },
    {
      "epoch": 0.2141981363413438,
      "grad_norm": 2.176834019288478,
      "learning_rate": 4.955600283681252e-06,
      "loss": 0.562,
      "step": 1747
    },
    {
      "epoch": 0.21432074546346247,
      "grad_norm": 2.4267073827736962,
      "learning_rate": 4.9555051358491394e-06,
      "loss": 0.5954,
      "step": 1748
    },
    {
      "epoch": 0.21444335458558117,
      "grad_norm": 2.187440770061682,
      "learning_rate": 4.9554098870914186e-06,
      "loss": 0.5171,
      "step": 1749
    },
    {
      "epoch": 0.21456596370769984,
      "grad_norm": 2.153400246016118,
      "learning_rate": 4.955314537412004e-06,
      "loss": 0.5621,
      "step": 1750
    },
    {
      "epoch": 0.21468857282981854,
      "grad_norm": 2.3162516475712245,
      "learning_rate": 4.955219086814816e-06,
      "loss": 0.5425,
      "step": 1751
    },
    {
      "epoch": 0.2148111819519372,
      "grad_norm": 2.5607388778673217,
      "learning_rate": 4.955123535303775e-06,
      "loss": 0.5907,
      "step": 1752
    },
    {
      "epoch": 0.2149337910740559,
      "grad_norm": 2.0493069877051555,
      "learning_rate": 4.955027882882813e-06,
      "loss": 0.5336,
      "step": 1753
    },
    {
      "epoch": 0.21505640019617459,
      "grad_norm": 2.3804513950884307,
      "learning_rate": 4.954932129555857e-06,
      "loss": 0.567,
      "step": 1754
    },
    {
      "epoch": 0.21517900931829329,
      "grad_norm": 2.260816934821278,
      "learning_rate": 4.9548362753268446e-06,
      "loss": 0.5501,
      "step": 1755
    },
    {
      "epoch": 0.21530161844041196,
      "grad_norm": 2.037851236356583,
      "learning_rate": 4.954740320199715e-06,
      "loss": 0.5698,
      "step": 1756
    },
    {
      "epoch": 0.21542422756253066,
      "grad_norm": 2.2115779147228687,
      "learning_rate": 4.9546442641784135e-06,
      "loss": 0.5485,
      "step": 1757
    },
    {
      "epoch": 0.21554683668464933,
      "grad_norm": 2.084945968917317,
      "learning_rate": 4.954548107266886e-06,
      "loss": 0.5363,
      "step": 1758
    },
    {
      "epoch": 0.21566944580676803,
      "grad_norm": 2.1555374547948167,
      "learning_rate": 4.954451849469087e-06,
      "loss": 0.6012,
      "step": 1759
    },
    {
      "epoch": 0.2157920549288867,
      "grad_norm": 2.2606541075174973,
      "learning_rate": 4.95435549078897e-06,
      "loss": 0.6183,
      "step": 1760
    },
    {
      "epoch": 0.2159146640510054,
      "grad_norm": 2.113725102977604,
      "learning_rate": 4.954259031230498e-06,
      "loss": 0.5426,
      "step": 1761
    },
    {
      "epoch": 0.21603727317312407,
      "grad_norm": 2.179371846038098,
      "learning_rate": 4.954162470797635e-06,
      "loss": 0.5514,
      "step": 1762
    },
    {
      "epoch": 0.21615988229524277,
      "grad_norm": 2.3279612929311915,
      "learning_rate": 4.954065809494351e-06,
      "loss": 0.6162,
      "step": 1763
    },
    {
      "epoch": 0.21628249141736144,
      "grad_norm": 2.3588947946250096,
      "learning_rate": 4.953969047324616e-06,
      "loss": 0.6126,
      "step": 1764
    },
    {
      "epoch": 0.21640510053948014,
      "grad_norm": 2.3787931583018502,
      "learning_rate": 4.95387218429241e-06,
      "loss": 0.6186,
      "step": 1765
    },
    {
      "epoch": 0.21652770966159882,
      "grad_norm": 2.158572676957473,
      "learning_rate": 4.953775220401712e-06,
      "loss": 0.6212,
      "step": 1766
    },
    {
      "epoch": 0.21665031878371752,
      "grad_norm": 2.3505057002308156,
      "learning_rate": 4.953678155656508e-06,
      "loss": 0.5812,
      "step": 1767
    },
    {
      "epoch": 0.2167729279058362,
      "grad_norm": 2.4159188523907713,
      "learning_rate": 4.953580990060789e-06,
      "loss": 0.622,
      "step": 1768
    },
    {
      "epoch": 0.2168955370279549,
      "grad_norm": 2.4118367474728974,
      "learning_rate": 4.953483723618547e-06,
      "loss": 0.5826,
      "step": 1769
    },
    {
      "epoch": 0.21701814615007356,
      "grad_norm": 2.409378347146923,
      "learning_rate": 4.95338635633378e-06,
      "loss": 0.603,
      "step": 1770
    },
    {
      "epoch": 0.21714075527219226,
      "grad_norm": 2.325744892184617,
      "learning_rate": 4.953288888210491e-06,
      "loss": 0.6215,
      "step": 1771
    },
    {
      "epoch": 0.21726336439431093,
      "grad_norm": 2.0118710746256525,
      "learning_rate": 4.953191319252686e-06,
      "loss": 0.5677,
      "step": 1772
    },
    {
      "epoch": 0.21738597351642963,
      "grad_norm": 2.2175871115710017,
      "learning_rate": 4.953093649464374e-06,
      "loss": 0.5978,
      "step": 1773
    },
    {
      "epoch": 0.2175085826385483,
      "grad_norm": 1.8587059404966506,
      "learning_rate": 4.95299587884957e-06,
      "loss": 0.5074,
      "step": 1774
    },
    {
      "epoch": 0.217631191760667,
      "grad_norm": 2.3757501902188896,
      "learning_rate": 4.952898007412294e-06,
      "loss": 0.594,
      "step": 1775
    },
    {
      "epoch": 0.21775380088278568,
      "grad_norm": 2.33436005851082,
      "learning_rate": 4.952800035156565e-06,
      "loss": 0.6179,
      "step": 1776
    },
    {
      "epoch": 0.21787641000490438,
      "grad_norm": 2.0792424310863384,
      "learning_rate": 4.9527019620864145e-06,
      "loss": 0.5214,
      "step": 1777
    },
    {
      "epoch": 0.21799901912702305,
      "grad_norm": 2.3513429597399447,
      "learning_rate": 4.95260378820587e-06,
      "loss": 0.6487,
      "step": 1778
    },
    {
      "epoch": 0.21812162824914175,
      "grad_norm": 2.5299061409919172,
      "learning_rate": 4.952505513518968e-06,
      "loss": 0.6756,
      "step": 1779
    },
    {
      "epoch": 0.21824423737126042,
      "grad_norm": 2.2729934274583177,
      "learning_rate": 4.9524071380297474e-06,
      "loss": 0.5661,
      "step": 1780
    },
    {
      "epoch": 0.21836684649337912,
      "grad_norm": 2.6146701916382806,
      "learning_rate": 4.952308661742252e-06,
      "loss": 0.5391,
      "step": 1781
    },
    {
      "epoch": 0.2184894556154978,
      "grad_norm": 2.137646158862619,
      "learning_rate": 4.952210084660529e-06,
      "loss": 0.5479,
      "step": 1782
    },
    {
      "epoch": 0.2186120647376165,
      "grad_norm": 2.128420452403992,
      "learning_rate": 4.952111406788631e-06,
      "loss": 0.5785,
      "step": 1783
    },
    {
      "epoch": 0.21873467385973516,
      "grad_norm": 2.410135560039185,
      "learning_rate": 4.952012628130612e-06,
      "loss": 0.61,
      "step": 1784
    },
    {
      "epoch": 0.21885728298185386,
      "grad_norm": 2.0227193304602773,
      "learning_rate": 4.951913748690534e-06,
      "loss": 0.5826,
      "step": 1785
    },
    {
      "epoch": 0.21897989210397253,
      "grad_norm": 2.2388407695419716,
      "learning_rate": 4.95181476847246e-06,
      "loss": 0.6029,
      "step": 1786
    },
    {
      "epoch": 0.21910250122609123,
      "grad_norm": 2.223496231333312,
      "learning_rate": 4.951715687480458e-06,
      "loss": 0.6058,
      "step": 1787
    },
    {
      "epoch": 0.2192251103482099,
      "grad_norm": 1.976594871752472,
      "learning_rate": 4.951616505718601e-06,
      "loss": 0.5626,
      "step": 1788
    },
    {
      "epoch": 0.2193477194703286,
      "grad_norm": 2.4313533925234196,
      "learning_rate": 4.951517223190965e-06,
      "loss": 0.5841,
      "step": 1789
    },
    {
      "epoch": 0.21947032859244728,
      "grad_norm": 2.198255680443867,
      "learning_rate": 4.951417839901631e-06,
      "loss": 0.5507,
      "step": 1790
    },
    {
      "epoch": 0.21959293771456595,
      "grad_norm": 2.236815211086097,
      "learning_rate": 4.951318355854684e-06,
      "loss": 0.6325,
      "step": 1791
    },
    {
      "epoch": 0.21971554683668465,
      "grad_norm": 2.480217085538718,
      "learning_rate": 4.951218771054213e-06,
      "loss": 0.5857,
      "step": 1792
    },
    {
      "epoch": 0.21983815595880332,
      "grad_norm": 1.9920611759878066,
      "learning_rate": 4.951119085504311e-06,
      "loss": 0.5606,
      "step": 1793
    },
    {
      "epoch": 0.21996076508092202,
      "grad_norm": 2.1996126053969896,
      "learning_rate": 4.951019299209075e-06,
      "loss": 0.5322,
      "step": 1794
    },
    {
      "epoch": 0.2200833742030407,
      "grad_norm": 2.336271920029637,
      "learning_rate": 4.950919412172607e-06,
      "loss": 0.5757,
      "step": 1795
    },
    {
      "epoch": 0.2202059833251594,
      "grad_norm": 2.3446057060035708,
      "learning_rate": 4.950819424399012e-06,
      "loss": 0.5575,
      "step": 1796
    },
    {
      "epoch": 0.22032859244727807,
      "grad_norm": 2.3447420883247783,
      "learning_rate": 4.9507193358924e-06,
      "loss": 0.5765,
      "step": 1797
    },
    {
      "epoch": 0.22045120156939677,
      "grad_norm": 1.9544770654406476,
      "learning_rate": 4.9506191466568845e-06,
      "loss": 0.5721,
      "step": 1798
    },
    {
      "epoch": 0.22057381069151544,
      "grad_norm": 2.3699785476747675,
      "learning_rate": 4.950518856696583e-06,
      "loss": 0.5434,
      "step": 1799
    },
    {
      "epoch": 0.22069641981363414,
      "grad_norm": 2.4454674098700426,
      "learning_rate": 4.950418466015619e-06,
      "loss": 0.6376,
      "step": 1800
    },
    {
      "epoch": 0.2208190289357528,
      "grad_norm": 2.2434721213057403,
      "learning_rate": 4.950317974618118e-06,
      "loss": 0.5808,
      "step": 1801
    },
    {
      "epoch": 0.2209416380578715,
      "grad_norm": 2.40709210053278,
      "learning_rate": 4.95021738250821e-06,
      "loss": 0.5301,
      "step": 1802
    },
    {
      "epoch": 0.22106424717999018,
      "grad_norm": 2.1664976783488443,
      "learning_rate": 4.95011668969003e-06,
      "loss": 0.5866,
      "step": 1803
    },
    {
      "epoch": 0.22118685630210888,
      "grad_norm": 2.5197664807787166,
      "learning_rate": 4.950015896167716e-06,
      "loss": 0.5093,
      "step": 1804
    },
    {
      "epoch": 0.22130946542422755,
      "grad_norm": 2.2408889659933298,
      "learning_rate": 4.949915001945411e-06,
      "loss": 0.5445,
      "step": 1805
    },
    {
      "epoch": 0.22143207454634625,
      "grad_norm": 2.157852511017864,
      "learning_rate": 4.949814007027263e-06,
      "loss": 0.6135,
      "step": 1806
    },
    {
      "epoch": 0.22155468366846492,
      "grad_norm": 2.2486543572608033,
      "learning_rate": 4.949712911417422e-06,
      "loss": 0.5333,
      "step": 1807
    },
    {
      "epoch": 0.22167729279058362,
      "grad_norm": 2.305567392689942,
      "learning_rate": 4.949611715120043e-06,
      "loss": 0.5486,
      "step": 1808
    },
    {
      "epoch": 0.2217999019127023,
      "grad_norm": 2.4470700063234654,
      "learning_rate": 4.949510418139286e-06,
      "loss": 0.5661,
      "step": 1809
    },
    {
      "epoch": 0.221922511034821,
      "grad_norm": 2.1998275158978857,
      "learning_rate": 4.949409020479315e-06,
      "loss": 0.5575,
      "step": 1810
    },
    {
      "epoch": 0.22204512015693967,
      "grad_norm": 2.1702389701271296,
      "learning_rate": 4.9493075221442965e-06,
      "loss": 0.552,
      "step": 1811
    },
    {
      "epoch": 0.22216772927905837,
      "grad_norm": 2.3938804467052255,
      "learning_rate": 4.949205923138403e-06,
      "loss": 0.5561,
      "step": 1812
    },
    {
      "epoch": 0.22229033840117704,
      "grad_norm": 2.025303180041747,
      "learning_rate": 4.949104223465809e-06,
      "loss": 0.5209,
      "step": 1813
    },
    {
      "epoch": 0.22241294752329574,
      "grad_norm": 2.343626402776832,
      "learning_rate": 4.949002423130697e-06,
      "loss": 0.6004,
      "step": 1814
    },
    {
      "epoch": 0.2225355566454144,
      "grad_norm": 2.24280394187528,
      "learning_rate": 4.948900522137249e-06,
      "loss": 0.6067,
      "step": 1815
    },
    {
      "epoch": 0.2226581657675331,
      "grad_norm": 2.2927035127776345,
      "learning_rate": 4.948798520489654e-06,
      "loss": 0.5809,
      "step": 1816
    },
    {
      "epoch": 0.22278077488965178,
      "grad_norm": 2.1053777147449835,
      "learning_rate": 4.948696418192105e-06,
      "loss": 0.6051,
      "step": 1817
    },
    {
      "epoch": 0.22290338401177048,
      "grad_norm": 2.1509037075220427,
      "learning_rate": 4.9485942152487975e-06,
      "loss": 0.5719,
      "step": 1818
    },
    {
      "epoch": 0.22302599313388916,
      "grad_norm": 2.065396827240128,
      "learning_rate": 4.948491911663933e-06,
      "loss": 0.5915,
      "step": 1819
    },
    {
      "epoch": 0.22314860225600786,
      "grad_norm": 2.326896631762251,
      "learning_rate": 4.948389507441717e-06,
      "loss": 0.6449,
      "step": 1820
    },
    {
      "epoch": 0.22327121137812653,
      "grad_norm": 2.118555045256659,
      "learning_rate": 4.948287002586357e-06,
      "loss": 0.5449,
      "step": 1821
    },
    {
      "epoch": 0.22339382050024523,
      "grad_norm": 2.2723970743283806,
      "learning_rate": 4.948184397102067e-06,
      "loss": 0.6126,
      "step": 1822
    },
    {
      "epoch": 0.2235164296223639,
      "grad_norm": 2.142392932135924,
      "learning_rate": 4.948081690993064e-06,
      "loss": 0.6292,
      "step": 1823
    },
    {
      "epoch": 0.2236390387444826,
      "grad_norm": 2.162116893143904,
      "learning_rate": 4.9479788842635695e-06,
      "loss": 0.5291,
      "step": 1824
    },
    {
      "epoch": 0.22376164786660127,
      "grad_norm": 2.4765851926519384,
      "learning_rate": 4.947875976917809e-06,
      "loss": 0.5383,
      "step": 1825
    },
    {
      "epoch": 0.22388425698871997,
      "grad_norm": 2.294735178421592,
      "learning_rate": 4.947772968960013e-06,
      "loss": 0.5713,
      "step": 1826
    },
    {
      "epoch": 0.22400686611083864,
      "grad_norm": 2.061127328452858,
      "learning_rate": 4.947669860394414e-06,
      "loss": 0.5465,
      "step": 1827
    },
    {
      "epoch": 0.22412947523295734,
      "grad_norm": 2.164959080099467,
      "learning_rate": 4.94756665122525e-06,
      "loss": 0.6147,
      "step": 1828
    },
    {
      "epoch": 0.22425208435507601,
      "grad_norm": 1.9479004822456587,
      "learning_rate": 4.947463341456764e-06,
      "loss": 0.6102,
      "step": 1829
    },
    {
      "epoch": 0.22437469347719471,
      "grad_norm": 2.158448384622164,
      "learning_rate": 4.947359931093202e-06,
      "loss": 0.5953,
      "step": 1830
    },
    {
      "epoch": 0.2244973025993134,
      "grad_norm": 2.1857636703419674,
      "learning_rate": 4.947256420138814e-06,
      "loss": 0.57,
      "step": 1831
    },
    {
      "epoch": 0.2246199117214321,
      "grad_norm": 2.1891885757991516,
      "learning_rate": 4.947152808597853e-06,
      "loss": 0.5755,
      "step": 1832
    },
    {
      "epoch": 0.22474252084355076,
      "grad_norm": 2.233876926724756,
      "learning_rate": 4.947049096474582e-06,
      "loss": 0.6355,
      "step": 1833
    },
    {
      "epoch": 0.22486512996566946,
      "grad_norm": 2.386132901613447,
      "learning_rate": 4.94694528377326e-06,
      "loss": 0.5909,
      "step": 1834
    },
    {
      "epoch": 0.22498773908778813,
      "grad_norm": 2.156528784613418,
      "learning_rate": 4.946841370498154e-06,
      "loss": 0.5949,
      "step": 1835
    },
    {
      "epoch": 0.22511034820990683,
      "grad_norm": 2.270692464581508,
      "learning_rate": 4.946737356653537e-06,
      "loss": 0.5737,
      "step": 1836
    },
    {
      "epoch": 0.2252329573320255,
      "grad_norm": 2.514114469205459,
      "learning_rate": 4.9466332422436815e-06,
      "loss": 0.5845,
      "step": 1837
    },
    {
      "epoch": 0.2253555664541442,
      "grad_norm": 2.229479224899249,
      "learning_rate": 4.9465290272728695e-06,
      "loss": 0.5747,
      "step": 1838
    },
    {
      "epoch": 0.22547817557626287,
      "grad_norm": 1.9399226126895188,
      "learning_rate": 4.946424711745383e-06,
      "loss": 0.5664,
      "step": 1839
    },
    {
      "epoch": 0.22560078469838157,
      "grad_norm": 2.196756454596855,
      "learning_rate": 4.9463202956655096e-06,
      "loss": 0.5897,
      "step": 1840
    },
    {
      "epoch": 0.22572339382050025,
      "grad_norm": 2.426393209148004,
      "learning_rate": 4.946215779037542e-06,
      "loss": 0.6415,
      "step": 1841
    },
    {
      "epoch": 0.22584600294261892,
      "grad_norm": 2.129511232792416,
      "learning_rate": 4.9461111618657745e-06,
      "loss": 0.5959,
      "step": 1842
    },
    {
      "epoch": 0.22596861206473762,
      "grad_norm": 2.0028250694741607,
      "learning_rate": 4.946006444154507e-06,
      "loss": 0.5639,
      "step": 1843
    },
    {
      "epoch": 0.2260912211868563,
      "grad_norm": 2.310277937003683,
      "learning_rate": 4.945901625908046e-06,
      "loss": 0.6302,
      "step": 1844
    },
    {
      "epoch": 0.226213830308975,
      "grad_norm": 2.142508420684766,
      "learning_rate": 4.9457967071306965e-06,
      "loss": 0.5712,
      "step": 1845
    },
    {
      "epoch": 0.22633643943109366,
      "grad_norm": 2.2273940880663674,
      "learning_rate": 4.9456916878267736e-06,
      "loss": 0.5789,
      "step": 1846
    },
    {
      "epoch": 0.22645904855321236,
      "grad_norm": 2.3876677824962504,
      "learning_rate": 4.945586568000592e-06,
      "loss": 0.5951,
      "step": 1847
    },
    {
      "epoch": 0.22658165767533103,
      "grad_norm": 2.183589912241365,
      "learning_rate": 4.945481347656473e-06,
      "loss": 0.5647,
      "step": 1848
    },
    {
      "epoch": 0.22670426679744973,
      "grad_norm": 1.9973168568298894,
      "learning_rate": 4.945376026798741e-06,
      "loss": 0.5717,
      "step": 1849
    },
    {
      "epoch": 0.2268268759195684,
      "grad_norm": 2.2795457577726994,
      "learning_rate": 4.945270605431726e-06,
      "loss": 0.6045,
      "step": 1850
    },
    {
      "epoch": 0.2269494850416871,
      "grad_norm": 2.1678701393149327,
      "learning_rate": 4.9451650835597585e-06,
      "loss": 0.6342,
      "step": 1851
    },
    {
      "epoch": 0.22707209416380578,
      "grad_norm": 2.1428119272775596,
      "learning_rate": 4.945059461187178e-06,
      "loss": 0.6204,
      "step": 1852
    },
    {
      "epoch": 0.22719470328592448,
      "grad_norm": 2.176733644472983,
      "learning_rate": 4.944953738318326e-06,
      "loss": 0.5876,
      "step": 1853
    },
    {
      "epoch": 0.22731731240804315,
      "grad_norm": 2.2924197571629836,
      "learning_rate": 4.944847914957546e-06,
      "loss": 0.5634,
      "step": 1854
    },
    {
      "epoch": 0.22743992153016185,
      "grad_norm": 2.1326424131056387,
      "learning_rate": 4.944741991109188e-06,
      "loss": 0.5526,
      "step": 1855
    },
    {
      "epoch": 0.22756253065228052,
      "grad_norm": 2.187064393650996,
      "learning_rate": 4.944635966777607e-06,
      "loss": 0.6184,
      "step": 1856
    },
    {
      "epoch": 0.22768513977439922,
      "grad_norm": 2.3674099499505123,
      "learning_rate": 4.944529841967159e-06,
      "loss": 0.6263,
      "step": 1857
    },
    {
      "epoch": 0.2278077488965179,
      "grad_norm": 2.1021630331129564,
      "learning_rate": 4.944423616682208e-06,
      "loss": 0.5562,
      "step": 1858
    },
    {
      "epoch": 0.2279303580186366,
      "grad_norm": 2.292162677233169,
      "learning_rate": 4.9443172909271174e-06,
      "loss": 0.5565,
      "step": 1859
    },
    {
      "epoch": 0.22805296714075526,
      "grad_norm": 2.2397001588642245,
      "learning_rate": 4.94421086470626e-06,
      "loss": 0.5531,
      "step": 1860
    },
    {
      "epoch": 0.22817557626287396,
      "grad_norm": 2.310606081982031,
      "learning_rate": 4.944104338024008e-06,
      "loss": 0.5764,
      "step": 1861
    },
    {
      "epoch": 0.22829818538499264,
      "grad_norm": 2.4336907094573057,
      "learning_rate": 4.943997710884741e-06,
      "loss": 0.6722,
      "step": 1862
    },
    {
      "epoch": 0.22842079450711134,
      "grad_norm": 2.011339653113614,
      "learning_rate": 4.9438909832928415e-06,
      "loss": 0.5955,
      "step": 1863
    },
    {
      "epoch": 0.22854340362923,
      "grad_norm": 2.1348346268911924,
      "learning_rate": 4.943784155252696e-06,
      "loss": 0.5716,
      "step": 1864
    },
    {
      "epoch": 0.2286660127513487,
      "grad_norm": 2.2754995050385207,
      "learning_rate": 4.943677226768696e-06,
      "loss": 0.6125,
      "step": 1865
    },
    {
      "epoch": 0.22878862187346738,
      "grad_norm": 2.1198866438464097,
      "learning_rate": 4.943570197845235e-06,
      "loss": 0.5738,
      "step": 1866
    },
    {
      "epoch": 0.22891123099558608,
      "grad_norm": 2.133685106605385,
      "learning_rate": 4.943463068486713e-06,
      "loss": 0.6216,
      "step": 1867
    },
    {
      "epoch": 0.22903384011770475,
      "grad_norm": 2.1282267320858974,
      "learning_rate": 4.943355838697533e-06,
      "loss": 0.5509,
      "step": 1868
    },
    {
      "epoch": 0.22915644923982345,
      "grad_norm": 2.533248218745524,
      "learning_rate": 4.943248508482102e-06,
      "loss": 0.6338,
      "step": 1869
    },
    {
      "epoch": 0.22927905836194212,
      "grad_norm": 2.1365647212830137,
      "learning_rate": 4.943141077844833e-06,
      "loss": 0.5846,
      "step": 1870
    },
    {
      "epoch": 0.22940166748406082,
      "grad_norm": 2.386630514281521,
      "learning_rate": 4.9430335467901406e-06,
      "loss": 0.5423,
      "step": 1871
    },
    {
      "epoch": 0.2295242766061795,
      "grad_norm": 2.2318895727321206,
      "learning_rate": 4.942925915322443e-06,
      "loss": 0.5744,
      "step": 1872
    },
    {
      "epoch": 0.2296468857282982,
      "grad_norm": 2.3082965717508745,
      "learning_rate": 4.942818183446166e-06,
      "loss": 0.6172,
      "step": 1873
    },
    {
      "epoch": 0.22976949485041687,
      "grad_norm": 2.2772069736462885,
      "learning_rate": 4.942710351165737e-06,
      "loss": 0.5607,
      "step": 1874
    },
    {
      "epoch": 0.22989210397253557,
      "grad_norm": 2.1802420038520895,
      "learning_rate": 4.942602418485588e-06,
      "loss": 0.5705,
      "step": 1875
    },
    {
      "epoch": 0.23001471309465424,
      "grad_norm": 2.158643595713686,
      "learning_rate": 4.942494385410155e-06,
      "loss": 0.5757,
      "step": 1876
    },
    {
      "epoch": 0.23013732221677294,
      "grad_norm": 2.0212355367756722,
      "learning_rate": 4.942386251943879e-06,
      "loss": 0.5272,
      "step": 1877
    },
    {
      "epoch": 0.2302599313388916,
      "grad_norm": 2.2221067579160096,
      "learning_rate": 4.942278018091204e-06,
      "loss": 0.5532,
      "step": 1878
    },
    {
      "epoch": 0.2303825404610103,
      "grad_norm": 2.062867522516271,
      "learning_rate": 4.94216968385658e-06,
      "loss": 0.6005,
      "step": 1879
    },
    {
      "epoch": 0.23050514958312898,
      "grad_norm": 2.1251286420275703,
      "learning_rate": 4.942061249244457e-06,
      "loss": 0.5435,
      "step": 1880
    },
    {
      "epoch": 0.23062775870524768,
      "grad_norm": 2.4432984452370974,
      "learning_rate": 4.941952714259293e-06,
      "loss": 0.6066,
      "step": 1881
    },
    {
      "epoch": 0.23075036782736635,
      "grad_norm": 2.297278889435217,
      "learning_rate": 4.941844078905551e-06,
      "loss": 0.6982,
      "step": 1882
    },
    {
      "epoch": 0.23087297694948505,
      "grad_norm": 2.1267828037023233,
      "learning_rate": 4.941735343187694e-06,
      "loss": 0.555,
      "step": 1883
    },
    {
      "epoch": 0.23099558607160373,
      "grad_norm": 2.500484301064396,
      "learning_rate": 4.941626507110191e-06,
      "loss": 0.5642,
      "step": 1884
    },
    {
      "epoch": 0.23111819519372243,
      "grad_norm": 2.079174173988208,
      "learning_rate": 4.941517570677516e-06,
      "loss": 0.5677,
      "step": 1885
    },
    {
      "epoch": 0.2312408043158411,
      "grad_norm": 2.173067661158134,
      "learning_rate": 4.941408533894147e-06,
      "loss": 0.6157,
      "step": 1886
    },
    {
      "epoch": 0.2313634134379598,
      "grad_norm": 2.291586237276953,
      "learning_rate": 4.941299396764564e-06,
      "loss": 0.6422,
      "step": 1887
    },
    {
      "epoch": 0.23148602256007847,
      "grad_norm": 2.405422625207477,
      "learning_rate": 4.9411901592932545e-06,
      "loss": 0.5509,
      "step": 1888
    },
    {
      "epoch": 0.23160863168219717,
      "grad_norm": 2.1753320184654776,
      "learning_rate": 4.941080821484708e-06,
      "loss": 0.5986,
      "step": 1889
    },
    {
      "epoch": 0.23173124080431584,
      "grad_norm": 2.266608894929078,
      "learning_rate": 4.940971383343418e-06,
      "loss": 0.5925,
      "step": 1890
    },
    {
      "epoch": 0.2318538499264345,
      "grad_norm": 2.197088856002678,
      "learning_rate": 4.940861844873882e-06,
      "loss": 0.6344,
      "step": 1891
    },
    {
      "epoch": 0.2319764590485532,
      "grad_norm": 2.3049810096142833,
      "learning_rate": 4.9407522060806035e-06,
      "loss": 0.5339,
      "step": 1892
    },
    {
      "epoch": 0.23209906817067189,
      "grad_norm": 2.082823725375649,
      "learning_rate": 4.940642466968089e-06,
      "loss": 0.5516,
      "step": 1893
    },
    {
      "epoch": 0.23222167729279058,
      "grad_norm": 2.127144586341707,
      "learning_rate": 4.940532627540847e-06,
      "loss": 0.5841,
      "step": 1894
    },
    {
      "epoch": 0.23234428641490926,
      "grad_norm": 2.1431854191008317,
      "learning_rate": 4.940422687803395e-06,
      "loss": 0.5776,
      "step": 1895
    },
    {
      "epoch": 0.23246689553702796,
      "grad_norm": 2.1558268137028485,
      "learning_rate": 4.940312647760248e-06,
      "loss": 0.6208,
      "step": 1896
    },
    {
      "epoch": 0.23258950465914663,
      "grad_norm": 2.1963047293659024,
      "learning_rate": 4.940202507415933e-06,
      "loss": 0.5583,
      "step": 1897
    },
    {
      "epoch": 0.23271211378126533,
      "grad_norm": 2.238978248446138,
      "learning_rate": 4.940092266774973e-06,
      "loss": 0.5796,
      "step": 1898
    },
    {
      "epoch": 0.232834722903384,
      "grad_norm": 2.066922973218185,
      "learning_rate": 4.939981925841902e-06,
      "loss": 0.5759,
      "step": 1899
    },
    {
      "epoch": 0.2329573320255027,
      "grad_norm": 2.241442773823402,
      "learning_rate": 4.939871484621255e-06,
      "loss": 0.5608,
      "step": 1900
    },
    {
      "epoch": 0.23307994114762137,
      "grad_norm": 1.952622598474811,
      "learning_rate": 4.93976094311757e-06,
      "loss": 0.5055,
      "step": 1901
    },
    {
      "epoch": 0.23320255026974007,
      "grad_norm": 2.236298524465375,
      "learning_rate": 4.939650301335391e-06,
      "loss": 0.5912,
      "step": 1902
    },
    {
      "epoch": 0.23332515939185874,
      "grad_norm": 2.372774068847823,
      "learning_rate": 4.939539559279264e-06,
      "loss": 0.5423,
      "step": 1903
    },
    {
      "epoch": 0.23344776851397744,
      "grad_norm": 2.295199353683956,
      "learning_rate": 4.939428716953744e-06,
      "loss": 0.5697,
      "step": 1904
    },
    {
      "epoch": 0.23357037763609612,
      "grad_norm": 2.2386663495276675,
      "learning_rate": 4.9393177743633845e-06,
      "loss": 0.6137,
      "step": 1905
    },
    {
      "epoch": 0.23369298675821482,
      "grad_norm": 2.3695946072556646,
      "learning_rate": 4.9392067315127455e-06,
      "loss": 0.5964,
      "step": 1906
    },
    {
      "epoch": 0.2338155958803335,
      "grad_norm": 2.2527619155298484,
      "learning_rate": 4.939095588406392e-06,
      "loss": 0.537,
      "step": 1907
    },
    {
      "epoch": 0.2339382050024522,
      "grad_norm": 2.518451704394603,
      "learning_rate": 4.938984345048892e-06,
      "loss": 0.5459,
      "step": 1908
    },
    {
      "epoch": 0.23406081412457086,
      "grad_norm": 2.4860032887504278,
      "learning_rate": 4.938873001444818e-06,
      "loss": 0.5762,
      "step": 1909
    },
    {
      "epoch": 0.23418342324668956,
      "grad_norm": 2.3227755960585026,
      "learning_rate": 4.938761557598745e-06,
      "loss": 0.6154,
      "step": 1910
    },
    {
      "epoch": 0.23430603236880823,
      "grad_norm": 2.2066002238137536,
      "learning_rate": 4.938650013515255e-06,
      "loss": 0.5928,
      "step": 1911
    },
    {
      "epoch": 0.23442864149092693,
      "grad_norm": 2.2220918052748897,
      "learning_rate": 4.938538369198932e-06,
      "loss": 0.5735,
      "step": 1912
    },
    {
      "epoch": 0.2345512506130456,
      "grad_norm": 2.621351509702348,
      "learning_rate": 4.938426624654364e-06,
      "loss": 0.5936,
      "step": 1913
    },
    {
      "epoch": 0.2346738597351643,
      "grad_norm": 2.398006766620534,
      "learning_rate": 4.9383147798861465e-06,
      "loss": 0.6265,
      "step": 1914
    },
    {
      "epoch": 0.23479646885728298,
      "grad_norm": 2.1224901475332705,
      "learning_rate": 4.938202834898874e-06,
      "loss": 0.5654,
      "step": 1915
    },
    {
      "epoch": 0.23491907797940167,
      "grad_norm": 2.3926470587915554,
      "learning_rate": 4.9380907896971485e-06,
      "loss": 0.6032,
      "step": 1916
    },
    {
      "epoch": 0.23504168710152035,
      "grad_norm": 2.1205266820285833,
      "learning_rate": 4.9379786442855755e-06,
      "loss": 0.5848,
      "step": 1917
    },
    {
      "epoch": 0.23516429622363905,
      "grad_norm": 2.3033910286136914,
      "learning_rate": 4.937866398668764e-06,
      "loss": 0.5812,
      "step": 1918
    },
    {
      "epoch": 0.23528690534575772,
      "grad_norm": 2.305077859245341,
      "learning_rate": 4.9377540528513276e-06,
      "loss": 0.576,
      "step": 1919
    },
    {
      "epoch": 0.23540951446787642,
      "grad_norm": 2.4409196890369746,
      "learning_rate": 4.9376416068378845e-06,
      "loss": 0.5959,
      "step": 1920
    },
    {
      "epoch": 0.2355321235899951,
      "grad_norm": 2.2479018442985894,
      "learning_rate": 4.937529060633054e-06,
      "loss": 0.5837,
      "step": 1921
    },
    {
      "epoch": 0.2356547327121138,
      "grad_norm": 2.390817495047704,
      "learning_rate": 4.937416414241465e-06,
      "loss": 0.5797,
      "step": 1922
    },
    {
      "epoch": 0.23577734183423246,
      "grad_norm": 2.371724230426368,
      "learning_rate": 4.9373036676677466e-06,
      "loss": 0.6074,
      "step": 1923
    },
    {
      "epoch": 0.23589995095635116,
      "grad_norm": 2.253958086075756,
      "learning_rate": 4.937190820916533e-06,
      "loss": 0.5326,
      "step": 1924
    },
    {
      "epoch": 0.23602256007846983,
      "grad_norm": 2.89897601679598,
      "learning_rate": 4.937077873992461e-06,
      "loss": 0.5851,
      "step": 1925
    },
    {
      "epoch": 0.23614516920058853,
      "grad_norm": 2.5299354719359366,
      "learning_rate": 4.936964826900174e-06,
      "loss": 0.6004,
      "step": 1926
    },
    {
      "epoch": 0.2362677783227072,
      "grad_norm": 2.2055268906496446,
      "learning_rate": 4.936851679644318e-06,
      "loss": 0.6078,
      "step": 1927
    },
    {
      "epoch": 0.2363903874448259,
      "grad_norm": 2.1792034121934356,
      "learning_rate": 4.936738432229544e-06,
      "loss": 0.5747,
      "step": 1928
    },
    {
      "epoch": 0.23651299656694458,
      "grad_norm": 2.2229179204988267,
      "learning_rate": 4.936625084660507e-06,
      "loss": 0.5669,
      "step": 1929
    },
    {
      "epoch": 0.23663560568906328,
      "grad_norm": 2.2350583599914313,
      "learning_rate": 4.9365116369418654e-06,
      "loss": 0.6114,
      "step": 1930
    },
    {
      "epoch": 0.23675821481118195,
      "grad_norm": 2.364474403842644,
      "learning_rate": 4.936398089078281e-06,
      "loss": 0.6028,
      "step": 1931
    },
    {
      "epoch": 0.23688082393330065,
      "grad_norm": 2.3597970607312604,
      "learning_rate": 4.936284441074423e-06,
      "loss": 0.5826,
      "step": 1932
    },
    {
      "epoch": 0.23700343305541932,
      "grad_norm": 2.5340077844738427,
      "learning_rate": 4.936170692934961e-06,
      "loss": 0.5661,
      "step": 1933
    },
    {
      "epoch": 0.23712604217753802,
      "grad_norm": 2.4015879742766764,
      "learning_rate": 4.936056844664571e-06,
      "loss": 0.6253,
      "step": 1934
    },
    {
      "epoch": 0.2372486512996567,
      "grad_norm": 2.3585901855833864,
      "learning_rate": 4.935942896267931e-06,
      "loss": 0.5899,
      "step": 1935
    },
    {
      "epoch": 0.2373712604217754,
      "grad_norm": 2.2453434936756635,
      "learning_rate": 4.935828847749726e-06,
      "loss": 0.5974,
      "step": 1936
    },
    {
      "epoch": 0.23749386954389407,
      "grad_norm": 2.215610119576248,
      "learning_rate": 4.9357146991146435e-06,
      "loss": 0.5611,
      "step": 1937
    },
    {
      "epoch": 0.23761647866601276,
      "grad_norm": 2.5650140559026884,
      "learning_rate": 4.935600450367374e-06,
      "loss": 0.5602,
      "step": 1938
    },
    {
      "epoch": 0.23773908778813144,
      "grad_norm": 2.2737265574163468,
      "learning_rate": 4.935486101512614e-06,
      "loss": 0.5478,
      "step": 1939
    },
    {
      "epoch": 0.2378616969102501,
      "grad_norm": 1.9974307339150483,
      "learning_rate": 4.935371652555064e-06,
      "loss": 0.5567,
      "step": 1940
    },
    {
      "epoch": 0.2379843060323688,
      "grad_norm": 2.199044349127321,
      "learning_rate": 4.9352571034994275e-06,
      "loss": 0.5479,
      "step": 1941
    },
    {
      "epoch": 0.23810691515448748,
      "grad_norm": 2.0643872262351826,
      "learning_rate": 4.935142454350414e-06,
      "loss": 0.5589,
      "step": 1942
    },
    {
      "epoch": 0.23822952427660618,
      "grad_norm": 2.4012246353735334,
      "learning_rate": 4.935027705112733e-06,
      "loss": 0.6051,
      "step": 1943
    },
    {
      "epoch": 0.23835213339872485,
      "grad_norm": 2.2367992804159447,
      "learning_rate": 4.934912855791102e-06,
      "loss": 0.6277,
      "step": 1944
    },
    {
      "epoch": 0.23847474252084355,
      "grad_norm": 2.2940117649794574,
      "learning_rate": 4.934797906390243e-06,
      "loss": 0.56,
      "step": 1945
    },
    {
      "epoch": 0.23859735164296222,
      "grad_norm": 2.2994704708746645,
      "learning_rate": 4.9346828569148795e-06,
      "loss": 0.5558,
      "step": 1946
    },
    {
      "epoch": 0.23871996076508092,
      "grad_norm": 2.252678494658099,
      "learning_rate": 4.93456770736974e-06,
      "loss": 0.5525,
      "step": 1947
    },
    {
      "epoch": 0.2388425698871996,
      "grad_norm": 2.348189857370122,
      "learning_rate": 4.934452457759558e-06,
      "loss": 0.5916,
      "step": 1948
    },
    {
      "epoch": 0.2389651790093183,
      "grad_norm": 2.0362668018007044,
      "learning_rate": 4.9343371080890704e-06,
      "loss": 0.5623,
      "step": 1949
    },
    {
      "epoch": 0.23908778813143697,
      "grad_norm": 2.1355844387743756,
      "learning_rate": 4.934221658363017e-06,
      "loss": 0.5438,
      "step": 1950
    },
    {
      "epoch": 0.23921039725355567,
      "grad_norm": 2.0979872184599233,
      "learning_rate": 4.934106108586144e-06,
      "loss": 0.5378,
      "step": 1951
    },
    {
      "epoch": 0.23933300637567434,
      "grad_norm": 2.398360976448104,
      "learning_rate": 4.933990458763202e-06,
      "loss": 0.545,
      "step": 1952
    },
    {
      "epoch": 0.23945561549779304,
      "grad_norm": 2.027399405473879,
      "learning_rate": 4.933874708898942e-06,
      "loss": 0.5794,
      "step": 1953
    },
    {
      "epoch": 0.2395782246199117,
      "grad_norm": 2.3250107615804896,
      "learning_rate": 4.933758858998123e-06,
      "loss": 0.6093,
      "step": 1954
    },
    {
      "epoch": 0.2397008337420304,
      "grad_norm": 2.477961066942588,
      "learning_rate": 4.933642909065506e-06,
      "loss": 0.5825,
      "step": 1955
    },
    {
      "epoch": 0.23982344286414908,
      "grad_norm": 2.1692458560578665,
      "learning_rate": 4.9335268591058575e-06,
      "loss": 0.588,
      "step": 1956
    },
    {
      "epoch": 0.23994605198626778,
      "grad_norm": 2.434710064723215,
      "learning_rate": 4.933410709123947e-06,
      "loss": 0.6202,
      "step": 1957
    },
    {
      "epoch": 0.24006866110838646,
      "grad_norm": 2.5150977827714804,
      "learning_rate": 4.933294459124547e-06,
      "loss": 0.5252,
      "step": 1958
    },
    {
      "epoch": 0.24019127023050516,
      "grad_norm": 2.519387851513091,
      "learning_rate": 4.933178109112438e-06,
      "loss": 0.572,
      "step": 1959
    },
    {
      "epoch": 0.24031387935262383,
      "grad_norm": 2.1565012179958414,
      "learning_rate": 4.933061659092401e-06,
      "loss": 0.545,
      "step": 1960
    },
    {
      "epoch": 0.24043648847474253,
      "grad_norm": 2.116626179847483,
      "learning_rate": 4.932945109069223e-06,
      "loss": 0.6252,
      "step": 1961
    },
    {
      "epoch": 0.2405590975968612,
      "grad_norm": 2.292938702746222,
      "learning_rate": 4.932828459047693e-06,
      "loss": 0.6385,
      "step": 1962
    },
    {
      "epoch": 0.2406817067189799,
      "grad_norm": 1.9249607961539104,
      "learning_rate": 4.932711709032607e-06,
      "loss": 0.5863,
      "step": 1963
    },
    {
      "epoch": 0.24080431584109857,
      "grad_norm": 2.0423355023620853,
      "learning_rate": 4.932594859028762e-06,
      "loss": 0.5538,
      "step": 1964
    },
    {
      "epoch": 0.24092692496321727,
      "grad_norm": 2.472760167864828,
      "learning_rate": 4.9324779090409625e-06,
      "loss": 0.5385,
      "step": 1965
    },
    {
      "epoch": 0.24104953408533594,
      "grad_norm": 2.1152772746658965,
      "learning_rate": 4.932360859074014e-06,
      "loss": 0.5928,
      "step": 1966
    },
    {
      "epoch": 0.24117214320745464,
      "grad_norm": 2.033538070411867,
      "learning_rate": 4.932243709132728e-06,
      "loss": 0.5616,
      "step": 1967
    },
    {
      "epoch": 0.24129475232957331,
      "grad_norm": 2.1770487736783184,
      "learning_rate": 4.932126459221921e-06,
      "loss": 0.6013,
      "step": 1968
    },
    {
      "epoch": 0.24141736145169201,
      "grad_norm": 2.472505963650954,
      "learning_rate": 4.93200910934641e-06,
      "loss": 0.5336,
      "step": 1969
    },
    {
      "epoch": 0.2415399705738107,
      "grad_norm": 2.5001115199499373,
      "learning_rate": 4.931891659511018e-06,
      "loss": 0.5551,
      "step": 1970
    },
    {
      "epoch": 0.24166257969592939,
      "grad_norm": 2.19989706944098,
      "learning_rate": 4.931774109720574e-06,
      "loss": 0.5913,
      "step": 1971
    },
    {
      "epoch": 0.24178518881804806,
      "grad_norm": 2.276392226302773,
      "learning_rate": 4.93165645997991e-06,
      "loss": 0.5854,
      "step": 1972
    },
    {
      "epoch": 0.24190779794016676,
      "grad_norm": 2.142627723223788,
      "learning_rate": 4.93153871029386e-06,
      "loss": 0.6412,
      "step": 1973
    },
    {
      "epoch": 0.24203040706228543,
      "grad_norm": 2.198116062657878,
      "learning_rate": 4.9314208606672635e-06,
      "loss": 0.5468,
      "step": 1974
    },
    {
      "epoch": 0.24215301618440413,
      "grad_norm": 2.317267140132006,
      "learning_rate": 4.931302911104966e-06,
      "loss": 0.6325,
      "step": 1975
    },
    {
      "epoch": 0.2422756253065228,
      "grad_norm": 1.9940045982399432,
      "learning_rate": 4.931184861611814e-06,
      "loss": 0.5251,
      "step": 1976
    },
    {
      "epoch": 0.2423982344286415,
      "grad_norm": 2.3403109874014643,
      "learning_rate": 4.931066712192661e-06,
      "loss": 0.6048,
      "step": 1977
    },
    {
      "epoch": 0.24252084355076017,
      "grad_norm": 2.2058209659779164,
      "learning_rate": 4.930948462852362e-06,
      "loss": 0.5589,
      "step": 1978
    },
    {
      "epoch": 0.24264345267287887,
      "grad_norm": 2.132558420559285,
      "learning_rate": 4.930830113595777e-06,
      "loss": 0.5872,
      "step": 1979
    },
    {
      "epoch": 0.24276606179499755,
      "grad_norm": 2.504313997129672,
      "learning_rate": 4.930711664427771e-06,
      "loss": 0.6027,
      "step": 1980
    },
    {
      "epoch": 0.24288867091711625,
      "grad_norm": 2.4913641414662266,
      "learning_rate": 4.930593115353213e-06,
      "loss": 0.5989,
      "step": 1981
    },
    {
      "epoch": 0.24301128003923492,
      "grad_norm": 2.1532547169071896,
      "learning_rate": 4.9304744663769745e-06,
      "loss": 0.5686,
      "step": 1982
    },
    {
      "epoch": 0.24313388916135362,
      "grad_norm": 2.065786105081435,
      "learning_rate": 4.930355717503932e-06,
      "loss": 0.5799,
      "step": 1983
    },
    {
      "epoch": 0.2432564982834723,
      "grad_norm": 2.0635279807596727,
      "learning_rate": 4.930236868738968e-06,
      "loss": 0.5865,
      "step": 1984
    },
    {
      "epoch": 0.243379107405591,
      "grad_norm": 2.346883163938599,
      "learning_rate": 4.930117920086965e-06,
      "loss": 0.5112,
      "step": 1985
    },
    {
      "epoch": 0.24350171652770966,
      "grad_norm": 2.3364018163510782,
      "learning_rate": 4.929998871552814e-06,
      "loss": 0.6355,
      "step": 1986
    },
    {
      "epoch": 0.24362432564982836,
      "grad_norm": 2.079492260660384,
      "learning_rate": 4.929879723141407e-06,
      "loss": 0.5912,
      "step": 1987
    },
    {
      "epoch": 0.24374693477194703,
      "grad_norm": 2.2770998373740117,
      "learning_rate": 4.929760474857643e-06,
      "loss": 0.5377,
      "step": 1988
    },
    {
      "epoch": 0.24386954389406573,
      "grad_norm": 2.2636116007249427,
      "learning_rate": 4.929641126706421e-06,
      "loss": 0.6266,
      "step": 1989
    },
    {
      "epoch": 0.2439921530161844,
      "grad_norm": 2.2121021740260796,
      "learning_rate": 4.929521678692647e-06,
      "loss": 0.6068,
      "step": 1990
    },
    {
      "epoch": 0.24411476213830308,
      "grad_norm": 2.0558174927299606,
      "learning_rate": 4.929402130821231e-06,
      "loss": 0.639,
      "step": 1991
    },
    {
      "epoch": 0.24423737126042178,
      "grad_norm": 2.1447530536961428,
      "learning_rate": 4.929282483097087e-06,
      "loss": 0.5775,
      "step": 1992
    },
    {
      "epoch": 0.24435998038254045,
      "grad_norm": 2.1190842767571136,
      "learning_rate": 4.929162735525133e-06,
      "loss": 0.582,
      "step": 1993
    },
    {
      "epoch": 0.24448258950465915,
      "grad_norm": 2.104002749397253,
      "learning_rate": 4.929042888110288e-06,
      "loss": 0.6152,
      "step": 1994
    },
    {
      "epoch": 0.24460519862677782,
      "grad_norm": 2.3931503365219506,
      "learning_rate": 4.928922940857481e-06,
      "loss": 0.5974,
      "step": 1995
    },
    {
      "epoch": 0.24472780774889652,
      "grad_norm": 2.0441934232981622,
      "learning_rate": 4.9288028937716415e-06,
      "loss": 0.5649,
      "step": 1996
    },
    {
      "epoch": 0.2448504168710152,
      "grad_norm": 2.1799546545452735,
      "learning_rate": 4.928682746857703e-06,
      "loss": 0.594,
      "step": 1997
    },
    {
      "epoch": 0.2449730259931339,
      "grad_norm": 2.114735458944203,
      "learning_rate": 4.9285625001206036e-06,
      "loss": 0.5895,
      "step": 1998
    },
    {
      "epoch": 0.24509563511525256,
      "grad_norm": 2.4341251259402807,
      "learning_rate": 4.928442153565286e-06,
      "loss": 0.5581,
      "step": 1999
    },
    {
      "epoch": 0.24521824423737126,
      "grad_norm": 2.0190000257437637,
      "learning_rate": 4.928321707196697e-06,
      "loss": 0.5818,
      "step": 2000
    },
    {
      "epoch": 0.24534085335948994,
      "grad_norm": 2.168249242405798,
      "learning_rate": 4.928201161019787e-06,
      "loss": 0.5875,
      "step": 2001
    },
    {
      "epoch": 0.24546346248160864,
      "grad_norm": 2.228954037887039,
      "learning_rate": 4.92808051503951e-06,
      "loss": 0.5182,
      "step": 2002
    },
    {
      "epoch": 0.2455860716037273,
      "grad_norm": 2.111297626689257,
      "learning_rate": 4.927959769260825e-06,
      "loss": 0.618,
      "step": 2003
    },
    {
      "epoch": 0.245708680725846,
      "grad_norm": 1.9117894733168679,
      "learning_rate": 4.927838923688696e-06,
      "loss": 0.548,
      "step": 2004
    },
    {
      "epoch": 0.24583128984796468,
      "grad_norm": 2.1106863394710444,
      "learning_rate": 4.927717978328089e-06,
      "loss": 0.5451,
      "step": 2005
    },
    {
      "epoch": 0.24595389897008338,
      "grad_norm": 2.211360029844754,
      "learning_rate": 4.9275969331839745e-06,
      "loss": 0.5825,
      "step": 2006
    },
    {
      "epoch": 0.24607650809220205,
      "grad_norm": 2.438520442386629,
      "learning_rate": 4.927475788261329e-06,
      "loss": 0.5878,
      "step": 2007
    },
    {
      "epoch": 0.24619911721432075,
      "grad_norm": 2.413206447980687,
      "learning_rate": 4.927354543565131e-06,
      "loss": 0.568,
      "step": 2008
    },
    {
      "epoch": 0.24632172633643942,
      "grad_norm": 2.3361892673031397,
      "learning_rate": 4.927233199100363e-06,
      "loss": 0.6178,
      "step": 2009
    },
    {
      "epoch": 0.24644433545855812,
      "grad_norm": 2.247364233058328,
      "learning_rate": 4.927111754872014e-06,
      "loss": 0.5832,
      "step": 2010
    },
    {
      "epoch": 0.2465669445806768,
      "grad_norm": 2.0894400678494214,
      "learning_rate": 4.926990210885075e-06,
      "loss": 0.5952,
      "step": 2011
    },
    {
      "epoch": 0.2466895537027955,
      "grad_norm": 2.4251743142963944,
      "learning_rate": 4.926868567144543e-06,
      "loss": 0.572,
      "step": 2012
    },
    {
      "epoch": 0.24681216282491417,
      "grad_norm": 2.351737560956717,
      "learning_rate": 4.9267468236554145e-06,
      "loss": 0.6463,
      "step": 2013
    },
    {
      "epoch": 0.24693477194703287,
      "grad_norm": 2.1016133170626636,
      "learning_rate": 4.9266249804226965e-06,
      "loss": 0.5443,
      "step": 2014
    },
    {
      "epoch": 0.24705738106915154,
      "grad_norm": 2.2912761034385705,
      "learning_rate": 4.926503037451395e-06,
      "loss": 0.5599,
      "step": 2015
    },
    {
      "epoch": 0.24717999019127024,
      "grad_norm": 2.376828471085611,
      "learning_rate": 4.9263809947465234e-06,
      "loss": 0.5899,
      "step": 2016
    },
    {
      "epoch": 0.2473025993133889,
      "grad_norm": 2.2965016839113384,
      "learning_rate": 4.926258852313097e-06,
      "loss": 0.5861,
      "step": 2017
    },
    {
      "epoch": 0.2474252084355076,
      "grad_norm": 2.0456993924614246,
      "learning_rate": 4.926136610156137e-06,
      "loss": 0.5482,
      "step": 2018
    },
    {
      "epoch": 0.24754781755762628,
      "grad_norm": 2.4059684545682325,
      "learning_rate": 4.9260142682806665e-06,
      "loss": 0.6112,
      "step": 2019
    },
    {
      "epoch": 0.24767042667974498,
      "grad_norm": 2.321498600359463,
      "learning_rate": 4.925891826691715e-06,
      "loss": 0.5883,
      "step": 2020
    },
    {
      "epoch": 0.24779303580186365,
      "grad_norm": 2.3470692960473243,
      "learning_rate": 4.925769285394315e-06,
      "loss": 0.5549,
      "step": 2021
    },
    {
      "epoch": 0.24791564492398235,
      "grad_norm": 2.4062217672524695,
      "learning_rate": 4.925646644393503e-06,
      "loss": 0.5673,
      "step": 2022
    },
    {
      "epoch": 0.24803825404610103,
      "grad_norm": 2.297085798102744,
      "learning_rate": 4.925523903694319e-06,
      "loss": 0.5665,
      "step": 2023
    },
    {
      "epoch": 0.24816086316821973,
      "grad_norm": 2.163334897031941,
      "learning_rate": 4.925401063301809e-06,
      "loss": 0.5858,
      "step": 2024
    },
    {
      "epoch": 0.2482834722903384,
      "grad_norm": 2.1192146708584816,
      "learning_rate": 4.925278123221021e-06,
      "loss": 0.5604,
      "step": 2025
    },
    {
      "epoch": 0.2484060814124571,
      "grad_norm": 2.2275075870922225,
      "learning_rate": 4.925155083457009e-06,
      "loss": 0.5563,
      "step": 2026
    },
    {
      "epoch": 0.24852869053457577,
      "grad_norm": 2.1826569080318725,
      "learning_rate": 4.925031944014828e-06,
      "loss": 0.548,
      "step": 2027
    },
    {
      "epoch": 0.24865129965669447,
      "grad_norm": 2.3851465921365627,
      "learning_rate": 4.924908704899543e-06,
      "loss": 0.5659,
      "step": 2028
    },
    {
      "epoch": 0.24877390877881314,
      "grad_norm": 2.3477973784937882,
      "learning_rate": 4.924785366116217e-06,
      "loss": 0.5558,
      "step": 2029
    },
    {
      "epoch": 0.24889651790093184,
      "grad_norm": 2.4807977613319165,
      "learning_rate": 4.924661927669919e-06,
      "loss": 0.6103,
      "step": 2030
    },
    {
      "epoch": 0.2490191270230505,
      "grad_norm": 2.3956741262465773,
      "learning_rate": 4.924538389565724e-06,
      "loss": 0.5078,
      "step": 2031
    },
    {
      "epoch": 0.2491417361451692,
      "grad_norm": 2.2899174963026256,
      "learning_rate": 4.924414751808709e-06,
      "loss": 0.5855,
      "step": 2032
    },
    {
      "epoch": 0.24926434526728788,
      "grad_norm": 2.7380173679018527,
      "learning_rate": 4.9242910144039545e-06,
      "loss": 0.5864,
      "step": 2033
    },
    {
      "epoch": 0.24938695438940658,
      "grad_norm": 2.1924425644941157,
      "learning_rate": 4.924167177356548e-06,
      "loss": 0.5486,
      "step": 2034
    },
    {
      "epoch": 0.24950956351152526,
      "grad_norm": 2.1209245293488936,
      "learning_rate": 4.9240432406715785e-06,
      "loss": 0.5522,
      "step": 2035
    },
    {
      "epoch": 0.24963217263364396,
      "grad_norm": 2.219061363434667,
      "learning_rate": 4.9239192043541404e-06,
      "loss": 0.5584,
      "step": 2036
    },
    {
      "epoch": 0.24975478175576263,
      "grad_norm": 2.2696127971871656,
      "learning_rate": 4.9237950684093326e-06,
      "loss": 0.5249,
      "step": 2037
    },
    {
      "epoch": 0.24987739087788133,
      "grad_norm": 2.1807453877546066,
      "learning_rate": 4.923670832842256e-06,
      "loss": 0.5312,
      "step": 2038
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0315571191763975,
      "learning_rate": 4.9235464976580175e-06,
      "loss": 0.6138,
      "step": 2039
    },
    {
      "epoch": 0.2501226091221187,
      "grad_norm": 2.198515362881359,
      "learning_rate": 4.923422062861727e-06,
      "loss": 0.5708,
      "step": 2040
    },
    {
      "epoch": 0.25024521824423734,
      "grad_norm": 2.5711140119112006,
      "learning_rate": 4.923297528458499e-06,
      "loss": 0.6364,
      "step": 2041
    },
    {
      "epoch": 0.25036782736635604,
      "grad_norm": 2.0754258430970047,
      "learning_rate": 4.923172894453454e-06,
      "loss": 0.6023,
      "step": 2042
    },
    {
      "epoch": 0.25049043648847474,
      "grad_norm": 1.9315660850866858,
      "learning_rate": 4.923048160851712e-06,
      "loss": 0.5706,
      "step": 2043
    },
    {
      "epoch": 0.25061304561059344,
      "grad_norm": 2.0971292439700333,
      "learning_rate": 4.922923327658401e-06,
      "loss": 0.5777,
      "step": 2044
    },
    {
      "epoch": 0.2507356547327121,
      "grad_norm": 2.4124133158466643,
      "learning_rate": 4.922798394878652e-06,
      "loss": 0.5999,
      "step": 2045
    },
    {
      "epoch": 0.2508582638548308,
      "grad_norm": 1.9366158711791246,
      "learning_rate": 4.922673362517601e-06,
      "loss": 0.5612,
      "step": 2046
    },
    {
      "epoch": 0.2509808729769495,
      "grad_norm": 2.010434219444772,
      "learning_rate": 4.922548230580384e-06,
      "loss": 0.594,
      "step": 2047
    },
    {
      "epoch": 0.2511034820990682,
      "grad_norm": 2.2640964122814684,
      "learning_rate": 4.922422999072146e-06,
      "loss": 0.5499,
      "step": 2048
    },
    {
      "epoch": 0.25122609122118683,
      "grad_norm": 2.219172645798461,
      "learning_rate": 4.922297667998035e-06,
      "loss": 0.5553,
      "step": 2049
    },
    {
      "epoch": 0.25134870034330553,
      "grad_norm": 2.0670662176259795,
      "learning_rate": 4.922172237363201e-06,
      "loss": 0.6058,
      "step": 2050
    },
    {
      "epoch": 0.25147130946542423,
      "grad_norm": 2.1887932510097365,
      "learning_rate": 4.9220467071728e-06,
      "loss": 0.594,
      "step": 2051
    },
    {
      "epoch": 0.25159391858754293,
      "grad_norm": 2.3614086835512382,
      "learning_rate": 4.921921077431991e-06,
      "loss": 0.6092,
      "step": 2052
    },
    {
      "epoch": 0.2517165277096616,
      "grad_norm": 1.9963616668050974,
      "learning_rate": 4.921795348145939e-06,
      "loss": 0.536,
      "step": 2053
    },
    {
      "epoch": 0.2518391368317803,
      "grad_norm": 2.030419362040186,
      "learning_rate": 4.921669519319811e-06,
      "loss": 0.5857,
      "step": 2054
    },
    {
      "epoch": 0.251961745953899,
      "grad_norm": 2.316028556673973,
      "learning_rate": 4.921543590958777e-06,
      "loss": 0.5519,
      "step": 2055
    },
    {
      "epoch": 0.2520843550760177,
      "grad_norm": 1.9864028083338225,
      "learning_rate": 4.921417563068015e-06,
      "loss": 0.5628,
      "step": 2056
    },
    {
      "epoch": 0.2522069641981363,
      "grad_norm": 2.0491356587009153,
      "learning_rate": 4.921291435652705e-06,
      "loss": 0.5377,
      "step": 2057
    },
    {
      "epoch": 0.252329573320255,
      "grad_norm": 2.2119316450207545,
      "learning_rate": 4.92116520871803e-06,
      "loss": 0.5949,
      "step": 2058
    },
    {
      "epoch": 0.2524521824423737,
      "grad_norm": 2.128048426646885,
      "learning_rate": 4.921038882269179e-06,
      "loss": 0.615,
      "step": 2059
    },
    {
      "epoch": 0.2525747915644924,
      "grad_norm": 2.3274209818797273,
      "learning_rate": 4.920912456311343e-06,
      "loss": 0.5893,
      "step": 2060
    },
    {
      "epoch": 0.25269740068661106,
      "grad_norm": 2.2321760287624706,
      "learning_rate": 4.92078593084972e-06,
      "loss": 0.5576,
      "step": 2061
    },
    {
      "epoch": 0.25282000980872976,
      "grad_norm": 2.316829825472374,
      "learning_rate": 4.920659305889509e-06,
      "loss": 0.5856,
      "step": 2062
    },
    {
      "epoch": 0.25294261893084846,
      "grad_norm": 2.2275106396256326,
      "learning_rate": 4.920532581435915e-06,
      "loss": 0.6338,
      "step": 2063
    },
    {
      "epoch": 0.25306522805296716,
      "grad_norm": 2.053423151117498,
      "learning_rate": 4.920405757494147e-06,
      "loss": 0.5758,
      "step": 2064
    },
    {
      "epoch": 0.2531878371750858,
      "grad_norm": 2.2623813093102974,
      "learning_rate": 4.920278834069417e-06,
      "loss": 0.6041,
      "step": 2065
    },
    {
      "epoch": 0.2533104462972045,
      "grad_norm": 2.424102454405068,
      "learning_rate": 4.920151811166943e-06,
      "loss": 0.6289,
      "step": 2066
    },
    {
      "epoch": 0.2534330554193232,
      "grad_norm": 2.2067042529186334,
      "learning_rate": 4.9200246887919446e-06,
      "loss": 0.6025,
      "step": 2067
    },
    {
      "epoch": 0.2535556645414419,
      "grad_norm": 2.33288469843875,
      "learning_rate": 4.919897466949647e-06,
      "loss": 0.6168,
      "step": 2068
    },
    {
      "epoch": 0.25367827366356055,
      "grad_norm": 2.2626629372069855,
      "learning_rate": 4.919770145645279e-06,
      "loss": 0.575,
      "step": 2069
    },
    {
      "epoch": 0.25380088278567925,
      "grad_norm": 2.0894560353864358,
      "learning_rate": 4.919642724884075e-06,
      "loss": 0.569,
      "step": 2070
    },
    {
      "epoch": 0.25392349190779795,
      "grad_norm": 2.202863672789426,
      "learning_rate": 4.919515204671272e-06,
      "loss": 0.5663,
      "step": 2071
    },
    {
      "epoch": 0.25404610102991665,
      "grad_norm": 2.1857404100491284,
      "learning_rate": 4.919387585012108e-06,
      "loss": 0.601,
      "step": 2072
    },
    {
      "epoch": 0.2541687101520353,
      "grad_norm": 2.3377616479438097,
      "learning_rate": 4.919259865911834e-06,
      "loss": 0.5669,
      "step": 2073
    },
    {
      "epoch": 0.254291319274154,
      "grad_norm": 2.4759094482978075,
      "learning_rate": 4.9191320473756955e-06,
      "loss": 0.5907,
      "step": 2074
    },
    {
      "epoch": 0.2544139283962727,
      "grad_norm": 2.024793552614871,
      "learning_rate": 4.9190041294089465e-06,
      "loss": 0.5355,
      "step": 2075
    },
    {
      "epoch": 0.2545365375183914,
      "grad_norm": 2.268900655639409,
      "learning_rate": 4.918876112016846e-06,
      "loss": 0.5861,
      "step": 2076
    },
    {
      "epoch": 0.25465914664051004,
      "grad_norm": 1.9964563151832666,
      "learning_rate": 4.918747995204655e-06,
      "loss": 0.5633,
      "step": 2077
    },
    {
      "epoch": 0.25478175576262874,
      "grad_norm": 2.398098960109017,
      "learning_rate": 4.918619778977639e-06,
      "loss": 0.5608,
      "step": 2078
    },
    {
      "epoch": 0.25490436488474744,
      "grad_norm": 2.4673178493937464,
      "learning_rate": 4.918491463341068e-06,
      "loss": 0.6413,
      "step": 2079
    },
    {
      "epoch": 0.25502697400686614,
      "grad_norm": 2.2211452736889847,
      "learning_rate": 4.9183630483002164e-06,
      "loss": 0.5843,
      "step": 2080
    },
    {
      "epoch": 0.2551495831289848,
      "grad_norm": 2.057075918452542,
      "learning_rate": 4.918234533860362e-06,
      "loss": 0.5552,
      "step": 2081
    },
    {
      "epoch": 0.2552721922511035,
      "grad_norm": 2.270289109509578,
      "learning_rate": 4.918105920026788e-06,
      "loss": 0.5821,
      "step": 2082
    },
    {
      "epoch": 0.2553948013732222,
      "grad_norm": 2.279485204873285,
      "learning_rate": 4.9179772068047786e-06,
      "loss": 0.6294,
      "step": 2083
    },
    {
      "epoch": 0.2555174104953409,
      "grad_norm": 2.1894691029685305,
      "learning_rate": 4.917848394199626e-06,
      "loss": 0.5544,
      "step": 2084
    },
    {
      "epoch": 0.2556400196174595,
      "grad_norm": 2.5044236411923,
      "learning_rate": 4.917719482216624e-06,
      "loss": 0.625,
      "step": 2085
    },
    {
      "epoch": 0.2557626287395782,
      "grad_norm": 2.42361178314466,
      "learning_rate": 4.917590470861071e-06,
      "loss": 0.6054,
      "step": 2086
    },
    {
      "epoch": 0.2558852378616969,
      "grad_norm": 2.2453850828172834,
      "learning_rate": 4.917461360138269e-06,
      "loss": 0.5859,
      "step": 2087
    },
    {
      "epoch": 0.2560078469838156,
      "grad_norm": 2.1945877214532463,
      "learning_rate": 4.917332150053527e-06,
      "loss": 0.5933,
      "step": 2088
    },
    {
      "epoch": 0.25613045610593427,
      "grad_norm": 1.996689302156182,
      "learning_rate": 4.917202840612152e-06,
      "loss": 0.5901,
      "step": 2089
    },
    {
      "epoch": 0.25625306522805297,
      "grad_norm": 2.053613960529471,
      "learning_rate": 4.917073431819462e-06,
      "loss": 0.5487,
      "step": 2090
    },
    {
      "epoch": 0.25637567435017167,
      "grad_norm": 2.4107394516876006,
      "learning_rate": 4.9169439236807744e-06,
      "loss": 0.565,
      "step": 2091
    },
    {
      "epoch": 0.2564982834722903,
      "grad_norm": 2.430439723489835,
      "learning_rate": 4.916814316201413e-06,
      "loss": 0.5189,
      "step": 2092
    },
    {
      "epoch": 0.256620892594409,
      "grad_norm": 2.23689619886947,
      "learning_rate": 4.916684609386705e-06,
      "loss": 0.5398,
      "step": 2093
    },
    {
      "epoch": 0.2567435017165277,
      "grad_norm": 2.2919988174533086,
      "learning_rate": 4.9165548032419805e-06,
      "loss": 0.6173,
      "step": 2094
    },
    {
      "epoch": 0.2568661108386464,
      "grad_norm": 2.2315174059956124,
      "learning_rate": 4.916424897772575e-06,
      "loss": 0.5804,
      "step": 2095
    },
    {
      "epoch": 0.25698871996076506,
      "grad_norm": 2.0713177036213213,
      "learning_rate": 4.916294892983829e-06,
      "loss": 0.5225,
      "step": 2096
    },
    {
      "epoch": 0.25711132908288375,
      "grad_norm": 2.26090177625888,
      "learning_rate": 4.916164788881086e-06,
      "loss": 0.5451,
      "step": 2097
    },
    {
      "epoch": 0.25723393820500245,
      "grad_norm": 1.753772542434545,
      "learning_rate": 4.916034585469691e-06,
      "loss": 0.4705,
      "step": 2098
    },
    {
      "epoch": 0.25735654732712115,
      "grad_norm": 2.1607411123600944,
      "learning_rate": 4.9159042827549985e-06,
      "loss": 0.577,
      "step": 2099
    },
    {
      "epoch": 0.2574791564492398,
      "grad_norm": 2.3997708934398814,
      "learning_rate": 4.915773880742362e-06,
      "loss": 0.6142,
      "step": 2100
    },
    {
      "epoch": 0.2576017655713585,
      "grad_norm": 2.235962905964515,
      "learning_rate": 4.915643379437143e-06,
      "loss": 0.5851,
      "step": 2101
    },
    {
      "epoch": 0.2577243746934772,
      "grad_norm": 2.2073329132565274,
      "learning_rate": 4.915512778844705e-06,
      "loss": 0.5405,
      "step": 2102
    },
    {
      "epoch": 0.2578469838155959,
      "grad_norm": 2.0596571428533013,
      "learning_rate": 4.9153820789704134e-06,
      "loss": 0.5867,
      "step": 2103
    },
    {
      "epoch": 0.25796959293771454,
      "grad_norm": 2.1814037582302284,
      "learning_rate": 4.915251279819644e-06,
      "loss": 0.5431,
      "step": 2104
    },
    {
      "epoch": 0.25809220205983324,
      "grad_norm": 2.2626261370184264,
      "learning_rate": 4.915120381397771e-06,
      "loss": 0.5694,
      "step": 2105
    },
    {
      "epoch": 0.25821481118195194,
      "grad_norm": 2.412100484889518,
      "learning_rate": 4.914989383710173e-06,
      "loss": 0.5408,
      "step": 2106
    },
    {
      "epoch": 0.25833742030407064,
      "grad_norm": 2.05273232888658,
      "learning_rate": 4.914858286762239e-06,
      "loss": 0.6066,
      "step": 2107
    },
    {
      "epoch": 0.2584600294261893,
      "grad_norm": 2.216372915121689,
      "learning_rate": 4.914727090559352e-06,
      "loss": 0.5284,
      "step": 2108
    },
    {
      "epoch": 0.258582638548308,
      "grad_norm": 2.071596597250116,
      "learning_rate": 4.914595795106907e-06,
      "loss": 0.5532,
      "step": 2109
    },
    {
      "epoch": 0.2587052476704267,
      "grad_norm": 2.328082473307757,
      "learning_rate": 4.9144644004103e-06,
      "loss": 0.5687,
      "step": 2110
    },
    {
      "epoch": 0.2588278567925454,
      "grad_norm": 2.347066786634377,
      "learning_rate": 4.9143329064749325e-06,
      "loss": 0.5522,
      "step": 2111
    },
    {
      "epoch": 0.25895046591466403,
      "grad_norm": 2.258555475421831,
      "learning_rate": 4.914201313306207e-06,
      "loss": 0.5684,
      "step": 2112
    },
    {
      "epoch": 0.25907307503678273,
      "grad_norm": 2.203357816148967,
      "learning_rate": 4.914069620909535e-06,
      "loss": 0.5792,
      "step": 2113
    },
    {
      "epoch": 0.25919568415890143,
      "grad_norm": 2.356625497053139,
      "learning_rate": 4.913937829290327e-06,
      "loss": 0.6853,
      "step": 2114
    },
    {
      "epoch": 0.25931829328102013,
      "grad_norm": 2.1719384863060704,
      "learning_rate": 4.913805938454001e-06,
      "loss": 0.5951,
      "step": 2115
    },
    {
      "epoch": 0.2594409024031388,
      "grad_norm": 2.161395873468492,
      "learning_rate": 4.913673948405977e-06,
      "loss": 0.6129,
      "step": 2116
    },
    {
      "epoch": 0.2595635115252575,
      "grad_norm": 1.9146804137499076,
      "learning_rate": 4.9135418591516815e-06,
      "loss": 0.5852,
      "step": 2117
    },
    {
      "epoch": 0.2596861206473762,
      "grad_norm": 2.0697225032498396,
      "learning_rate": 4.913409670696541e-06,
      "loss": 0.6011,
      "step": 2118
    },
    {
      "epoch": 0.2598087297694949,
      "grad_norm": 2.2684162639982133,
      "learning_rate": 4.913277383045992e-06,
      "loss": 0.6053,
      "step": 2119
    },
    {
      "epoch": 0.2599313388916135,
      "grad_norm": 2.1498031763215786,
      "learning_rate": 4.9131449962054705e-06,
      "loss": 0.5901,
      "step": 2120
    },
    {
      "epoch": 0.2600539480137322,
      "grad_norm": 1.9912035947594726,
      "learning_rate": 4.913012510180417e-06,
      "loss": 0.5501,
      "step": 2121
    },
    {
      "epoch": 0.2601765571358509,
      "grad_norm": 2.404879750102417,
      "learning_rate": 4.912879924976277e-06,
      "loss": 0.5779,
      "step": 2122
    },
    {
      "epoch": 0.2602991662579696,
      "grad_norm": 2.3001323098641833,
      "learning_rate": 4.9127472405985015e-06,
      "loss": 0.5408,
      "step": 2123
    },
    {
      "epoch": 0.26042177538008826,
      "grad_norm": 2.0782992419879696,
      "learning_rate": 4.912614457052542e-06,
      "loss": 0.5996,
      "step": 2124
    },
    {
      "epoch": 0.26054438450220696,
      "grad_norm": 2.3377486655831934,
      "learning_rate": 4.912481574343857e-06,
      "loss": 0.5399,
      "step": 2125
    },
    {
      "epoch": 0.26066699362432566,
      "grad_norm": 2.3333619111412096,
      "learning_rate": 4.912348592477909e-06,
      "loss": 0.7019,
      "step": 2126
    },
    {
      "epoch": 0.26078960274644436,
      "grad_norm": 2.1344592390000527,
      "learning_rate": 4.912215511460163e-06,
      "loss": 0.5595,
      "step": 2127
    },
    {
      "epoch": 0.260912211868563,
      "grad_norm": 2.160924586649149,
      "learning_rate": 4.912082331296088e-06,
      "loss": 0.6029,
      "step": 2128
    },
    {
      "epoch": 0.2610348209906817,
      "grad_norm": 2.0209532346485277,
      "learning_rate": 4.91194905199116e-06,
      "loss": 0.5621,
      "step": 2129
    },
    {
      "epoch": 0.2611574301128004,
      "grad_norm": 2.293484155985866,
      "learning_rate": 4.911815673550856e-06,
      "loss": 0.5792,
      "step": 2130
    },
    {
      "epoch": 0.2612800392349191,
      "grad_norm": 2.1286303189279483,
      "learning_rate": 4.911682195980657e-06,
      "loss": 0.6373,
      "step": 2131
    },
    {
      "epoch": 0.26140264835703775,
      "grad_norm": 2.3363100500841876,
      "learning_rate": 4.911548619286052e-06,
      "loss": 0.6079,
      "step": 2132
    },
    {
      "epoch": 0.26152525747915645,
      "grad_norm": 2.135472657932946,
      "learning_rate": 4.911414943472528e-06,
      "loss": 0.5735,
      "step": 2133
    },
    {
      "epoch": 0.26164786660127515,
      "grad_norm": 2.039019071476704,
      "learning_rate": 4.911281168545581e-06,
      "loss": 0.5713,
      "step": 2134
    },
    {
      "epoch": 0.26177047572339385,
      "grad_norm": 2.1318952552860813,
      "learning_rate": 4.911147294510709e-06,
      "loss": 0.5727,
      "step": 2135
    },
    {
      "epoch": 0.2618930848455125,
      "grad_norm": 2.338877175540124,
      "learning_rate": 4.911013321373415e-06,
      "loss": 0.6283,
      "step": 2136
    },
    {
      "epoch": 0.2620156939676312,
      "grad_norm": 2.045069483834694,
      "learning_rate": 4.910879249139205e-06,
      "loss": 0.5601,
      "step": 2137
    },
    {
      "epoch": 0.2621383030897499,
      "grad_norm": 2.446288071488983,
      "learning_rate": 4.91074507781359e-06,
      "loss": 0.631,
      "step": 2138
    },
    {
      "epoch": 0.26226091221186854,
      "grad_norm": 2.2143725030013512,
      "learning_rate": 4.910610807402084e-06,
      "loss": 0.5851,
      "step": 2139
    },
    {
      "epoch": 0.26238352133398724,
      "grad_norm": 2.0428226090970982,
      "learning_rate": 4.910476437910206e-06,
      "loss": 0.5322,
      "step": 2140
    },
    {
      "epoch": 0.26250613045610593,
      "grad_norm": 2.211985357587556,
      "learning_rate": 4.91034196934348e-06,
      "loss": 0.5819,
      "step": 2141
    },
    {
      "epoch": 0.26262873957822463,
      "grad_norm": 2.3817292766938336,
      "learning_rate": 4.910207401707431e-06,
      "loss": 0.6509,
      "step": 2142
    },
    {
      "epoch": 0.2627513487003433,
      "grad_norm": 1.8741277608739002,
      "learning_rate": 4.910072735007591e-06,
      "loss": 0.5453,
      "step": 2143
    },
    {
      "epoch": 0.262873957822462,
      "grad_norm": 2.1767447029547857,
      "learning_rate": 4.909937969249495e-06,
      "loss": 0.5579,
      "step": 2144
    },
    {
      "epoch": 0.2629965669445807,
      "grad_norm": 2.2097134188156082,
      "learning_rate": 4.909803104438681e-06,
      "loss": 0.5769,
      "step": 2145
    },
    {
      "epoch": 0.2631191760666994,
      "grad_norm": 2.6046682838094375,
      "learning_rate": 4.909668140580694e-06,
      "loss": 0.5795,
      "step": 2146
    },
    {
      "epoch": 0.263241785188818,
      "grad_norm": 2.2191846745781407,
      "learning_rate": 4.90953307768108e-06,
      "loss": 0.5641,
      "step": 2147
    },
    {
      "epoch": 0.2633643943109367,
      "grad_norm": 2.1956569817884457,
      "learning_rate": 4.909397915745392e-06,
      "loss": 0.5699,
      "step": 2148
    },
    {
      "epoch": 0.2634870034330554,
      "grad_norm": 2.2170847601126678,
      "learning_rate": 4.909262654779183e-06,
      "loss": 0.6094,
      "step": 2149
    },
    {
      "epoch": 0.2636096125551741,
      "grad_norm": 2.0979426513221813,
      "learning_rate": 4.9091272947880145e-06,
      "loss": 0.5905,
      "step": 2150
    },
    {
      "epoch": 0.26373222167729277,
      "grad_norm": 2.2243038747841086,
      "learning_rate": 4.908991835777449e-06,
      "loss": 0.5983,
      "step": 2151
    },
    {
      "epoch": 0.26385483079941147,
      "grad_norm": 1.9754188732961873,
      "learning_rate": 4.908856277753053e-06,
      "loss": 0.5685,
      "step": 2152
    },
    {
      "epoch": 0.26397743992153017,
      "grad_norm": 2.153120765293098,
      "learning_rate": 4.908720620720401e-06,
      "loss": 0.5787,
      "step": 2153
    },
    {
      "epoch": 0.26410004904364887,
      "grad_norm": 2.015751807900484,
      "learning_rate": 4.908584864685067e-06,
      "loss": 0.5557,
      "step": 2154
    },
    {
      "epoch": 0.2642226581657675,
      "grad_norm": 2.076028605933276,
      "learning_rate": 4.90844900965263e-06,
      "loss": 0.5742,
      "step": 2155
    },
    {
      "epoch": 0.2643452672878862,
      "grad_norm": 2.106885459026765,
      "learning_rate": 4.908313055628676e-06,
      "loss": 0.5521,
      "step": 2156
    },
    {
      "epoch": 0.2644678764100049,
      "grad_norm": 2.144266566465935,
      "learning_rate": 4.9081770026187915e-06,
      "loss": 0.5956,
      "step": 2157
    },
    {
      "epoch": 0.2645904855321236,
      "grad_norm": 2.3372151741381026,
      "learning_rate": 4.908040850628568e-06,
      "loss": 0.5698,
      "step": 2158
    },
    {
      "epoch": 0.26471309465424225,
      "grad_norm": 2.07891054446335,
      "learning_rate": 4.907904599663603e-06,
      "loss": 0.5537,
      "step": 2159
    },
    {
      "epoch": 0.26483570377636095,
      "grad_norm": 2.1094404971440484,
      "learning_rate": 4.907768249729496e-06,
      "loss": 0.5956,
      "step": 2160
    },
    {
      "epoch": 0.26495831289847965,
      "grad_norm": 2.047081488955484,
      "learning_rate": 4.907631800831853e-06,
      "loss": 0.539,
      "step": 2161
    },
    {
      "epoch": 0.26508092202059835,
      "grad_norm": 1.9630053717445346,
      "learning_rate": 4.9074952529762785e-06,
      "loss": 0.5662,
      "step": 2162
    },
    {
      "epoch": 0.265203531142717,
      "grad_norm": 2.161622616658076,
      "learning_rate": 4.907358606168387e-06,
      "loss": 0.5594,
      "step": 2163
    },
    {
      "epoch": 0.2653261402648357,
      "grad_norm": 1.9810160144413338,
      "learning_rate": 4.907221860413795e-06,
      "loss": 0.5427,
      "step": 2164
    },
    {
      "epoch": 0.2654487493869544,
      "grad_norm": 2.1762611387738433,
      "learning_rate": 4.907085015718124e-06,
      "loss": 0.5513,
      "step": 2165
    },
    {
      "epoch": 0.2655713585090731,
      "grad_norm": 2.3586662276451973,
      "learning_rate": 4.906948072086997e-06,
      "loss": 0.606,
      "step": 2166
    },
    {
      "epoch": 0.26569396763119174,
      "grad_norm": 1.9325041509130256,
      "learning_rate": 4.906811029526043e-06,
      "loss": 0.5337,
      "step": 2167
    },
    {
      "epoch": 0.26581657675331044,
      "grad_norm": 2.595756385080539,
      "learning_rate": 4.906673888040895e-06,
      "loss": 0.5682,
      "step": 2168
    },
    {
      "epoch": 0.26593918587542914,
      "grad_norm": 2.1126553519584035,
      "learning_rate": 4.906536647637189e-06,
      "loss": 0.6159,
      "step": 2169
    },
    {
      "epoch": 0.26606179499754784,
      "grad_norm": 2.2978748159257174,
      "learning_rate": 4.906399308320566e-06,
      "loss": 0.5526,
      "step": 2170
    },
    {
      "epoch": 0.2661844041196665,
      "grad_norm": 2.124716273296453,
      "learning_rate": 4.906261870096673e-06,
      "loss": 0.5772,
      "step": 2171
    },
    {
      "epoch": 0.2663070132417852,
      "grad_norm": 2.01105393890005,
      "learning_rate": 4.906124332971155e-06,
      "loss": 0.5649,
      "step": 2172
    },
    {
      "epoch": 0.2664296223639039,
      "grad_norm": 2.277164062869118,
      "learning_rate": 4.905986696949668e-06,
      "loss": 0.6298,
      "step": 2173
    },
    {
      "epoch": 0.2665522314860226,
      "grad_norm": 1.9651617052733243,
      "learning_rate": 4.905848962037868e-06,
      "loss": 0.5578,
      "step": 2174
    },
    {
      "epoch": 0.26667484060814123,
      "grad_norm": 2.427229187579969,
      "learning_rate": 4.905711128241417e-06,
      "loss": 0.6009,
      "step": 2175
    },
    {
      "epoch": 0.26679744973025993,
      "grad_norm": 2.319095965002049,
      "learning_rate": 4.90557319556598e-06,
      "loss": 0.5903,
      "step": 2176
    },
    {
      "epoch": 0.2669200588523786,
      "grad_norm": 2.432436797588275,
      "learning_rate": 4.905435164017225e-06,
      "loss": 0.5736,
      "step": 2177
    },
    {
      "epoch": 0.2670426679744973,
      "grad_norm": 2.359293186722807,
      "learning_rate": 4.905297033600827e-06,
      "loss": 0.585,
      "step": 2178
    },
    {
      "epoch": 0.26716527709661597,
      "grad_norm": 2.1398633118651667,
      "learning_rate": 4.905158804322462e-06,
      "loss": 0.6038,
      "step": 2179
    },
    {
      "epoch": 0.26728788621873467,
      "grad_norm": 2.2206655989859643,
      "learning_rate": 4.905020476187813e-06,
      "loss": 0.5771,
      "step": 2180
    },
    {
      "epoch": 0.26741049534085337,
      "grad_norm": 2.0362080502076583,
      "learning_rate": 4.9048820492025635e-06,
      "loss": 0.5675,
      "step": 2181
    },
    {
      "epoch": 0.26753310446297207,
      "grad_norm": 2.262536406762254,
      "learning_rate": 4.904743523372406e-06,
      "loss": 0.5655,
      "step": 2182
    },
    {
      "epoch": 0.2676557135850907,
      "grad_norm": 2.07780327284729,
      "learning_rate": 4.904604898703032e-06,
      "loss": 0.6081,
      "step": 2183
    },
    {
      "epoch": 0.2677783227072094,
      "grad_norm": 2.1086352070082,
      "learning_rate": 4.904466175200138e-06,
      "loss": 0.5903,
      "step": 2184
    },
    {
      "epoch": 0.2679009318293281,
      "grad_norm": 2.0575711298228625,
      "learning_rate": 4.904327352869429e-06,
      "loss": 0.5325,
      "step": 2185
    },
    {
      "epoch": 0.2680235409514468,
      "grad_norm": 2.13786425818665,
      "learning_rate": 4.904188431716609e-06,
      "loss": 0.544,
      "step": 2186
    },
    {
      "epoch": 0.26814615007356546,
      "grad_norm": 2.255646911918541,
      "learning_rate": 4.904049411747388e-06,
      "loss": 0.5359,
      "step": 2187
    },
    {
      "epoch": 0.26826875919568416,
      "grad_norm": 2.2817703351158722,
      "learning_rate": 4.90391029296748e-06,
      "loss": 0.5385,
      "step": 2188
    },
    {
      "epoch": 0.26839136831780286,
      "grad_norm": 2.1896312475760435,
      "learning_rate": 4.903771075382603e-06,
      "loss": 0.5874,
      "step": 2189
    },
    {
      "epoch": 0.2685139774399215,
      "grad_norm": 2.287644874218852,
      "learning_rate": 4.903631758998479e-06,
      "loss": 0.564,
      "step": 2190
    },
    {
      "epoch": 0.2686365865620402,
      "grad_norm": 2.1805760833017285,
      "learning_rate": 4.903492343820835e-06,
      "loss": 0.5297,
      "step": 2191
    },
    {
      "epoch": 0.2687591956841589,
      "grad_norm": 2.067512642617523,
      "learning_rate": 4.903352829855401e-06,
      "loss": 0.5957,
      "step": 2192
    },
    {
      "epoch": 0.2688818048062776,
      "grad_norm": 2.08097654203804,
      "learning_rate": 4.90321321710791e-06,
      "loss": 0.53,
      "step": 2193
    },
    {
      "epoch": 0.26900441392839625,
      "grad_norm": 2.0474248539444284,
      "learning_rate": 4.903073505584102e-06,
      "loss": 0.5518,
      "step": 2194
    },
    {
      "epoch": 0.26912702305051495,
      "grad_norm": 2.386032880771198,
      "learning_rate": 4.9029336952897175e-06,
      "loss": 0.6031,
      "step": 2195
    },
    {
      "epoch": 0.26924963217263365,
      "grad_norm": 1.885762712368444,
      "learning_rate": 4.902793786230506e-06,
      "loss": 0.5564,
      "step": 2196
    },
    {
      "epoch": 0.26937224129475235,
      "grad_norm": 2.1675335171623984,
      "learning_rate": 4.902653778412214e-06,
      "loss": 0.5774,
      "step": 2197
    },
    {
      "epoch": 0.269494850416871,
      "grad_norm": 2.5396743718576458,
      "learning_rate": 4.902513671840599e-06,
      "loss": 0.6019,
      "step": 2198
    },
    {
      "epoch": 0.2696174595389897,
      "grad_norm": 2.1809351642379236,
      "learning_rate": 4.902373466521419e-06,
      "loss": 0.5736,
      "step": 2199
    },
    {
      "epoch": 0.2697400686611084,
      "grad_norm": 2.2496611095880135,
      "learning_rate": 4.902233162460436e-06,
      "loss": 0.5599,
      "step": 2200
    },
    {
      "epoch": 0.2698626777832271,
      "grad_norm": 2.1636813380839524,
      "learning_rate": 4.902092759663418e-06,
      "loss": 0.5998,
      "step": 2201
    },
    {
      "epoch": 0.26998528690534573,
      "grad_norm": 2.086195213439038,
      "learning_rate": 4.901952258136134e-06,
      "loss": 0.5413,
      "step": 2202
    },
    {
      "epoch": 0.27010789602746443,
      "grad_norm": 2.5077110830917815,
      "learning_rate": 4.9018116578843605e-06,
      "loss": 0.569,
      "step": 2203
    },
    {
      "epoch": 0.27023050514958313,
      "grad_norm": 2.1735367381530155,
      "learning_rate": 4.901670958913876e-06,
      "loss": 0.5948,
      "step": 2204
    },
    {
      "epoch": 0.27035311427170183,
      "grad_norm": 2.30113855917891,
      "learning_rate": 4.9015301612304625e-06,
      "loss": 0.5743,
      "step": 2205
    },
    {
      "epoch": 0.2704757233938205,
      "grad_norm": 2.0554867816868043,
      "learning_rate": 4.901389264839909e-06,
      "loss": 0.5479,
      "step": 2206
    },
    {
      "epoch": 0.2705983325159392,
      "grad_norm": 2.3147333523433793,
      "learning_rate": 4.901248269748004e-06,
      "loss": 0.5172,
      "step": 2207
    },
    {
      "epoch": 0.2707209416380579,
      "grad_norm": 2.1675161727575554,
      "learning_rate": 4.9011071759605445e-06,
      "loss": 0.5474,
      "step": 2208
    },
    {
      "epoch": 0.2708435507601766,
      "grad_norm": 2.1806793821788073,
      "learning_rate": 4.90096598348333e-06,
      "loss": 0.584,
      "step": 2209
    },
    {
      "epoch": 0.2709661598822952,
      "grad_norm": 2.3541095385487765,
      "learning_rate": 4.900824692322162e-06,
      "loss": 0.6123,
      "step": 2210
    },
    {
      "epoch": 0.2710887690044139,
      "grad_norm": 2.1472668455330055,
      "learning_rate": 4.90068330248285e-06,
      "loss": 0.6127,
      "step": 2211
    },
    {
      "epoch": 0.2712113781265326,
      "grad_norm": 2.170494935043984,
      "learning_rate": 4.900541813971203e-06,
      "loss": 0.5549,
      "step": 2212
    },
    {
      "epoch": 0.2713339872486513,
      "grad_norm": 2.097439253496814,
      "learning_rate": 4.9004002267930374e-06,
      "loss": 0.5679,
      "step": 2213
    },
    {
      "epoch": 0.27145659637076996,
      "grad_norm": 1.9379056086797382,
      "learning_rate": 4.900258540954174e-06,
      "loss": 0.5279,
      "step": 2214
    },
    {
      "epoch": 0.27157920549288866,
      "grad_norm": 2.080965337401295,
      "learning_rate": 4.9001167564604345e-06,
      "loss": 0.6309,
      "step": 2215
    },
    {
      "epoch": 0.27170181461500736,
      "grad_norm": 2.005098227493822,
      "learning_rate": 4.899974873317647e-06,
      "loss": 0.5513,
      "step": 2216
    },
    {
      "epoch": 0.27182442373712606,
      "grad_norm": 2.127445487464854,
      "learning_rate": 4.899832891531644e-06,
      "loss": 0.6188,
      "step": 2217
    },
    {
      "epoch": 0.2719470328592447,
      "grad_norm": 2.148756555386159,
      "learning_rate": 4.899690811108261e-06,
      "loss": 0.5462,
      "step": 2218
    },
    {
      "epoch": 0.2720696419813634,
      "grad_norm": 2.018047111947823,
      "learning_rate": 4.8995486320533365e-06,
      "loss": 0.5822,
      "step": 2219
    },
    {
      "epoch": 0.2721922511034821,
      "grad_norm": 2.1753441194754672,
      "learning_rate": 4.899406354372716e-06,
      "loss": 0.5794,
      "step": 2220
    },
    {
      "epoch": 0.2723148602256008,
      "grad_norm": 2.1401327690443144,
      "learning_rate": 4.899263978072245e-06,
      "loss": 0.55,
      "step": 2221
    },
    {
      "epoch": 0.27243746934771945,
      "grad_norm": 2.443280845352502,
      "learning_rate": 4.8991215031577785e-06,
      "loss": 0.5686,
      "step": 2222
    },
    {
      "epoch": 0.27256007846983815,
      "grad_norm": 2.335235469312777,
      "learning_rate": 4.89897892963517e-06,
      "loss": 0.6299,
      "step": 2223
    },
    {
      "epoch": 0.27268268759195685,
      "grad_norm": 2.0032252202848597,
      "learning_rate": 4.898836257510281e-06,
      "loss": 0.5432,
      "step": 2224
    },
    {
      "epoch": 0.27280529671407555,
      "grad_norm": 2.2619205679167456,
      "learning_rate": 4.8986934867889744e-06,
      "loss": 0.5944,
      "step": 2225
    },
    {
      "epoch": 0.2729279058361942,
      "grad_norm": 2.3343338509414924,
      "learning_rate": 4.89855061747712e-06,
      "loss": 0.5836,
      "step": 2226
    },
    {
      "epoch": 0.2730505149583129,
      "grad_norm": 2.4783804650684824,
      "learning_rate": 4.898407649580587e-06,
      "loss": 0.5576,
      "step": 2227
    },
    {
      "epoch": 0.2731731240804316,
      "grad_norm": 2.3448604554282317,
      "learning_rate": 4.898264583105256e-06,
      "loss": 0.6935,
      "step": 2228
    },
    {
      "epoch": 0.2732957332025503,
      "grad_norm": 2.289596205353715,
      "learning_rate": 4.898121418057004e-06,
      "loss": 0.6013,
      "step": 2229
    },
    {
      "epoch": 0.27341834232466894,
      "grad_norm": 2.2295951416438418,
      "learning_rate": 4.897978154441715e-06,
      "loss": 0.6005,
      "step": 2230
    },
    {
      "epoch": 0.27354095144678764,
      "grad_norm": 2.188210180740377,
      "learning_rate": 4.897834792265279e-06,
      "loss": 0.5628,
      "step": 2231
    },
    {
      "epoch": 0.27366356056890634,
      "grad_norm": 2.2343287922244186,
      "learning_rate": 4.897691331533589e-06,
      "loss": 0.5692,
      "step": 2232
    },
    {
      "epoch": 0.27378616969102504,
      "grad_norm": 2.0426842022631675,
      "learning_rate": 4.89754777225254e-06,
      "loss": 0.5688,
      "step": 2233
    },
    {
      "epoch": 0.2739087788131437,
      "grad_norm": 2.1969679987010244,
      "learning_rate": 4.897404114428033e-06,
      "loss": 0.5816,
      "step": 2234
    },
    {
      "epoch": 0.2740313879352624,
      "grad_norm": 2.2343924115083733,
      "learning_rate": 4.897260358065972e-06,
      "loss": 0.588,
      "step": 2235
    },
    {
      "epoch": 0.2741539970573811,
      "grad_norm": 2.1643427533395543,
      "learning_rate": 4.8971165031722665e-06,
      "loss": 0.5369,
      "step": 2236
    },
    {
      "epoch": 0.2742766061794998,
      "grad_norm": 2.3915168065243333,
      "learning_rate": 4.8969725497528295e-06,
      "loss": 0.5516,
      "step": 2237
    },
    {
      "epoch": 0.2743992153016184,
      "grad_norm": 2.179880413592092,
      "learning_rate": 4.8968284978135775e-06,
      "loss": 0.5764,
      "step": 2238
    },
    {
      "epoch": 0.2745218244237371,
      "grad_norm": 2.0805221847377724,
      "learning_rate": 4.89668434736043e-06,
      "loss": 0.5628,
      "step": 2239
    },
    {
      "epoch": 0.2746444335458558,
      "grad_norm": 2.192056441760874,
      "learning_rate": 4.896540098399313e-06,
      "loss": 0.5902,
      "step": 2240
    },
    {
      "epoch": 0.27476704266797447,
      "grad_norm": 1.9562683286959148,
      "learning_rate": 4.896395750936156e-06,
      "loss": 0.5741,
      "step": 2241
    },
    {
      "epoch": 0.27488965179009317,
      "grad_norm": 2.2147298101712733,
      "learning_rate": 4.89625130497689e-06,
      "loss": 0.5967,
      "step": 2242
    },
    {
      "epoch": 0.27501226091221187,
      "grad_norm": 2.324712711348578,
      "learning_rate": 4.896106760527455e-06,
      "loss": 0.6164,
      "step": 2243
    },
    {
      "epoch": 0.27513487003433057,
      "grad_norm": 2.3216984562639427,
      "learning_rate": 4.895962117593789e-06,
      "loss": 0.6033,
      "step": 2244
    },
    {
      "epoch": 0.2752574791564492,
      "grad_norm": 2.1618623243963415,
      "learning_rate": 4.895817376181837e-06,
      "loss": 0.5864,
      "step": 2245
    },
    {
      "epoch": 0.2753800882785679,
      "grad_norm": 2.125552077747292,
      "learning_rate": 4.895672536297551e-06,
      "loss": 0.5223,
      "step": 2246
    },
    {
      "epoch": 0.2755026974006866,
      "grad_norm": 2.2330103901762626,
      "learning_rate": 4.895527597946882e-06,
      "loss": 0.6111,
      "step": 2247
    },
    {
      "epoch": 0.2756253065228053,
      "grad_norm": 2.028855387947123,
      "learning_rate": 4.895382561135788e-06,
      "loss": 0.5707,
      "step": 2248
    },
    {
      "epoch": 0.27574791564492396,
      "grad_norm": 2.261663592001697,
      "learning_rate": 4.8952374258702305e-06,
      "loss": 0.5846,
      "step": 2249
    },
    {
      "epoch": 0.27587052476704266,
      "grad_norm": 2.0623420674602246,
      "learning_rate": 4.895092192156173e-06,
      "loss": 0.5488,
      "step": 2250
    },
    {
      "epoch": 0.27599313388916136,
      "grad_norm": 2.2794429874590887,
      "learning_rate": 4.894946859999587e-06,
      "loss": 0.5627,
      "step": 2251
    },
    {
      "epoch": 0.27611574301128006,
      "grad_norm": 2.118156891111331,
      "learning_rate": 4.8948014294064455e-06,
      "loss": 0.5629,
      "step": 2252
    },
    {
      "epoch": 0.2762383521333987,
      "grad_norm": 2.1873766357322046,
      "learning_rate": 4.8946559003827255e-06,
      "loss": 0.5902,
      "step": 2253
    },
    {
      "epoch": 0.2763609612555174,
      "grad_norm": 2.121358557250274,
      "learning_rate": 4.894510272934408e-06,
      "loss": 0.5225,
      "step": 2254
    },
    {
      "epoch": 0.2764835703776361,
      "grad_norm": 2.205597628860012,
      "learning_rate": 4.894364547067479e-06,
      "loss": 0.592,
      "step": 2255
    },
    {
      "epoch": 0.2766061794997548,
      "grad_norm": 2.3176657465487827,
      "learning_rate": 4.894218722787929e-06,
      "loss": 0.5988,
      "step": 2256
    },
    {
      "epoch": 0.27672878862187344,
      "grad_norm": 2.0808215849189415,
      "learning_rate": 4.894072800101751e-06,
      "loss": 0.5465,
      "step": 2257
    },
    {
      "epoch": 0.27685139774399214,
      "grad_norm": 2.097758790192038,
      "learning_rate": 4.893926779014942e-06,
      "loss": 0.5768,
      "step": 2258
    },
    {
      "epoch": 0.27697400686611084,
      "grad_norm": 2.4456869531664998,
      "learning_rate": 4.8937806595335044e-06,
      "loss": 0.6032,
      "step": 2259
    },
    {
      "epoch": 0.27709661598822954,
      "grad_norm": 2.093900799549764,
      "learning_rate": 4.893634441663444e-06,
      "loss": 0.5399,
      "step": 2260
    },
    {
      "epoch": 0.2772192251103482,
      "grad_norm": 2.190282642397202,
      "learning_rate": 4.89348812541077e-06,
      "loss": 0.5333,
      "step": 2261
    },
    {
      "epoch": 0.2773418342324669,
      "grad_norm": 2.1895243106302753,
      "learning_rate": 4.893341710781497e-06,
      "loss": 0.5971,
      "step": 2262
    },
    {
      "epoch": 0.2774644433545856,
      "grad_norm": 2.153653551579472,
      "learning_rate": 4.893195197781643e-06,
      "loss": 0.6179,
      "step": 2263
    },
    {
      "epoch": 0.2775870524767043,
      "grad_norm": 2.0709938196970574,
      "learning_rate": 4.893048586417229e-06,
      "loss": 0.5845,
      "step": 2264
    },
    {
      "epoch": 0.27770966159882293,
      "grad_norm": 2.216966786773228,
      "learning_rate": 4.892901876694281e-06,
      "loss": 0.5781,
      "step": 2265
    },
    {
      "epoch": 0.27783227072094163,
      "grad_norm": 2.1250233634055053,
      "learning_rate": 4.8927550686188305e-06,
      "loss": 0.5756,
      "step": 2266
    },
    {
      "epoch": 0.27795487984306033,
      "grad_norm": 1.9314607407598408,
      "learning_rate": 4.892608162196912e-06,
      "loss": 0.5605,
      "step": 2267
    },
    {
      "epoch": 0.27807748896517903,
      "grad_norm": 2.0686694604291973,
      "learning_rate": 4.892461157434559e-06,
      "loss": 0.5143,
      "step": 2268
    },
    {
      "epoch": 0.2782000980872977,
      "grad_norm": 2.0032226014754158,
      "learning_rate": 4.892314054337819e-06,
      "loss": 0.5863,
      "step": 2269
    },
    {
      "epoch": 0.2783227072094164,
      "grad_norm": 2.047225541205096,
      "learning_rate": 4.8921668529127356e-06,
      "loss": 0.6094,
      "step": 2270
    },
    {
      "epoch": 0.2784453163315351,
      "grad_norm": 2.154348340034833,
      "learning_rate": 4.8920195531653595e-06,
      "loss": 0.536,
      "step": 2271
    },
    {
      "epoch": 0.2785679254536538,
      "grad_norm": 2.11665334240907,
      "learning_rate": 4.891872155101746e-06,
      "loss": 0.5891,
      "step": 2272
    },
    {
      "epoch": 0.2786905345757724,
      "grad_norm": 2.183798249526174,
      "learning_rate": 4.891724658727951e-06,
      "loss": 0.608,
      "step": 2273
    },
    {
      "epoch": 0.2788131436978911,
      "grad_norm": 2.1764160749458905,
      "learning_rate": 4.891577064050039e-06,
      "loss": 0.5897,
      "step": 2274
    },
    {
      "epoch": 0.2789357528200098,
      "grad_norm": 2.2940220351004053,
      "learning_rate": 4.891429371074076e-06,
      "loss": 0.5833,
      "step": 2275
    },
    {
      "epoch": 0.2790583619421285,
      "grad_norm": 2.061162197383791,
      "learning_rate": 4.891281579806132e-06,
      "loss": 0.5851,
      "step": 2276
    },
    {
      "epoch": 0.27918097106424716,
      "grad_norm": 2.029668748805312,
      "learning_rate": 4.891133690252281e-06,
      "loss": 0.5407,
      "step": 2277
    },
    {
      "epoch": 0.27930358018636586,
      "grad_norm": 2.040341773792399,
      "learning_rate": 4.890985702418602e-06,
      "loss": 0.5493,
      "step": 2278
    },
    {
      "epoch": 0.27942618930848456,
      "grad_norm": 1.9828327547500653,
      "learning_rate": 4.890837616311179e-06,
      "loss": 0.5345,
      "step": 2279
    },
    {
      "epoch": 0.27954879843060326,
      "grad_norm": 2.1849946945446037,
      "learning_rate": 4.890689431936097e-06,
      "loss": 0.5764,
      "step": 2280
    },
    {
      "epoch": 0.2796714075527219,
      "grad_norm": 2.0128367896614363,
      "learning_rate": 4.890541149299446e-06,
      "loss": 0.535,
      "step": 2281
    },
    {
      "epoch": 0.2797940166748406,
      "grad_norm": 2.324569654353035,
      "learning_rate": 4.890392768407322e-06,
      "loss": 0.5445,
      "step": 2282
    },
    {
      "epoch": 0.2799166257969593,
      "grad_norm": 2.1614080542688643,
      "learning_rate": 4.890244289265824e-06,
      "loss": 0.5753,
      "step": 2283
    },
    {
      "epoch": 0.280039234919078,
      "grad_norm": 2.1151637589647434,
      "learning_rate": 4.890095711881053e-06,
      "loss": 0.6381,
      "step": 2284
    },
    {
      "epoch": 0.28016184404119665,
      "grad_norm": 2.313607516266653,
      "learning_rate": 4.889947036259118e-06,
      "loss": 0.6466,
      "step": 2285
    },
    {
      "epoch": 0.28028445316331535,
      "grad_norm": 2.151775251795715,
      "learning_rate": 4.889798262406128e-06,
      "loss": 0.6031,
      "step": 2286
    },
    {
      "epoch": 0.28040706228543405,
      "grad_norm": 2.1491090866379077,
      "learning_rate": 4.889649390328199e-06,
      "loss": 0.5532,
      "step": 2287
    },
    {
      "epoch": 0.2805296714075527,
      "grad_norm": 2.1581865418343047,
      "learning_rate": 4.88950042003145e-06,
      "loss": 0.5642,
      "step": 2288
    },
    {
      "epoch": 0.2806522805296714,
      "grad_norm": 2.0509624270287516,
      "learning_rate": 4.889351351522002e-06,
      "loss": 0.5701,
      "step": 2289
    },
    {
      "epoch": 0.2807748896517901,
      "grad_norm": 2.1196954484373802,
      "learning_rate": 4.889202184805985e-06,
      "loss": 0.5749,
      "step": 2290
    },
    {
      "epoch": 0.2808974987739088,
      "grad_norm": 2.029800284337199,
      "learning_rate": 4.889052919889527e-06,
      "loss": 0.5486,
      "step": 2291
    },
    {
      "epoch": 0.28102010789602744,
      "grad_norm": 2.135505537975298,
      "learning_rate": 4.888903556778766e-06,
      "loss": 0.6249,
      "step": 2292
    },
    {
      "epoch": 0.28114271701814614,
      "grad_norm": 1.9754684703048024,
      "learning_rate": 4.888754095479839e-06,
      "loss": 0.6437,
      "step": 2293
    },
    {
      "epoch": 0.28126532614026484,
      "grad_norm": 2.355640937719849,
      "learning_rate": 4.88860453599889e-06,
      "loss": 0.5921,
      "step": 2294
    },
    {
      "epoch": 0.28138793526238354,
      "grad_norm": 2.072472833805868,
      "learning_rate": 4.888454878342065e-06,
      "loss": 0.5895,
      "step": 2295
    },
    {
      "epoch": 0.2815105443845022,
      "grad_norm": 2.316544211655201,
      "learning_rate": 4.888305122515517e-06,
      "loss": 0.5613,
      "step": 2296
    },
    {
      "epoch": 0.2816331535066209,
      "grad_norm": 2.1488033966547273,
      "learning_rate": 4.888155268525399e-06,
      "loss": 0.5727,
      "step": 2297
    },
    {
      "epoch": 0.2817557626287396,
      "grad_norm": 2.036580863235068,
      "learning_rate": 4.888005316377873e-06,
      "loss": 0.5413,
      "step": 2298
    },
    {
      "epoch": 0.2818783717508583,
      "grad_norm": 2.1428060670198135,
      "learning_rate": 4.8878552660791006e-06,
      "loss": 0.5959,
      "step": 2299
    },
    {
      "epoch": 0.2820009808729769,
      "grad_norm": 2.2541683096121865,
      "learning_rate": 4.887705117635249e-06,
      "loss": 0.6005,
      "step": 2300
    },
    {
      "epoch": 0.2821235899950956,
      "grad_norm": 2.308335387823834,
      "learning_rate": 4.88755487105249e-06,
      "loss": 0.6417,
      "step": 2301
    },
    {
      "epoch": 0.2822461991172143,
      "grad_norm": 1.8345692704578398,
      "learning_rate": 4.887404526336999e-06,
      "loss": 0.5795,
      "step": 2302
    },
    {
      "epoch": 0.282368808239333,
      "grad_norm": 2.414675516135264,
      "learning_rate": 4.887254083494955e-06,
      "loss": 0.6337,
      "step": 2303
    },
    {
      "epoch": 0.28249141736145167,
      "grad_norm": 2.1674323325432168,
      "learning_rate": 4.887103542532543e-06,
      "loss": 0.5515,
      "step": 2304
    },
    {
      "epoch": 0.28261402648357037,
      "grad_norm": 1.8336935850059926,
      "learning_rate": 4.886952903455948e-06,
      "loss": 0.5265,
      "step": 2305
    },
    {
      "epoch": 0.28273663560568907,
      "grad_norm": 2.0792026667803514,
      "learning_rate": 4.886802166271365e-06,
      "loss": 0.5689,
      "step": 2306
    },
    {
      "epoch": 0.28285924472780777,
      "grad_norm": 2.1808218389522014,
      "learning_rate": 4.886651330984985e-06,
      "loss": 0.6178,
      "step": 2307
    },
    {
      "epoch": 0.2829818538499264,
      "grad_norm": 1.9665949384552317,
      "learning_rate": 4.8865003976030114e-06,
      "loss": 0.5864,
      "step": 2308
    },
    {
      "epoch": 0.2831044629720451,
      "grad_norm": 2.337259513972997,
      "learning_rate": 4.8863493661316465e-06,
      "loss": 0.5566,
      "step": 2309
    },
    {
      "epoch": 0.2832270720941638,
      "grad_norm": 2.3385350306576083,
      "learning_rate": 4.886198236577098e-06,
      "loss": 0.5856,
      "step": 2310
    },
    {
      "epoch": 0.2833496812162825,
      "grad_norm": 2.1105589386164616,
      "learning_rate": 4.886047008945577e-06,
      "loss": 0.5685,
      "step": 2311
    },
    {
      "epoch": 0.28347229033840116,
      "grad_norm": 1.9846716067791055,
      "learning_rate": 4.8858956832433e-06,
      "loss": 0.5748,
      "step": 2312
    },
    {
      "epoch": 0.28359489946051986,
      "grad_norm": 2.1801783082191677,
      "learning_rate": 4.885744259476487e-06,
      "loss": 0.5958,
      "step": 2313
    },
    {
      "epoch": 0.28371750858263856,
      "grad_norm": 2.499603376856785,
      "learning_rate": 4.88559273765136e-06,
      "loss": 0.5966,
      "step": 2314
    },
    {
      "epoch": 0.28384011770475726,
      "grad_norm": 2.1905960455722235,
      "learning_rate": 4.885441117774149e-06,
      "loss": 0.5655,
      "step": 2315
    },
    {
      "epoch": 0.2839627268268759,
      "grad_norm": 2.1073598393695256,
      "learning_rate": 4.885289399851085e-06,
      "loss": 0.5302,
      "step": 2316
    },
    {
      "epoch": 0.2840853359489946,
      "grad_norm": 2.071872354445809,
      "learning_rate": 4.885137583888404e-06,
      "loss": 0.56,
      "step": 2317
    },
    {
      "epoch": 0.2842079450711133,
      "grad_norm": 2.345244498039479,
      "learning_rate": 4.884985669892345e-06,
      "loss": 0.5925,
      "step": 2318
    },
    {
      "epoch": 0.284330554193232,
      "grad_norm": 2.3037530958868193,
      "learning_rate": 4.884833657869154e-06,
      "loss": 0.5792,
      "step": 2319
    },
    {
      "epoch": 0.28445316331535064,
      "grad_norm": 2.4098353470022156,
      "learning_rate": 4.884681547825076e-06,
      "loss": 0.6451,
      "step": 2320
    },
    {
      "epoch": 0.28457577243746934,
      "grad_norm": 2.30625909168648,
      "learning_rate": 4.884529339766366e-06,
      "loss": 0.5806,
      "step": 2321
    },
    {
      "epoch": 0.28469838155958804,
      "grad_norm": 2.119225014328954,
      "learning_rate": 4.8843770336992776e-06,
      "loss": 0.62,
      "step": 2322
    },
    {
      "epoch": 0.28482099068170674,
      "grad_norm": 2.269578490932833,
      "learning_rate": 4.884224629630073e-06,
      "loss": 0.5877,
      "step": 2323
    },
    {
      "epoch": 0.2849435998038254,
      "grad_norm": 2.09774679091316,
      "learning_rate": 4.884072127565015e-06,
      "loss": 0.5802,
      "step": 2324
    },
    {
      "epoch": 0.2850662089259441,
      "grad_norm": 2.04397760310307,
      "learning_rate": 4.883919527510371e-06,
      "loss": 0.5446,
      "step": 2325
    },
    {
      "epoch": 0.2851888180480628,
      "grad_norm": 2.3266979482972125,
      "learning_rate": 4.883766829472414e-06,
      "loss": 0.6244,
      "step": 2326
    },
    {
      "epoch": 0.2853114271701815,
      "grad_norm": 2.1804019243238764,
      "learning_rate": 4.883614033457421e-06,
      "loss": 0.5069,
      "step": 2327
    },
    {
      "epoch": 0.28543403629230013,
      "grad_norm": 2.1421639485825126,
      "learning_rate": 4.883461139471672e-06,
      "loss": 0.5322,
      "step": 2328
    },
    {
      "epoch": 0.28555664541441883,
      "grad_norm": 2.14815030889232,
      "learning_rate": 4.883308147521449e-06,
      "loss": 0.6367,
      "step": 2329
    },
    {
      "epoch": 0.28567925453653753,
      "grad_norm": 1.9430707430755878,
      "learning_rate": 4.883155057613043e-06,
      "loss": 0.5687,
      "step": 2330
    },
    {
      "epoch": 0.28580186365865623,
      "grad_norm": 2.021537766618528,
      "learning_rate": 4.883001869752743e-06,
      "loss": 0.5212,
      "step": 2331
    },
    {
      "epoch": 0.2859244727807749,
      "grad_norm": 2.3644629565887247,
      "learning_rate": 4.88284858394685e-06,
      "loss": 0.5735,
      "step": 2332
    },
    {
      "epoch": 0.2860470819028936,
      "grad_norm": 2.209693209944188,
      "learning_rate": 4.88269520020166e-06,
      "loss": 0.5956,
      "step": 2333
    },
    {
      "epoch": 0.2861696910250123,
      "grad_norm": 2.2695564306005305,
      "learning_rate": 4.88254171852348e-06,
      "loss": 0.542,
      "step": 2334
    },
    {
      "epoch": 0.286292300147131,
      "grad_norm": 2.3110233098763504,
      "learning_rate": 4.882388138918617e-06,
      "loss": 0.5427,
      "step": 2335
    },
    {
      "epoch": 0.2864149092692496,
      "grad_norm": 2.156046271071062,
      "learning_rate": 4.882234461393384e-06,
      "loss": 0.5647,
      "step": 2336
    },
    {
      "epoch": 0.2865375183913683,
      "grad_norm": 2.1419581248910684,
      "learning_rate": 4.8820806859540974e-06,
      "loss": 0.5499,
      "step": 2337
    },
    {
      "epoch": 0.286660127513487,
      "grad_norm": 2.3543574757994614,
      "learning_rate": 4.881926812607077e-06,
      "loss": 0.5252,
      "step": 2338
    },
    {
      "epoch": 0.28678273663560566,
      "grad_norm": 2.1454175596489375,
      "learning_rate": 4.881772841358649e-06,
      "loss": 0.5953,
      "step": 2339
    },
    {
      "epoch": 0.28690534575772436,
      "grad_norm": 2.0073558452355917,
      "learning_rate": 4.881618772215139e-06,
      "loss": 0.6192,
      "step": 2340
    },
    {
      "epoch": 0.28702795487984306,
      "grad_norm": 2.243460588660744,
      "learning_rate": 4.881464605182882e-06,
      "loss": 0.5353,
      "step": 2341
    },
    {
      "epoch": 0.28715056400196176,
      "grad_norm": 2.1039865979235897,
      "learning_rate": 4.881310340268213e-06,
      "loss": 0.5786,
      "step": 2342
    },
    {
      "epoch": 0.2872731731240804,
      "grad_norm": 1.9608946314483984,
      "learning_rate": 4.881155977477473e-06,
      "loss": 0.578,
      "step": 2343
    },
    {
      "epoch": 0.2873957822461991,
      "grad_norm": 2.2795865309881056,
      "learning_rate": 4.881001516817008e-06,
      "loss": 0.6022,
      "step": 2344
    },
    {
      "epoch": 0.2875183913683178,
      "grad_norm": 2.0455863269813226,
      "learning_rate": 4.880846958293164e-06,
      "loss": 0.5333,
      "step": 2345
    },
    {
      "epoch": 0.2876410004904365,
      "grad_norm": 2.1952897227338664,
      "learning_rate": 4.8806923019122965e-06,
      "loss": 0.5711,
      "step": 2346
    },
    {
      "epoch": 0.28776360961255515,
      "grad_norm": 2.4630321637510524,
      "learning_rate": 4.8805375476807604e-06,
      "loss": 0.5857,
      "step": 2347
    },
    {
      "epoch": 0.28788621873467385,
      "grad_norm": 2.1656832107193003,
      "learning_rate": 4.880382695604916e-06,
      "loss": 0.567,
      "step": 2348
    },
    {
      "epoch": 0.28800882785679255,
      "grad_norm": 2.443651307342959,
      "learning_rate": 4.880227745691129e-06,
      "loss": 0.5289,
      "step": 2349
    },
    {
      "epoch": 0.28813143697891125,
      "grad_norm": 2.1868530635586425,
      "learning_rate": 4.880072697945768e-06,
      "loss": 0.5576,
      "step": 2350
    },
    {
      "epoch": 0.2882540461010299,
      "grad_norm": 2.334632813730692,
      "learning_rate": 4.879917552375205e-06,
      "loss": 0.6074,
      "step": 2351
    },
    {
      "epoch": 0.2883766552231486,
      "grad_norm": 1.964067723716308,
      "learning_rate": 4.879762308985818e-06,
      "loss": 0.5456,
      "step": 2352
    },
    {
      "epoch": 0.2884992643452673,
      "grad_norm": 2.090942892768125,
      "learning_rate": 4.879606967783987e-06,
      "loss": 0.5982,
      "step": 2353
    },
    {
      "epoch": 0.288621873467386,
      "grad_norm": 2.18306701868179,
      "learning_rate": 4.8794515287760964e-06,
      "loss": 0.5291,
      "step": 2354
    },
    {
      "epoch": 0.28874448258950464,
      "grad_norm": 2.0401592062922806,
      "learning_rate": 4.879295991968536e-06,
      "loss": 0.5532,
      "step": 2355
    },
    {
      "epoch": 0.28886709171162334,
      "grad_norm": 2.30188078445326,
      "learning_rate": 4.879140357367698e-06,
      "loss": 0.5706,
      "step": 2356
    },
    {
      "epoch": 0.28898970083374204,
      "grad_norm": 1.9612364075966215,
      "learning_rate": 4.8789846249799785e-06,
      "loss": 0.5428,
      "step": 2357
    },
    {
      "epoch": 0.28911230995586074,
      "grad_norm": 2.3989090334350305,
      "learning_rate": 4.878828794811779e-06,
      "loss": 0.6029,
      "step": 2358
    },
    {
      "epoch": 0.2892349190779794,
      "grad_norm": 2.1506965640308087,
      "learning_rate": 4.8786728668695064e-06,
      "loss": 0.5625,
      "step": 2359
    },
    {
      "epoch": 0.2893575282000981,
      "grad_norm": 2.3842932559918095,
      "learning_rate": 4.878516841159567e-06,
      "loss": 0.5302,
      "step": 2360
    },
    {
      "epoch": 0.2894801373222168,
      "grad_norm": 2.3003381481894154,
      "learning_rate": 4.8783607176883755e-06,
      "loss": 0.5586,
      "step": 2361
    },
    {
      "epoch": 0.2896027464443355,
      "grad_norm": 2.099285263656974,
      "learning_rate": 4.8782044964623465e-06,
      "loss": 0.6111,
      "step": 2362
    },
    {
      "epoch": 0.2897253555664541,
      "grad_norm": 2.2634833738535916,
      "learning_rate": 4.878048177487903e-06,
      "loss": 0.6043,
      "step": 2363
    },
    {
      "epoch": 0.2898479646885728,
      "grad_norm": 2.1985018551436046,
      "learning_rate": 4.877891760771469e-06,
      "loss": 0.6012,
      "step": 2364
    },
    {
      "epoch": 0.2899705738106915,
      "grad_norm": 2.396636302567812,
      "learning_rate": 4.877735246319475e-06,
      "loss": 0.6124,
      "step": 2365
    },
    {
      "epoch": 0.2900931829328102,
      "grad_norm": 2.1219099420809107,
      "learning_rate": 4.877578634138352e-06,
      "loss": 0.5802,
      "step": 2366
    },
    {
      "epoch": 0.29021579205492887,
      "grad_norm": 2.1559843520151354,
      "learning_rate": 4.877421924234538e-06,
      "loss": 0.508,
      "step": 2367
    },
    {
      "epoch": 0.29033840117704757,
      "grad_norm": 2.1794916468140766,
      "learning_rate": 4.877265116614474e-06,
      "loss": 0.5686,
      "step": 2368
    },
    {
      "epoch": 0.29046101029916627,
      "grad_norm": 2.102073280609561,
      "learning_rate": 4.877108211284606e-06,
      "loss": 0.5642,
      "step": 2369
    },
    {
      "epoch": 0.29058361942128497,
      "grad_norm": 2.5998739270367213,
      "learning_rate": 4.876951208251381e-06,
      "loss": 0.6039,
      "step": 2370
    },
    {
      "epoch": 0.2907062285434036,
      "grad_norm": 2.5198072003175263,
      "learning_rate": 4.8767941075212534e-06,
      "loss": 0.5863,
      "step": 2371
    },
    {
      "epoch": 0.2908288376655223,
      "grad_norm": 2.3265528175254238,
      "learning_rate": 4.876636909100681e-06,
      "loss": 0.6072,
      "step": 2372
    },
    {
      "epoch": 0.290951446787641,
      "grad_norm": 2.1762065808299833,
      "learning_rate": 4.876479612996123e-06,
      "loss": 0.5982,
      "step": 2373
    },
    {
      "epoch": 0.2910740559097597,
      "grad_norm": 2.2040112718858187,
      "learning_rate": 4.8763222192140465e-06,
      "loss": 0.5896,
      "step": 2374
    },
    {
      "epoch": 0.29119666503187835,
      "grad_norm": 2.217895390816471,
      "learning_rate": 4.876164727760921e-06,
      "loss": 0.6325,
      "step": 2375
    },
    {
      "epoch": 0.29131927415399705,
      "grad_norm": 2.110830891143581,
      "learning_rate": 4.876007138643216e-06,
      "loss": 0.5179,
      "step": 2376
    },
    {
      "epoch": 0.29144188327611575,
      "grad_norm": 2.3312872538557388,
      "learning_rate": 4.8758494518674124e-06,
      "loss": 0.6029,
      "step": 2377
    },
    {
      "epoch": 0.29156449239823445,
      "grad_norm": 1.9857319293199631,
      "learning_rate": 4.87569166743999e-06,
      "loss": 0.5716,
      "step": 2378
    },
    {
      "epoch": 0.2916871015203531,
      "grad_norm": 2.331898213726207,
      "learning_rate": 4.875533785367435e-06,
      "loss": 0.5924,
      "step": 2379
    },
    {
      "epoch": 0.2918097106424718,
      "grad_norm": 2.204965378247748,
      "learning_rate": 4.875375805656235e-06,
      "loss": 0.5946,
      "step": 2380
    },
    {
      "epoch": 0.2919323197645905,
      "grad_norm": 2.127209953665129,
      "learning_rate": 4.875217728312885e-06,
      "loss": 0.5717,
      "step": 2381
    },
    {
      "epoch": 0.2920549288867092,
      "grad_norm": 2.4576068012326764,
      "learning_rate": 4.87505955334388e-06,
      "loss": 0.5781,
      "step": 2382
    },
    {
      "epoch": 0.29217753800882784,
      "grad_norm": 2.095575768745872,
      "learning_rate": 4.874901280755724e-06,
      "loss": 0.5727,
      "step": 2383
    },
    {
      "epoch": 0.29230014713094654,
      "grad_norm": 1.97197310026004,
      "learning_rate": 4.87474291055492e-06,
      "loss": 0.5968,
      "step": 2384
    },
    {
      "epoch": 0.29242275625306524,
      "grad_norm": 2.3023589682401404,
      "learning_rate": 4.874584442747979e-06,
      "loss": 0.5674,
      "step": 2385
    },
    {
      "epoch": 0.29254536537518394,
      "grad_norm": 2.1640665846879985,
      "learning_rate": 4.874425877341414e-06,
      "loss": 0.5768,
      "step": 2386
    },
    {
      "epoch": 0.2926679744973026,
      "grad_norm": 2.2743085976531834,
      "learning_rate": 4.8742672143417405e-06,
      "loss": 0.5504,
      "step": 2387
    },
    {
      "epoch": 0.2927905836194213,
      "grad_norm": 2.201872242975317,
      "learning_rate": 4.874108453755482e-06,
      "loss": 0.5898,
      "step": 2388
    },
    {
      "epoch": 0.29291319274154,
      "grad_norm": 2.364207033933614,
      "learning_rate": 4.873949595589163e-06,
      "loss": 0.5937,
      "step": 2389
    },
    {
      "epoch": 0.29303580186365863,
      "grad_norm": 2.0110208456212413,
      "learning_rate": 4.873790639849312e-06,
      "loss": 0.5652,
      "step": 2390
    },
    {
      "epoch": 0.29315841098577733,
      "grad_norm": 2.0333099212826227,
      "learning_rate": 4.873631586542464e-06,
      "loss": 0.5419,
      "step": 2391
    },
    {
      "epoch": 0.29328102010789603,
      "grad_norm": 2.2358745687997112,
      "learning_rate": 4.873472435675155e-06,
      "loss": 0.53,
      "step": 2392
    },
    {
      "epoch": 0.29340362923001473,
      "grad_norm": 2.587010573320979,
      "learning_rate": 4.873313187253928e-06,
      "loss": 0.6233,
      "step": 2393
    },
    {
      "epoch": 0.2935262383521334,
      "grad_norm": 1.9922952953127797,
      "learning_rate": 4.873153841285326e-06,
      "loss": 0.597,
      "step": 2394
    },
    {
      "epoch": 0.2936488474742521,
      "grad_norm": 2.0003554341300607,
      "learning_rate": 4.8729943977759005e-06,
      "loss": 0.5936,
      "step": 2395
    },
    {
      "epoch": 0.2937714565963708,
      "grad_norm": 1.9772169023233865,
      "learning_rate": 4.8728348567322045e-06,
      "loss": 0.5261,
      "step": 2396
    },
    {
      "epoch": 0.2938940657184895,
      "grad_norm": 2.0916295490831245,
      "learning_rate": 4.872675218160795e-06,
      "loss": 0.5999,
      "step": 2397
    },
    {
      "epoch": 0.2940166748406081,
      "grad_norm": 2.0920970841816287,
      "learning_rate": 4.872515482068234e-06,
      "loss": 0.6523,
      "step": 2398
    },
    {
      "epoch": 0.2941392839627268,
      "grad_norm": 2.189094964935812,
      "learning_rate": 4.872355648461086e-06,
      "loss": 0.5923,
      "step": 2399
    },
    {
      "epoch": 0.2942618930848455,
      "grad_norm": 2.28240793049857,
      "learning_rate": 4.872195717345921e-06,
      "loss": 0.5803,
      "step": 2400
    },
    {
      "epoch": 0.2943845022069642,
      "grad_norm": 2.077242365361767,
      "learning_rate": 4.872035688729312e-06,
      "loss": 0.5113,
      "step": 2401
    },
    {
      "epoch": 0.29450711132908286,
      "grad_norm": 2.2997875641005616,
      "learning_rate": 4.871875562617837e-06,
      "loss": 0.5652,
      "step": 2402
    },
    {
      "epoch": 0.29462972045120156,
      "grad_norm": 2.040680590786669,
      "learning_rate": 4.8717153390180775e-06,
      "loss": 0.5802,
      "step": 2403
    },
    {
      "epoch": 0.29475232957332026,
      "grad_norm": 2.046156308086903,
      "learning_rate": 4.871555017936618e-06,
      "loss": 0.5576,
      "step": 2404
    },
    {
      "epoch": 0.29487493869543896,
      "grad_norm": 2.243316229613149,
      "learning_rate": 4.87139459938005e-06,
      "loss": 0.6601,
      "step": 2405
    },
    {
      "epoch": 0.2949975478175576,
      "grad_norm": 2.103270817462296,
      "learning_rate": 4.8712340833549645e-06,
      "loss": 0.5404,
      "step": 2406
    },
    {
      "epoch": 0.2951201569396763,
      "grad_norm": 2.111160734781343,
      "learning_rate": 4.8710734698679615e-06,
      "loss": 0.5773,
      "step": 2407
    },
    {
      "epoch": 0.295242766061795,
      "grad_norm": 2.2935501496094024,
      "learning_rate": 4.87091275892564e-06,
      "loss": 0.5469,
      "step": 2408
    },
    {
      "epoch": 0.2953653751839137,
      "grad_norm": 2.0642148781935585,
      "learning_rate": 4.870751950534608e-06,
      "loss": 0.6025,
      "step": 2409
    },
    {
      "epoch": 0.29548798430603235,
      "grad_norm": 2.0778533223022886,
      "learning_rate": 4.870591044701473e-06,
      "loss": 0.5711,
      "step": 2410
    },
    {
      "epoch": 0.29561059342815105,
      "grad_norm": 2.0980064395259133,
      "learning_rate": 4.870430041432849e-06,
      "loss": 0.5725,
      "step": 2411
    },
    {
      "epoch": 0.29573320255026975,
      "grad_norm": 2.2572744064890933,
      "learning_rate": 4.870268940735354e-06,
      "loss": 0.6042,
      "step": 2412
    },
    {
      "epoch": 0.29585581167238845,
      "grad_norm": 2.0442586717376425,
      "learning_rate": 4.87010774261561e-06,
      "loss": 0.5612,
      "step": 2413
    },
    {
      "epoch": 0.2959784207945071,
      "grad_norm": 2.157954317141817,
      "learning_rate": 4.869946447080241e-06,
      "loss": 0.5779,
      "step": 2414
    },
    {
      "epoch": 0.2961010299166258,
      "grad_norm": 1.9985817120779992,
      "learning_rate": 4.869785054135877e-06,
      "loss": 0.578,
      "step": 2415
    },
    {
      "epoch": 0.2962236390387445,
      "grad_norm": 2.338091404260272,
      "learning_rate": 4.8696235637891535e-06,
      "loss": 0.5603,
      "step": 2416
    },
    {
      "epoch": 0.2963462481608632,
      "grad_norm": 2.066133764253658,
      "learning_rate": 4.869461976046705e-06,
      "loss": 0.5603,
      "step": 2417
    },
    {
      "epoch": 0.29646885728298183,
      "grad_norm": 2.2286219026869145,
      "learning_rate": 4.869300290915175e-06,
      "loss": 0.5043,
      "step": 2418
    },
    {
      "epoch": 0.29659146640510053,
      "grad_norm": 2.2701243178974755,
      "learning_rate": 4.869138508401209e-06,
      "loss": 0.5571,
      "step": 2419
    },
    {
      "epoch": 0.29671407552721923,
      "grad_norm": 2.0482033227623457,
      "learning_rate": 4.868976628511454e-06,
      "loss": 0.5739,
      "step": 2420
    },
    {
      "epoch": 0.29683668464933793,
      "grad_norm": 2.113451766659064,
      "learning_rate": 4.868814651252568e-06,
      "loss": 0.5412,
      "step": 2421
    },
    {
      "epoch": 0.2969592937714566,
      "grad_norm": 2.178812159358402,
      "learning_rate": 4.868652576631205e-06,
      "loss": 0.631,
      "step": 2422
    },
    {
      "epoch": 0.2970819028935753,
      "grad_norm": 2.0862951221003985,
      "learning_rate": 4.868490404654028e-06,
      "loss": 0.5688,
      "step": 2423
    },
    {
      "epoch": 0.297204512015694,
      "grad_norm": 2.135666594504098,
      "learning_rate": 4.868328135327703e-06,
      "loss": 0.5204,
      "step": 2424
    },
    {
      "epoch": 0.2973271211378127,
      "grad_norm": 2.0621698916594724,
      "learning_rate": 4.868165768658898e-06,
      "loss": 0.5235,
      "step": 2425
    },
    {
      "epoch": 0.2974497302599313,
      "grad_norm": 2.3212854361779396,
      "learning_rate": 4.868003304654288e-06,
      "loss": 0.5781,
      "step": 2426
    },
    {
      "epoch": 0.29757233938205,
      "grad_norm": 2.320469378391152,
      "learning_rate": 4.86784074332055e-06,
      "loss": 0.5308,
      "step": 2427
    },
    {
      "epoch": 0.2976949485041687,
      "grad_norm": 2.181697460512582,
      "learning_rate": 4.867678084664365e-06,
      "loss": 0.5285,
      "step": 2428
    },
    {
      "epoch": 0.2978175576262874,
      "grad_norm": 2.0863322031634683,
      "learning_rate": 4.867515328692419e-06,
      "loss": 0.6127,
      "step": 2429
    },
    {
      "epoch": 0.29794016674840607,
      "grad_norm": 2.292939456107265,
      "learning_rate": 4.867352475411401e-06,
      "loss": 0.5332,
      "step": 2430
    },
    {
      "epoch": 0.29806277587052477,
      "grad_norm": 1.8208884894242738,
      "learning_rate": 4.867189524828006e-06,
      "loss": 0.5277,
      "step": 2431
    },
    {
      "epoch": 0.29818538499264347,
      "grad_norm": 2.3638698175878314,
      "learning_rate": 4.867026476948932e-06,
      "loss": 0.596,
      "step": 2432
    },
    {
      "epoch": 0.29830799411476216,
      "grad_norm": 2.291608446835649,
      "learning_rate": 4.866863331780878e-06,
      "loss": 0.5181,
      "step": 2433
    },
    {
      "epoch": 0.2984306032368808,
      "grad_norm": 2.281306542221624,
      "learning_rate": 4.8667000893305505e-06,
      "loss": 0.5626,
      "step": 2434
    },
    {
      "epoch": 0.2985532123589995,
      "grad_norm": 2.3940010859683514,
      "learning_rate": 4.86653674960466e-06,
      "loss": 0.5843,
      "step": 2435
    },
    {
      "epoch": 0.2986758214811182,
      "grad_norm": 2.172139841946123,
      "learning_rate": 4.866373312609919e-06,
      "loss": 0.5823,
      "step": 2436
    },
    {
      "epoch": 0.29879843060323685,
      "grad_norm": 2.033814957377332,
      "learning_rate": 4.866209778353046e-06,
      "loss": 0.5539,
      "step": 2437
    },
    {
      "epoch": 0.29892103972535555,
      "grad_norm": 1.9503461385474976,
      "learning_rate": 4.8660461468407615e-06,
      "loss": 0.5402,
      "step": 2438
    },
    {
      "epoch": 0.29904364884747425,
      "grad_norm": 2.4025072887738017,
      "learning_rate": 4.865882418079793e-06,
      "loss": 0.6743,
      "step": 2439
    },
    {
      "epoch": 0.29916625796959295,
      "grad_norm": 2.344174216014242,
      "learning_rate": 4.865718592076867e-06,
      "loss": 0.6031,
      "step": 2440
    },
    {
      "epoch": 0.2992888670917116,
      "grad_norm": 2.070877077832272,
      "learning_rate": 4.865554668838719e-06,
      "loss": 0.5927,
      "step": 2441
    },
    {
      "epoch": 0.2994114762138303,
      "grad_norm": 2.37212529833984,
      "learning_rate": 4.865390648372087e-06,
      "loss": 0.5706,
      "step": 2442
    },
    {
      "epoch": 0.299534085335949,
      "grad_norm": 2.248091908825087,
      "learning_rate": 4.865226530683711e-06,
      "loss": 0.591,
      "step": 2443
    },
    {
      "epoch": 0.2996566944580677,
      "grad_norm": 2.119681727363141,
      "learning_rate": 4.8650623157803376e-06,
      "loss": 0.5312,
      "step": 2444
    },
    {
      "epoch": 0.29977930358018634,
      "grad_norm": 2.1061483666746197,
      "learning_rate": 4.864898003668716e-06,
      "loss": 0.6198,
      "step": 2445
    },
    {
      "epoch": 0.29990191270230504,
      "grad_norm": 2.0485663237871954,
      "learning_rate": 4.8647335943555996e-06,
      "loss": 0.5473,
      "step": 2446
    },
    {
      "epoch": 0.30002452182442374,
      "grad_norm": 2.2121839672060015,
      "learning_rate": 4.864569087847746e-06,
      "loss": 0.5626,
      "step": 2447
    },
    {
      "epoch": 0.30014713094654244,
      "grad_norm": 2.057449685791187,
      "learning_rate": 4.864404484151917e-06,
      "loss": 0.5932,
      "step": 2448
    },
    {
      "epoch": 0.3002697400686611,
      "grad_norm": 1.957117618811284,
      "learning_rate": 4.864239783274877e-06,
      "loss": 0.5689,
      "step": 2449
    },
    {
      "epoch": 0.3003923491907798,
      "grad_norm": 2.0857909777410515,
      "learning_rate": 4.864074985223398e-06,
      "loss": 0.5427,
      "step": 2450
    },
    {
      "epoch": 0.3005149583128985,
      "grad_norm": 2.173970724003031,
      "learning_rate": 4.863910090004251e-06,
      "loss": 0.5456,
      "step": 2451
    },
    {
      "epoch": 0.3006375674350172,
      "grad_norm": 2.1891672456787603,
      "learning_rate": 4.863745097624214e-06,
      "loss": 0.5864,
      "step": 2452
    },
    {
      "epoch": 0.3007601765571358,
      "grad_norm": 2.0494239843901347,
      "learning_rate": 4.86358000809007e-06,
      "loss": 0.5554,
      "step": 2453
    },
    {
      "epoch": 0.3008827856792545,
      "grad_norm": 2.339586913411431,
      "learning_rate": 4.863414821408602e-06,
      "loss": 0.5816,
      "step": 2454
    },
    {
      "epoch": 0.3010053948013732,
      "grad_norm": 2.044709746353195,
      "learning_rate": 4.863249537586601e-06,
      "loss": 0.553,
      "step": 2455
    },
    {
      "epoch": 0.3011280039234919,
      "grad_norm": 2.1065412049429644,
      "learning_rate": 4.863084156630861e-06,
      "loss": 0.5462,
      "step": 2456
    },
    {
      "epoch": 0.30125061304561057,
      "grad_norm": 2.1652024639943495,
      "learning_rate": 4.8629186785481775e-06,
      "loss": 0.6662,
      "step": 2457
    },
    {
      "epoch": 0.30137322216772927,
      "grad_norm": 2.1590303974284293,
      "learning_rate": 4.862753103345353e-06,
      "loss": 0.5506,
      "step": 2458
    },
    {
      "epoch": 0.30149583128984797,
      "grad_norm": 2.1536129484429396,
      "learning_rate": 4.862587431029194e-06,
      "loss": 0.579,
      "step": 2459
    },
    {
      "epoch": 0.30161844041196667,
      "grad_norm": 2.193008207098898,
      "learning_rate": 4.8624216616065086e-06,
      "loss": 0.5644,
      "step": 2460
    },
    {
      "epoch": 0.3017410495340853,
      "grad_norm": 2.245783867225157,
      "learning_rate": 4.86225579508411e-06,
      "loss": 0.5653,
      "step": 2461
    },
    {
      "epoch": 0.301863658656204,
      "grad_norm": 2.221573436861182,
      "learning_rate": 4.862089831468817e-06,
      "loss": 0.5456,
      "step": 2462
    },
    {
      "epoch": 0.3019862677783227,
      "grad_norm": 2.3446628253946384,
      "learning_rate": 4.8619237707674496e-06,
      "loss": 0.5453,
      "step": 2463
    },
    {
      "epoch": 0.3021088769004414,
      "grad_norm": 2.166924076520924,
      "learning_rate": 4.861757612986835e-06,
      "loss": 0.5396,
      "step": 2464
    },
    {
      "epoch": 0.30223148602256006,
      "grad_norm": 2.083606929612345,
      "learning_rate": 4.861591358133799e-06,
      "loss": 0.6803,
      "step": 2465
    },
    {
      "epoch": 0.30235409514467876,
      "grad_norm": 2.03169826688465,
      "learning_rate": 4.8614250062151794e-06,
      "loss": 0.5007,
      "step": 2466
    },
    {
      "epoch": 0.30247670426679746,
      "grad_norm": 1.9519768084425961,
      "learning_rate": 4.861258557237811e-06,
      "loss": 0.5676,
      "step": 2467
    },
    {
      "epoch": 0.30259931338891616,
      "grad_norm": 2.433810249111824,
      "learning_rate": 4.861092011208535e-06,
      "loss": 0.6099,
      "step": 2468
    },
    {
      "epoch": 0.3027219225110348,
      "grad_norm": 2.1444714358283004,
      "learning_rate": 4.860925368134198e-06,
      "loss": 0.5985,
      "step": 2469
    },
    {
      "epoch": 0.3028445316331535,
      "grad_norm": 2.2670370470896137,
      "learning_rate": 4.860758628021649e-06,
      "loss": 0.6067,
      "step": 2470
    },
    {
      "epoch": 0.3029671407552722,
      "grad_norm": 2.039449267042797,
      "learning_rate": 4.86059179087774e-06,
      "loss": 0.5906,
      "step": 2471
    },
    {
      "epoch": 0.3030897498773909,
      "grad_norm": 2.245045013119291,
      "learning_rate": 4.8604248567093296e-06,
      "loss": 0.5686,
      "step": 2472
    },
    {
      "epoch": 0.30321235899950955,
      "grad_norm": 1.867074663172474,
      "learning_rate": 4.860257825523279e-06,
      "loss": 0.5539,
      "step": 2473
    },
    {
      "epoch": 0.30333496812162825,
      "grad_norm": 2.0844937744281036,
      "learning_rate": 4.860090697326453e-06,
      "loss": 0.6166,
      "step": 2474
    },
    {
      "epoch": 0.30345757724374695,
      "grad_norm": 2.058212173480211,
      "learning_rate": 4.859923472125722e-06,
      "loss": 0.5724,
      "step": 2475
    },
    {
      "epoch": 0.30358018636586565,
      "grad_norm": 2.1904575282310077,
      "learning_rate": 4.8597561499279565e-06,
      "loss": 0.5721,
      "step": 2476
    },
    {
      "epoch": 0.3037027954879843,
      "grad_norm": 2.051025234421864,
      "learning_rate": 4.859588730740037e-06,
      "loss": 0.5797,
      "step": 2477
    },
    {
      "epoch": 0.303825404610103,
      "grad_norm": 2.161409446160528,
      "learning_rate": 4.859421214568843e-06,
      "loss": 0.5541,
      "step": 2478
    },
    {
      "epoch": 0.3039480137322217,
      "grad_norm": 2.0841846503209314,
      "learning_rate": 4.859253601421261e-06,
      "loss": 0.6003,
      "step": 2479
    },
    {
      "epoch": 0.3040706228543404,
      "grad_norm": 2.1875382328213675,
      "learning_rate": 4.8590858913041775e-06,
      "loss": 0.5248,
      "step": 2480
    },
    {
      "epoch": 0.30419323197645903,
      "grad_norm": 2.0258216981278836,
      "learning_rate": 4.8589180842244884e-06,
      "loss": 0.5312,
      "step": 2481
    },
    {
      "epoch": 0.30431584109857773,
      "grad_norm": 2.2371990954978975,
      "learning_rate": 4.85875018018909e-06,
      "loss": 0.5671,
      "step": 2482
    },
    {
      "epoch": 0.30443845022069643,
      "grad_norm": 2.1768796341415433,
      "learning_rate": 4.858582179204884e-06,
      "loss": 0.5443,
      "step": 2483
    },
    {
      "epoch": 0.30456105934281513,
      "grad_norm": 2.099193134670474,
      "learning_rate": 4.858414081278774e-06,
      "loss": 0.6153,
      "step": 2484
    },
    {
      "epoch": 0.3046836684649338,
      "grad_norm": 1.9791272868918175,
      "learning_rate": 4.8582458864176695e-06,
      "loss": 0.5764,
      "step": 2485
    },
    {
      "epoch": 0.3048062775870525,
      "grad_norm": 2.387904613147357,
      "learning_rate": 4.858077594628486e-06,
      "loss": 0.5771,
      "step": 2486
    },
    {
      "epoch": 0.3049288867091712,
      "grad_norm": 2.2624228128350925,
      "learning_rate": 4.857909205918136e-06,
      "loss": 0.561,
      "step": 2487
    },
    {
      "epoch": 0.3050514958312898,
      "grad_norm": 2.171603476803925,
      "learning_rate": 4.857740720293545e-06,
      "loss": 0.5727,
      "step": 2488
    },
    {
      "epoch": 0.3051741049534085,
      "grad_norm": 1.868861466506674,
      "learning_rate": 4.857572137761636e-06,
      "loss": 0.5671,
      "step": 2489
    },
    {
      "epoch": 0.3052967140755272,
      "grad_norm": 2.216819325275664,
      "learning_rate": 4.857403458329338e-06,
      "loss": 0.6235,
      "step": 2490
    },
    {
      "epoch": 0.3054193231976459,
      "grad_norm": 1.9520477975411916,
      "learning_rate": 4.8572346820035856e-06,
      "loss": 0.5874,
      "step": 2491
    },
    {
      "epoch": 0.30554193231976456,
      "grad_norm": 2.1072703138425135,
      "learning_rate": 4.8570658087913135e-06,
      "loss": 0.5483,
      "step": 2492
    },
    {
      "epoch": 0.30566454144188326,
      "grad_norm": 2.438316593298949,
      "learning_rate": 4.856896838699464e-06,
      "loss": 0.5406,
      "step": 2493
    },
    {
      "epoch": 0.30578715056400196,
      "grad_norm": 2.0794820865899553,
      "learning_rate": 4.856727771734983e-06,
      "loss": 0.6021,
      "step": 2494
    },
    {
      "epoch": 0.30590975968612066,
      "grad_norm": 2.6390291860036004,
      "learning_rate": 4.856558607904817e-06,
      "loss": 0.6242,
      "step": 2495
    },
    {
      "epoch": 0.3060323688082393,
      "grad_norm": 1.995307257836443,
      "learning_rate": 4.856389347215921e-06,
      "loss": 0.5553,
      "step": 2496
    },
    {
      "epoch": 0.306154977930358,
      "grad_norm": 2.2125111856330677,
      "learning_rate": 4.85621998967525e-06,
      "loss": 0.5265,
      "step": 2497
    },
    {
      "epoch": 0.3062775870524767,
      "grad_norm": 2.0440550681471517,
      "learning_rate": 4.856050535289767e-06,
      "loss": 0.5475,
      "step": 2498
    },
    {
      "epoch": 0.3064001961745954,
      "grad_norm": 2.1903273741802747,
      "learning_rate": 4.855880984066436e-06,
      "loss": 0.5316,
      "step": 2499
    },
    {
      "epoch": 0.30652280529671405,
      "grad_norm": 2.198138820792633,
      "learning_rate": 4.855711336012226e-06,
      "loss": 0.5266,
      "step": 2500
    },
    {
      "epoch": 0.30664541441883275,
      "grad_norm": 2.0681709202389547,
      "learning_rate": 4.855541591134109e-06,
      "loss": 0.5022,
      "step": 2501
    },
    {
      "epoch": 0.30676802354095145,
      "grad_norm": 2.0009444183296385,
      "learning_rate": 4.855371749439063e-06,
      "loss": 0.5247,
      "step": 2502
    },
    {
      "epoch": 0.30689063266307015,
      "grad_norm": 2.2678445040572366,
      "learning_rate": 4.855201810934067e-06,
      "loss": 0.5223,
      "step": 2503
    },
    {
      "epoch": 0.3070132417851888,
      "grad_norm": 1.981901628863335,
      "learning_rate": 4.855031775626109e-06,
      "loss": 0.6102,
      "step": 2504
    },
    {
      "epoch": 0.3071358509073075,
      "grad_norm": 1.8941910905062571,
      "learning_rate": 4.854861643522174e-06,
      "loss": 0.5636,
      "step": 2505
    },
    {
      "epoch": 0.3072584600294262,
      "grad_norm": 1.919995295596903,
      "learning_rate": 4.854691414629258e-06,
      "loss": 0.5975,
      "step": 2506
    },
    {
      "epoch": 0.3073810691515449,
      "grad_norm": 2.25478943956314,
      "learning_rate": 4.854521088954355e-06,
      "loss": 0.6021,
      "step": 2507
    },
    {
      "epoch": 0.30750367827366354,
      "grad_norm": 1.9600067992945436,
      "learning_rate": 4.8543506665044674e-06,
      "loss": 0.5612,
      "step": 2508
    },
    {
      "epoch": 0.30762628739578224,
      "grad_norm": 2.3386023687574737,
      "learning_rate": 4.854180147286599e-06,
      "loss": 0.6061,
      "step": 2509
    },
    {
      "epoch": 0.30774889651790094,
      "grad_norm": 2.293073455602218,
      "learning_rate": 4.85400953130776e-06,
      "loss": 0.6141,
      "step": 2510
    },
    {
      "epoch": 0.30787150564001964,
      "grad_norm": 2.110305089357345,
      "learning_rate": 4.85383881857496e-06,
      "loss": 0.5617,
      "step": 2511
    },
    {
      "epoch": 0.3079941147621383,
      "grad_norm": 1.8356544468681828,
      "learning_rate": 4.8536680090952185e-06,
      "loss": 0.5183,
      "step": 2512
    },
    {
      "epoch": 0.308116723884257,
      "grad_norm": 2.101743175481761,
      "learning_rate": 4.853497102875555e-06,
      "loss": 0.616,
      "step": 2513
    },
    {
      "epoch": 0.3082393330063757,
      "grad_norm": 2.203670764538001,
      "learning_rate": 4.853326099922994e-06,
      "loss": 0.5272,
      "step": 2514
    },
    {
      "epoch": 0.3083619421284944,
      "grad_norm": 1.8981870028824803,
      "learning_rate": 4.853155000244564e-06,
      "loss": 0.5511,
      "step": 2515
    },
    {
      "epoch": 0.308484551250613,
      "grad_norm": 2.271806292777771,
      "learning_rate": 4.852983803847296e-06,
      "loss": 0.5759,
      "step": 2516
    },
    {
      "epoch": 0.3086071603727317,
      "grad_norm": 1.9473004845753112,
      "learning_rate": 4.852812510738231e-06,
      "loss": 0.5343,
      "step": 2517
    },
    {
      "epoch": 0.3087297694948504,
      "grad_norm": 2.184380445007404,
      "learning_rate": 4.852641120924404e-06,
      "loss": 0.5754,
      "step": 2518
    },
    {
      "epoch": 0.3088523786169691,
      "grad_norm": 2.239275591865717,
      "learning_rate": 4.852469634412863e-06,
      "loss": 0.5316,
      "step": 2519
    },
    {
      "epoch": 0.30897498773908777,
      "grad_norm": 2.159479764990694,
      "learning_rate": 4.852298051210655e-06,
      "loss": 0.5676,
      "step": 2520
    },
    {
      "epoch": 0.30909759686120647,
      "grad_norm": 2.0683135319539003,
      "learning_rate": 4.852126371324833e-06,
      "loss": 0.557,
      "step": 2521
    },
    {
      "epoch": 0.30922020598332517,
      "grad_norm": 2.187565763326547,
      "learning_rate": 4.851954594762452e-06,
      "loss": 0.5654,
      "step": 2522
    },
    {
      "epoch": 0.30934281510544387,
      "grad_norm": 2.2002555917007456,
      "learning_rate": 4.851782721530575e-06,
      "loss": 0.5274,
      "step": 2523
    },
    {
      "epoch": 0.3094654242275625,
      "grad_norm": 2.3704828099172857,
      "learning_rate": 4.851610751636264e-06,
      "loss": 0.6103,
      "step": 2524
    },
    {
      "epoch": 0.3095880333496812,
      "grad_norm": 2.376045568699821,
      "learning_rate": 4.851438685086588e-06,
      "loss": 0.6398,
      "step": 2525
    },
    {
      "epoch": 0.3097106424717999,
      "grad_norm": 2.567856734000049,
      "learning_rate": 4.8512665218886185e-06,
      "loss": 0.573,
      "step": 2526
    },
    {
      "epoch": 0.3098332515939186,
      "grad_norm": 2.112232611821303,
      "learning_rate": 4.851094262049433e-06,
      "loss": 0.5875,
      "step": 2527
    },
    {
      "epoch": 0.30995586071603726,
      "grad_norm": 2.4209678351858805,
      "learning_rate": 4.85092190557611e-06,
      "loss": 0.6223,
      "step": 2528
    },
    {
      "epoch": 0.31007846983815596,
      "grad_norm": 2.2914161040846563,
      "learning_rate": 4.850749452475736e-06,
      "loss": 0.5357,
      "step": 2529
    },
    {
      "epoch": 0.31020107896027466,
      "grad_norm": 2.1369221323425838,
      "learning_rate": 4.850576902755398e-06,
      "loss": 0.534,
      "step": 2530
    },
    {
      "epoch": 0.31032368808239336,
      "grad_norm": 2.0493875135237,
      "learning_rate": 4.850404256422186e-06,
      "loss": 0.5259,
      "step": 2531
    },
    {
      "epoch": 0.310446297204512,
      "grad_norm": 2.1799924511627626,
      "learning_rate": 4.8502315134832e-06,
      "loss": 0.5051,
      "step": 2532
    },
    {
      "epoch": 0.3105689063266307,
      "grad_norm": 2.3326455306477007,
      "learning_rate": 4.850058673945538e-06,
      "loss": 0.5521,
      "step": 2533
    },
    {
      "epoch": 0.3106915154487494,
      "grad_norm": 2.0003102771681287,
      "learning_rate": 4.849885737816303e-06,
      "loss": 0.5263,
      "step": 2534
    },
    {
      "epoch": 0.3108141245708681,
      "grad_norm": 2.204844839523329,
      "learning_rate": 4.849712705102604e-06,
      "loss": 0.5353,
      "step": 2535
    },
    {
      "epoch": 0.31093673369298674,
      "grad_norm": 2.1078787758586883,
      "learning_rate": 4.849539575811554e-06,
      "loss": 0.5291,
      "step": 2536
    },
    {
      "epoch": 0.31105934281510544,
      "grad_norm": 2.0220859816557915,
      "learning_rate": 4.849366349950266e-06,
      "loss": 0.5725,
      "step": 2537
    },
    {
      "epoch": 0.31118195193722414,
      "grad_norm": 2.080370828477035,
      "learning_rate": 4.849193027525864e-06,
      "loss": 0.5839,
      "step": 2538
    },
    {
      "epoch": 0.3113045610593428,
      "grad_norm": 1.9893308849177147,
      "learning_rate": 4.8490196085454685e-06,
      "loss": 0.5506,
      "step": 2539
    },
    {
      "epoch": 0.3114271701814615,
      "grad_norm": 2.2757095930317766,
      "learning_rate": 4.848846093016208e-06,
      "loss": 0.5315,
      "step": 2540
    },
    {
      "epoch": 0.3115497793035802,
      "grad_norm": 2.5155865922695555,
      "learning_rate": 4.848672480945216e-06,
      "loss": 0.5813,
      "step": 2541
    },
    {
      "epoch": 0.3116723884256989,
      "grad_norm": 2.191980503265634,
      "learning_rate": 4.848498772339627e-06,
      "loss": 0.5706,
      "step": 2542
    },
    {
      "epoch": 0.31179499754781753,
      "grad_norm": 2.104169300330261,
      "learning_rate": 4.848324967206581e-06,
      "loss": 0.5866,
      "step": 2543
    },
    {
      "epoch": 0.31191760666993623,
      "grad_norm": 2.2972426927482847,
      "learning_rate": 4.8481510655532195e-06,
      "loss": 0.5772,
      "step": 2544
    },
    {
      "epoch": 0.31204021579205493,
      "grad_norm": 2.2727375229006412,
      "learning_rate": 4.847977067386693e-06,
      "loss": 0.5728,
      "step": 2545
    },
    {
      "epoch": 0.31216282491417363,
      "grad_norm": 1.9399435495070072,
      "learning_rate": 4.847802972714153e-06,
      "loss": 0.6246,
      "step": 2546
    },
    {
      "epoch": 0.3122854340362923,
      "grad_norm": 1.990242101203904,
      "learning_rate": 4.847628781542753e-06,
      "loss": 0.519,
      "step": 2547
    },
    {
      "epoch": 0.312408043158411,
      "grad_norm": 1.930962243793643,
      "learning_rate": 4.847454493879656e-06,
      "loss": 0.5106,
      "step": 2548
    },
    {
      "epoch": 0.3125306522805297,
      "grad_norm": 2.1558729907172838,
      "learning_rate": 4.847280109732022e-06,
      "loss": 0.5811,
      "step": 2549
    },
    {
      "epoch": 0.3126532614026484,
      "grad_norm": 2.1162836531343427,
      "learning_rate": 4.847105629107021e-06,
      "loss": 0.6021,
      "step": 2550
    },
    {
      "epoch": 0.312775870524767,
      "grad_norm": 2.1380060352176136,
      "learning_rate": 4.846931052011823e-06,
      "loss": 0.5053,
      "step": 2551
    },
    {
      "epoch": 0.3128984796468857,
      "grad_norm": 2.1263380370431495,
      "learning_rate": 4.846756378453603e-06,
      "loss": 0.5669,
      "step": 2552
    },
    {
      "epoch": 0.3130210887690044,
      "grad_norm": 2.2571083318828027,
      "learning_rate": 4.846581608439542e-06,
      "loss": 0.6027,
      "step": 2553
    },
    {
      "epoch": 0.3131436978911231,
      "grad_norm": 2.0905369616972256,
      "learning_rate": 4.846406741976823e-06,
      "loss": 0.6035,
      "step": 2554
    },
    {
      "epoch": 0.31326630701324176,
      "grad_norm": 2.021414717760167,
      "learning_rate": 4.846231779072632e-06,
      "loss": 0.5735,
      "step": 2555
    },
    {
      "epoch": 0.31338891613536046,
      "grad_norm": 1.8706851226967947,
      "learning_rate": 4.846056719734161e-06,
      "loss": 0.5581,
      "step": 2556
    },
    {
      "epoch": 0.31351152525747916,
      "grad_norm": 2.329909258756687,
      "learning_rate": 4.8458815639686065e-06,
      "loss": 0.623,
      "step": 2557
    },
    {
      "epoch": 0.31363413437959786,
      "grad_norm": 2.1626313505663157,
      "learning_rate": 4.8457063117831656e-06,
      "loss": 0.5312,
      "step": 2558
    },
    {
      "epoch": 0.3137567435017165,
      "grad_norm": 2.2525162815043482,
      "learning_rate": 4.845530963185043e-06,
      "loss": 0.5498,
      "step": 2559
    },
    {
      "epoch": 0.3138793526238352,
      "grad_norm": 2.1162678232055185,
      "learning_rate": 4.845355518181446e-06,
      "loss": 0.6109,
      "step": 2560
    },
    {
      "epoch": 0.3140019617459539,
      "grad_norm": 2.1896832659232714,
      "learning_rate": 4.8451799767795846e-06,
      "loss": 0.5981,
      "step": 2561
    },
    {
      "epoch": 0.3141245708680726,
      "grad_norm": 2.1774503566281385,
      "learning_rate": 4.8450043389866744e-06,
      "loss": 0.5652,
      "step": 2562
    },
    {
      "epoch": 0.31424717999019125,
      "grad_norm": 2.174259007580952,
      "learning_rate": 4.8448286048099335e-06,
      "loss": 0.5463,
      "step": 2563
    },
    {
      "epoch": 0.31436978911230995,
      "grad_norm": 2.0232077963674873,
      "learning_rate": 4.844652774256587e-06,
      "loss": 0.5372,
      "step": 2564
    },
    {
      "epoch": 0.31449239823442865,
      "grad_norm": 2.5116434996760417,
      "learning_rate": 4.84447684733386e-06,
      "loss": 0.5642,
      "step": 2565
    },
    {
      "epoch": 0.31461500735654735,
      "grad_norm": 2.0109596438898865,
      "learning_rate": 4.844300824048984e-06,
      "loss": 0.5695,
      "step": 2566
    },
    {
      "epoch": 0.314737616478666,
      "grad_norm": 2.1881259868824876,
      "learning_rate": 4.844124704409195e-06,
      "loss": 0.5989,
      "step": 2567
    },
    {
      "epoch": 0.3148602256007847,
      "grad_norm": 2.056127788347427,
      "learning_rate": 4.843948488421728e-06,
      "loss": 0.6074,
      "step": 2568
    },
    {
      "epoch": 0.3149828347229034,
      "grad_norm": 2.2253512112697176,
      "learning_rate": 4.843772176093831e-06,
      "loss": 0.5915,
      "step": 2569
    },
    {
      "epoch": 0.3151054438450221,
      "grad_norm": 2.487070778822094,
      "learning_rate": 4.8435957674327475e-06,
      "loss": 0.5712,
      "step": 2570
    },
    {
      "epoch": 0.31522805296714074,
      "grad_norm": 2.263555513840315,
      "learning_rate": 4.843419262445729e-06,
      "loss": 0.5257,
      "step": 2571
    },
    {
      "epoch": 0.31535066208925944,
      "grad_norm": 2.2313860834137014,
      "learning_rate": 4.84324266114003e-06,
      "loss": 0.561,
      "step": 2572
    },
    {
      "epoch": 0.31547327121137814,
      "grad_norm": 2.2203236188791937,
      "learning_rate": 4.843065963522909e-06,
      "loss": 0.5669,
      "step": 2573
    },
    {
      "epoch": 0.31559588033349684,
      "grad_norm": 1.9565456902433327,
      "learning_rate": 4.842889169601629e-06,
      "loss": 0.5467,
      "step": 2574
    },
    {
      "epoch": 0.3157184894556155,
      "grad_norm": 2.2418653999582725,
      "learning_rate": 4.8427122793834565e-06,
      "loss": 0.5663,
      "step": 2575
    },
    {
      "epoch": 0.3158410985777342,
      "grad_norm": 1.961941183545985,
      "learning_rate": 4.842535292875662e-06,
      "loss": 0.5104,
      "step": 2576
    },
    {
      "epoch": 0.3159637076998529,
      "grad_norm": 2.153642567097384,
      "learning_rate": 4.842358210085519e-06,
      "loss": 0.5863,
      "step": 2577
    },
    {
      "epoch": 0.3160863168219716,
      "grad_norm": 2.205237117431091,
      "learning_rate": 4.842181031020308e-06,
      "loss": 0.5915,
      "step": 2578
    },
    {
      "epoch": 0.3162089259440902,
      "grad_norm": 2.3681164150646086,
      "learning_rate": 4.842003755687309e-06,
      "loss": 0.5399,
      "step": 2579
    },
    {
      "epoch": 0.3163315350662089,
      "grad_norm": 2.2874983386758787,
      "learning_rate": 4.84182638409381e-06,
      "loss": 0.5887,
      "step": 2580
    },
    {
      "epoch": 0.3164541441883276,
      "grad_norm": 2.1557894248073723,
      "learning_rate": 4.8416489162471e-06,
      "loss": 0.5789,
      "step": 2581
    },
    {
      "epoch": 0.3165767533104463,
      "grad_norm": 2.0717184476258956,
      "learning_rate": 4.8414713521544755e-06,
      "loss": 0.53,
      "step": 2582
    },
    {
      "epoch": 0.31669936243256497,
      "grad_norm": 1.933873944565141,
      "learning_rate": 4.841293691823232e-06,
      "loss": 0.5146,
      "step": 2583
    },
    {
      "epoch": 0.31682197155468367,
      "grad_norm": 2.2464633624832135,
      "learning_rate": 4.8411159352606735e-06,
      "loss": 0.6317,
      "step": 2584
    },
    {
      "epoch": 0.31694458067680237,
      "grad_norm": 2.349672106736992,
      "learning_rate": 4.840938082474105e-06,
      "loss": 0.5973,
      "step": 2585
    },
    {
      "epoch": 0.31706718979892107,
      "grad_norm": 2.108856770349395,
      "learning_rate": 4.840760133470837e-06,
      "loss": 0.4923,
      "step": 2586
    },
    {
      "epoch": 0.3171897989210397,
      "grad_norm": 2.2745161432899748,
      "learning_rate": 4.840582088258183e-06,
      "loss": 0.5606,
      "step": 2587
    },
    {
      "epoch": 0.3173124080431584,
      "grad_norm": 2.266838604803363,
      "learning_rate": 4.840403946843463e-06,
      "loss": 0.6017,
      "step": 2588
    },
    {
      "epoch": 0.3174350171652771,
      "grad_norm": 1.9381284034170791,
      "learning_rate": 4.840225709233997e-06,
      "loss": 0.5186,
      "step": 2589
    },
    {
      "epoch": 0.31755762628739576,
      "grad_norm": 2.2139589696167423,
      "learning_rate": 4.84004737543711e-06,
      "loss": 0.6062,
      "step": 2590
    },
    {
      "epoch": 0.31768023540951446,
      "grad_norm": 2.292148341155313,
      "learning_rate": 4.839868945460134e-06,
      "loss": 0.6061,
      "step": 2591
    },
    {
      "epoch": 0.31780284453163316,
      "grad_norm": 2.263386116767671,
      "learning_rate": 4.8396904193104025e-06,
      "loss": 0.539,
      "step": 2592
    },
    {
      "epoch": 0.31792545365375185,
      "grad_norm": 2.4753468930229885,
      "learning_rate": 4.839511796995252e-06,
      "loss": 0.5802,
      "step": 2593
    },
    {
      "epoch": 0.3180480627758705,
      "grad_norm": 2.1292417610281995,
      "learning_rate": 4.839333078522025e-06,
      "loss": 0.5639,
      "step": 2594
    },
    {
      "epoch": 0.3181706718979892,
      "grad_norm": 2.160452653289787,
      "learning_rate": 4.8391542638980675e-06,
      "loss": 0.5403,
      "step": 2595
    },
    {
      "epoch": 0.3182932810201079,
      "grad_norm": 2.288318159270002,
      "learning_rate": 4.838975353130727e-06,
      "loss": 0.6061,
      "step": 2596
    },
    {
      "epoch": 0.3184158901422266,
      "grad_norm": 2.0129684652810065,
      "learning_rate": 4.83879634622736e-06,
      "loss": 0.575,
      "step": 2597
    },
    {
      "epoch": 0.31853849926434524,
      "grad_norm": 2.440308716826246,
      "learning_rate": 4.838617243195323e-06,
      "loss": 0.628,
      "step": 2598
    },
    {
      "epoch": 0.31866110838646394,
      "grad_norm": 2.392242724092846,
      "learning_rate": 4.838438044041976e-06,
      "loss": 0.5735,
      "step": 2599
    },
    {
      "epoch": 0.31878371750858264,
      "grad_norm": 2.1359563181615235,
      "learning_rate": 4.838258748774686e-06,
      "loss": 0.5555,
      "step": 2600
    },
    {
      "epoch": 0.31890632663070134,
      "grad_norm": 2.292022362476964,
      "learning_rate": 4.838079357400823e-06,
      "loss": 0.5976,
      "step": 2601
    },
    {
      "epoch": 0.31902893575282,
      "grad_norm": 2.157240491502986,
      "learning_rate": 4.837899869927757e-06,
      "loss": 0.6055,
      "step": 2602
    },
    {
      "epoch": 0.3191515448749387,
      "grad_norm": 2.043225859367703,
      "learning_rate": 4.8377202863628685e-06,
      "loss": 0.4974,
      "step": 2603
    },
    {
      "epoch": 0.3192741539970574,
      "grad_norm": 1.9026130259488268,
      "learning_rate": 4.837540606713538e-06,
      "loss": 0.5859,
      "step": 2604
    },
    {
      "epoch": 0.3193967631191761,
      "grad_norm": 2.150869144114063,
      "learning_rate": 4.837360830987149e-06,
      "loss": 0.5537,
      "step": 2605
    },
    {
      "epoch": 0.31951937224129473,
      "grad_norm": 2.101705365156381,
      "learning_rate": 4.837180959191093e-06,
      "loss": 0.5573,
      "step": 2606
    },
    {
      "epoch": 0.31964198136341343,
      "grad_norm": 2.2813035013078515,
      "learning_rate": 4.837000991332762e-06,
      "loss": 0.6047,
      "step": 2607
    },
    {
      "epoch": 0.31976459048553213,
      "grad_norm": 1.876395314920957,
      "learning_rate": 4.836820927419552e-06,
      "loss": 0.5556,
      "step": 2608
    },
    {
      "epoch": 0.31988719960765083,
      "grad_norm": 1.955326370359488,
      "learning_rate": 4.8366407674588654e-06,
      "loss": 0.5212,
      "step": 2609
    },
    {
      "epoch": 0.3200098087297695,
      "grad_norm": 2.0494133848777727,
      "learning_rate": 4.836460511458107e-06,
      "loss": 0.5895,
      "step": 2610
    },
    {
      "epoch": 0.3201324178518882,
      "grad_norm": 2.0126054571834975,
      "learning_rate": 4.836280159424685e-06,
      "loss": 0.5923,
      "step": 2611
    },
    {
      "epoch": 0.3202550269740069,
      "grad_norm": 2.117128309648565,
      "learning_rate": 4.836099711366012e-06,
      "loss": 0.5856,
      "step": 2612
    },
    {
      "epoch": 0.3203776360961256,
      "grad_norm": 2.1302059616788083,
      "learning_rate": 4.835919167289506e-06,
      "loss": 0.5405,
      "step": 2613
    },
    {
      "epoch": 0.3205002452182442,
      "grad_norm": 2.032065928198955,
      "learning_rate": 4.8357385272025855e-06,
      "loss": 0.5636,
      "step": 2614
    },
    {
      "epoch": 0.3206228543403629,
      "grad_norm": 2.009794496230216,
      "learning_rate": 4.835557791112677e-06,
      "loss": 0.5616,
      "step": 2615
    },
    {
      "epoch": 0.3207454634624816,
      "grad_norm": 1.978851331336874,
      "learning_rate": 4.835376959027208e-06,
      "loss": 0.5689,
      "step": 2616
    },
    {
      "epoch": 0.3208680725846003,
      "grad_norm": 2.1628073254804354,
      "learning_rate": 4.835196030953613e-06,
      "loss": 0.5458,
      "step": 2617
    },
    {
      "epoch": 0.32099068170671896,
      "grad_norm": 2.0309842570408807,
      "learning_rate": 4.835015006899326e-06,
      "loss": 0.5554,
      "step": 2618
    },
    {
      "epoch": 0.32111329082883766,
      "grad_norm": 2.2216556088656607,
      "learning_rate": 4.8348338868717885e-06,
      "loss": 0.5554,
      "step": 2619
    },
    {
      "epoch": 0.32123589995095636,
      "grad_norm": 1.9617045071177175,
      "learning_rate": 4.8346526708784444e-06,
      "loss": 0.5518,
      "step": 2620
    },
    {
      "epoch": 0.32135850907307506,
      "grad_norm": 1.856034527976495,
      "learning_rate": 4.834471358926743e-06,
      "loss": 0.5787,
      "step": 2621
    },
    {
      "epoch": 0.3214811181951937,
      "grad_norm": 2.2026988862045616,
      "learning_rate": 4.8342899510241355e-06,
      "loss": 0.5587,
      "step": 2622
    },
    {
      "epoch": 0.3216037273173124,
      "grad_norm": 2.0442931020307826,
      "learning_rate": 4.834108447178078e-06,
      "loss": 0.5599,
      "step": 2623
    },
    {
      "epoch": 0.3217263364394311,
      "grad_norm": 2.163125670034706,
      "learning_rate": 4.833926847396032e-06,
      "loss": 0.5554,
      "step": 2624
    },
    {
      "epoch": 0.3218489455615498,
      "grad_norm": 2.2057650252221848,
      "learning_rate": 4.8337451516854605e-06,
      "loss": 0.5774,
      "step": 2625
    },
    {
      "epoch": 0.32197155468366845,
      "grad_norm": 2.363326120064941,
      "learning_rate": 4.833563360053831e-06,
      "loss": 0.5947,
      "step": 2626
    },
    {
      "epoch": 0.32209416380578715,
      "grad_norm": 2.400834295520317,
      "learning_rate": 4.833381472508617e-06,
      "loss": 0.5101,
      "step": 2627
    },
    {
      "epoch": 0.32221677292790585,
      "grad_norm": 2.2317896925015006,
      "learning_rate": 4.833199489057293e-06,
      "loss": 0.5774,
      "step": 2628
    },
    {
      "epoch": 0.32233938205002455,
      "grad_norm": 1.9944840187918227,
      "learning_rate": 4.83301740970734e-06,
      "loss": 0.5399,
      "step": 2629
    },
    {
      "epoch": 0.3224619911721432,
      "grad_norm": 2.151946856083046,
      "learning_rate": 4.832835234466241e-06,
      "loss": 0.5506,
      "step": 2630
    },
    {
      "epoch": 0.3225846002942619,
      "grad_norm": 2.2133410929580015,
      "learning_rate": 4.832652963341483e-06,
      "loss": 0.5831,
      "step": 2631
    },
    {
      "epoch": 0.3227072094163806,
      "grad_norm": 2.138704323592089,
      "learning_rate": 4.832470596340559e-06,
      "loss": 0.4847,
      "step": 2632
    },
    {
      "epoch": 0.3228298185384993,
      "grad_norm": 2.2295556378319112,
      "learning_rate": 4.832288133470965e-06,
      "loss": 0.5577,
      "step": 2633
    },
    {
      "epoch": 0.32295242766061794,
      "grad_norm": 2.2627686673275207,
      "learning_rate": 4.832105574740199e-06,
      "loss": 0.5949,
      "step": 2634
    },
    {
      "epoch": 0.32307503678273664,
      "grad_norm": 2.1512660941069717,
      "learning_rate": 4.831922920155766e-06,
      "loss": 0.5461,
      "step": 2635
    },
    {
      "epoch": 0.32319764590485534,
      "grad_norm": 2.4156607510054275,
      "learning_rate": 4.831740169725172e-06,
      "loss": 0.5681,
      "step": 2636
    },
    {
      "epoch": 0.323320255026974,
      "grad_norm": 2.067491853092663,
      "learning_rate": 4.831557323455929e-06,
      "loss": 0.5399,
      "step": 2637
    },
    {
      "epoch": 0.3234428641490927,
      "grad_norm": 1.9773476609519738,
      "learning_rate": 4.8313743813555516e-06,
      "loss": 0.5766,
      "step": 2638
    },
    {
      "epoch": 0.3235654732712114,
      "grad_norm": 2.181577625418793,
      "learning_rate": 4.831191343431561e-06,
      "loss": 0.6097,
      "step": 2639
    },
    {
      "epoch": 0.3236880823933301,
      "grad_norm": 2.1861284263933647,
      "learning_rate": 4.831008209691478e-06,
      "loss": 0.5271,
      "step": 2640
    },
    {
      "epoch": 0.3238106915154487,
      "grad_norm": 2.524756337828793,
      "learning_rate": 4.830824980142832e-06,
      "loss": 0.5834,
      "step": 2641
    },
    {
      "epoch": 0.3239333006375674,
      "grad_norm": 1.9997979865821023,
      "learning_rate": 4.830641654793152e-06,
      "loss": 0.539,
      "step": 2642
    },
    {
      "epoch": 0.3240559097596861,
      "grad_norm": 2.2980435790764817,
      "learning_rate": 4.830458233649975e-06,
      "loss": 0.6322,
      "step": 2643
    },
    {
      "epoch": 0.3241785188818048,
      "grad_norm": 2.0623395346767537,
      "learning_rate": 4.830274716720837e-06,
      "loss": 0.5844,
      "step": 2644
    },
    {
      "epoch": 0.32430112800392347,
      "grad_norm": 2.291168726833156,
      "learning_rate": 4.8300911040132845e-06,
      "loss": 0.6109,
      "step": 2645
    },
    {
      "epoch": 0.32442373712604217,
      "grad_norm": 2.0959358343627668,
      "learning_rate": 4.829907395534862e-06,
      "loss": 0.5628,
      "step": 2646
    },
    {
      "epoch": 0.32454634624816087,
      "grad_norm": 2.0752646667680303,
      "learning_rate": 4.82972359129312e-06,
      "loss": 0.5845,
      "step": 2647
    },
    {
      "epoch": 0.32466895537027957,
      "grad_norm": 2.0680569665200603,
      "learning_rate": 4.829539691295615e-06,
      "loss": 0.5863,
      "step": 2648
    },
    {
      "epoch": 0.3247915644923982,
      "grad_norm": 2.2354523670250646,
      "learning_rate": 4.829355695549905e-06,
      "loss": 0.5451,
      "step": 2649
    },
    {
      "epoch": 0.3249141736145169,
      "grad_norm": 2.253780825361444,
      "learning_rate": 4.8291716040635505e-06,
      "loss": 0.5566,
      "step": 2650
    },
    {
      "epoch": 0.3250367827366356,
      "grad_norm": 2.3037633270495053,
      "learning_rate": 4.828987416844121e-06,
      "loss": 0.5358,
      "step": 2651
    },
    {
      "epoch": 0.3251593918587543,
      "grad_norm": 1.8878018544592976,
      "learning_rate": 4.828803133899184e-06,
      "loss": 0.5293,
      "step": 2652
    },
    {
      "epoch": 0.32528200098087295,
      "grad_norm": 2.468550999672396,
      "learning_rate": 4.8286187552363165e-06,
      "loss": 0.563,
      "step": 2653
    },
    {
      "epoch": 0.32540461010299165,
      "grad_norm": 2.07538543230941,
      "learning_rate": 4.828434280863096e-06,
      "loss": 0.5373,
      "step": 2654
    },
    {
      "epoch": 0.32552721922511035,
      "grad_norm": 2.3604609596308395,
      "learning_rate": 4.8282497107871036e-06,
      "loss": 0.5927,
      "step": 2655
    },
    {
      "epoch": 0.32564982834722905,
      "grad_norm": 2.0634378475869974,
      "learning_rate": 4.8280650450159265e-06,
      "loss": 0.5417,
      "step": 2656
    },
    {
      "epoch": 0.3257724374693477,
      "grad_norm": 2.0282564981310207,
      "learning_rate": 4.827880283557155e-06,
      "loss": 0.5822,
      "step": 2657
    },
    {
      "epoch": 0.3258950465914664,
      "grad_norm": 2.318332846391812,
      "learning_rate": 4.8276954264183814e-06,
      "loss": 0.5724,
      "step": 2658
    },
    {
      "epoch": 0.3260176557135851,
      "grad_norm": 2.4668519768757395,
      "learning_rate": 4.827510473607206e-06,
      "loss": 0.587,
      "step": 2659
    },
    {
      "epoch": 0.3261402648357038,
      "grad_norm": 2.1000231443310504,
      "learning_rate": 4.827325425131229e-06,
      "loss": 0.5522,
      "step": 2660
    },
    {
      "epoch": 0.32626287395782244,
      "grad_norm": 2.1093684792893916,
      "learning_rate": 4.8271402809980576e-06,
      "loss": 0.577,
      "step": 2661
    },
    {
      "epoch": 0.32638548307994114,
      "grad_norm": 2.070342251511821,
      "learning_rate": 4.8269550412153e-06,
      "loss": 0.5736,
      "step": 2662
    },
    {
      "epoch": 0.32650809220205984,
      "grad_norm": 1.9332118117551411,
      "learning_rate": 4.82676970579057e-06,
      "loss": 0.5772,
      "step": 2663
    },
    {
      "epoch": 0.32663070132417854,
      "grad_norm": 2.059656474831579,
      "learning_rate": 4.8265842747314875e-06,
      "loss": 0.5475,
      "step": 2664
    },
    {
      "epoch": 0.3267533104462972,
      "grad_norm": 1.8699344905868982,
      "learning_rate": 4.826398748045672e-06,
      "loss": 0.5926,
      "step": 2665
    },
    {
      "epoch": 0.3268759195684159,
      "grad_norm": 2.259347599620248,
      "learning_rate": 4.826213125740749e-06,
      "loss": 0.6032,
      "step": 2666
    },
    {
      "epoch": 0.3269985286905346,
      "grad_norm": 1.9820837395701505,
      "learning_rate": 4.826027407824349e-06,
      "loss": 0.5607,
      "step": 2667
    },
    {
      "epoch": 0.3271211378126533,
      "grad_norm": 1.9276966930450614,
      "learning_rate": 4.825841594304104e-06,
      "loss": 0.5256,
      "step": 2668
    },
    {
      "epoch": 0.32724374693477193,
      "grad_norm": 2.025337267561681,
      "learning_rate": 4.825655685187651e-06,
      "loss": 0.5566,
      "step": 2669
    },
    {
      "epoch": 0.32736635605689063,
      "grad_norm": 2.0444149392977238,
      "learning_rate": 4.825469680482633e-06,
      "loss": 0.5904,
      "step": 2670
    },
    {
      "epoch": 0.32748896517900933,
      "grad_norm": 2.213438562619513,
      "learning_rate": 4.825283580196695e-06,
      "loss": 0.5905,
      "step": 2671
    },
    {
      "epoch": 0.32761157430112803,
      "grad_norm": 2.1664109427804217,
      "learning_rate": 4.825097384337484e-06,
      "loss": 0.5912,
      "step": 2672
    },
    {
      "epoch": 0.32773418342324667,
      "grad_norm": 2.309243645885789,
      "learning_rate": 4.8249110929126545e-06,
      "loss": 0.5718,
      "step": 2673
    },
    {
      "epoch": 0.32785679254536537,
      "grad_norm": 2.081190826866376,
      "learning_rate": 4.824724705929863e-06,
      "loss": 0.5784,
      "step": 2674
    },
    {
      "epoch": 0.32797940166748407,
      "grad_norm": 2.1693285154205224,
      "learning_rate": 4.824538223396771e-06,
      "loss": 0.5573,
      "step": 2675
    },
    {
      "epoch": 0.32810201078960277,
      "grad_norm": 2.0949982637668256,
      "learning_rate": 4.824351645321043e-06,
      "loss": 0.5416,
      "step": 2676
    },
    {
      "epoch": 0.3282246199117214,
      "grad_norm": 2.2385848670640867,
      "learning_rate": 4.824164971710347e-06,
      "loss": 0.5518,
      "step": 2677
    },
    {
      "epoch": 0.3283472290338401,
      "grad_norm": 2.0611908730037496,
      "learning_rate": 4.823978202572356e-06,
      "loss": 0.4719,
      "step": 2678
    },
    {
      "epoch": 0.3284698381559588,
      "grad_norm": 2.152862809619523,
      "learning_rate": 4.823791337914747e-06,
      "loss": 0.6227,
      "step": 2679
    },
    {
      "epoch": 0.3285924472780775,
      "grad_norm": 1.9471967732519186,
      "learning_rate": 4.823604377745199e-06,
      "loss": 0.5689,
      "step": 2680
    },
    {
      "epoch": 0.32871505640019616,
      "grad_norm": 2.404858538713614,
      "learning_rate": 4.823417322071398e-06,
      "loss": 0.5611,
      "step": 2681
    },
    {
      "epoch": 0.32883766552231486,
      "grad_norm": 1.8312962529858126,
      "learning_rate": 4.823230170901033e-06,
      "loss": 0.5666,
      "step": 2682
    },
    {
      "epoch": 0.32896027464443356,
      "grad_norm": 1.915700540037742,
      "learning_rate": 4.8230429242417945e-06,
      "loss": 0.5588,
      "step": 2683
    },
    {
      "epoch": 0.32908288376655226,
      "grad_norm": 2.2047392382425706,
      "learning_rate": 4.822855582101378e-06,
      "loss": 0.5365,
      "step": 2684
    },
    {
      "epoch": 0.3292054928886709,
      "grad_norm": 2.162260686489382,
      "learning_rate": 4.822668144487485e-06,
      "loss": 0.586,
      "step": 2685
    },
    {
      "epoch": 0.3293281020107896,
      "grad_norm": 2.2734589245594905,
      "learning_rate": 4.82248061140782e-06,
      "loss": 0.5988,
      "step": 2686
    },
    {
      "epoch": 0.3294507111329083,
      "grad_norm": 1.9560163128815706,
      "learning_rate": 4.822292982870091e-06,
      "loss": 0.5414,
      "step": 2687
    },
    {
      "epoch": 0.32957332025502695,
      "grad_norm": 2.2512745388443522,
      "learning_rate": 4.822105258882007e-06,
      "loss": 0.5738,
      "step": 2688
    },
    {
      "epoch": 0.32969592937714565,
      "grad_norm": 2.0408492710742325,
      "learning_rate": 4.821917439451287e-06,
      "loss": 0.5855,
      "step": 2689
    },
    {
      "epoch": 0.32981853849926435,
      "grad_norm": 2.348811818632246,
      "learning_rate": 4.82172952458565e-06,
      "loss": 0.5769,
      "step": 2690
    },
    {
      "epoch": 0.32994114762138305,
      "grad_norm": 2.126932977673629,
      "learning_rate": 4.821541514292819e-06,
      "loss": 0.538,
      "step": 2691
    },
    {
      "epoch": 0.3300637567435017,
      "grad_norm": 2.149471187531189,
      "learning_rate": 4.821353408580521e-06,
      "loss": 0.5662,
      "step": 2692
    },
    {
      "epoch": 0.3301863658656204,
      "grad_norm": 2.0619556688456475,
      "learning_rate": 4.82116520745649e-06,
      "loss": 0.5453,
      "step": 2693
    },
    {
      "epoch": 0.3303089749877391,
      "grad_norm": 2.355396793292966,
      "learning_rate": 4.820976910928459e-06,
      "loss": 0.6144,
      "step": 2694
    },
    {
      "epoch": 0.3304315841098578,
      "grad_norm": 2.208364118718668,
      "learning_rate": 4.820788519004167e-06,
      "loss": 0.5232,
      "step": 2695
    },
    {
      "epoch": 0.33055419323197643,
      "grad_norm": 2.1684876827718287,
      "learning_rate": 4.820600031691358e-06,
      "loss": 0.5571,
      "step": 2696
    },
    {
      "epoch": 0.33067680235409513,
      "grad_norm": 2.3485361515075263,
      "learning_rate": 4.820411448997781e-06,
      "loss": 0.5828,
      "step": 2697
    },
    {
      "epoch": 0.33079941147621383,
      "grad_norm": 2.128842786544047,
      "learning_rate": 4.820222770931184e-06,
      "loss": 0.5407,
      "step": 2698
    },
    {
      "epoch": 0.33092202059833253,
      "grad_norm": 2.2198738782761525,
      "learning_rate": 4.820033997499325e-06,
      "loss": 0.581,
      "step": 2699
    },
    {
      "epoch": 0.3310446297204512,
      "grad_norm": 2.3129473707075427,
      "learning_rate": 4.81984512870996e-06,
      "loss": 0.5736,
      "step": 2700
    },
    {
      "epoch": 0.3311672388425699,
      "grad_norm": 2.337529639050983,
      "learning_rate": 4.819656164570855e-06,
      "loss": 0.619,
      "step": 2701
    },
    {
      "epoch": 0.3312898479646886,
      "grad_norm": 2.136158901519746,
      "learning_rate": 4.819467105089773e-06,
      "loss": 0.5416,
      "step": 2702
    },
    {
      "epoch": 0.3314124570868073,
      "grad_norm": 2.1869517505712786,
      "learning_rate": 4.819277950274488e-06,
      "loss": 0.4937,
      "step": 2703
    },
    {
      "epoch": 0.3315350662089259,
      "grad_norm": 1.9451439721337096,
      "learning_rate": 4.819088700132773e-06,
      "loss": 0.5811,
      "step": 2704
    },
    {
      "epoch": 0.3316576753310446,
      "grad_norm": 2.4263433998356523,
      "learning_rate": 4.818899354672406e-06,
      "loss": 0.5543,
      "step": 2705
    },
    {
      "epoch": 0.3317802844531633,
      "grad_norm": 2.0310773925065932,
      "learning_rate": 4.818709913901171e-06,
      "loss": 0.5623,
      "step": 2706
    },
    {
      "epoch": 0.331902893575282,
      "grad_norm": 2.1765066985156425,
      "learning_rate": 4.818520377826854e-06,
      "loss": 0.597,
      "step": 2707
    },
    {
      "epoch": 0.33202550269740067,
      "grad_norm": 2.065229331660956,
      "learning_rate": 4.818330746457244e-06,
      "loss": 0.4924,
      "step": 2708
    },
    {
      "epoch": 0.33214811181951936,
      "grad_norm": 2.2979426526307116,
      "learning_rate": 4.8181410198001365e-06,
      "loss": 0.6054,
      "step": 2709
    },
    {
      "epoch": 0.33227072094163806,
      "grad_norm": 1.8541121664851257,
      "learning_rate": 4.817951197863328e-06,
      "loss": 0.5691,
      "step": 2710
    },
    {
      "epoch": 0.33239333006375676,
      "grad_norm": 2.202891969546578,
      "learning_rate": 4.8177612806546235e-06,
      "loss": 0.5755,
      "step": 2711
    },
    {
      "epoch": 0.3325159391858754,
      "grad_norm": 2.0499225668918926,
      "learning_rate": 4.8175712681818264e-06,
      "loss": 0.582,
      "step": 2712
    },
    {
      "epoch": 0.3326385483079941,
      "grad_norm": 2.290423060546017,
      "learning_rate": 4.817381160452747e-06,
      "loss": 0.5767,
      "step": 2713
    },
    {
      "epoch": 0.3327611574301128,
      "grad_norm": 2.1419281511450015,
      "learning_rate": 4.817190957475199e-06,
      "loss": 0.5792,
      "step": 2714
    },
    {
      "epoch": 0.3328837665522315,
      "grad_norm": 2.5408072050025474,
      "learning_rate": 4.817000659257001e-06,
      "loss": 0.6259,
      "step": 2715
    },
    {
      "epoch": 0.33300637567435015,
      "grad_norm": 2.266609476923883,
      "learning_rate": 4.816810265805974e-06,
      "loss": 0.5845,
      "step": 2716
    },
    {
      "epoch": 0.33312898479646885,
      "grad_norm": 2.0961468857923764,
      "learning_rate": 4.816619777129943e-06,
      "loss": 0.5892,
      "step": 2717
    },
    {
      "epoch": 0.33325159391858755,
      "grad_norm": 2.201247415016691,
      "learning_rate": 4.816429193236738e-06,
      "loss": 0.5729,
      "step": 2718
    },
    {
      "epoch": 0.33337420304070625,
      "grad_norm": 2.1645622240365836,
      "learning_rate": 4.816238514134192e-06,
      "loss": 0.5611,
      "step": 2719
    },
    {
      "epoch": 0.3334968121628249,
      "grad_norm": 2.266897312522762,
      "learning_rate": 4.816047739830144e-06,
      "loss": 0.584,
      "step": 2720
    },
    {
      "epoch": 0.3336194212849436,
      "grad_norm": 2.377912860569799,
      "learning_rate": 4.815856870332432e-06,
      "loss": 0.5712,
      "step": 2721
    },
    {
      "epoch": 0.3337420304070623,
      "grad_norm": 2.255852726942042,
      "learning_rate": 4.815665905648903e-06,
      "loss": 0.5403,
      "step": 2722
    },
    {
      "epoch": 0.333864639529181,
      "grad_norm": 1.9584762275730805,
      "learning_rate": 4.815474845787406e-06,
      "loss": 0.5338,
      "step": 2723
    },
    {
      "epoch": 0.33398724865129964,
      "grad_norm": 2.2776765148701776,
      "learning_rate": 4.815283690755793e-06,
      "loss": 0.5724,
      "step": 2724
    },
    {
      "epoch": 0.33410985777341834,
      "grad_norm": 2.110947766178365,
      "learning_rate": 4.815092440561923e-06,
      "loss": 0.5479,
      "step": 2725
    },
    {
      "epoch": 0.33423246689553704,
      "grad_norm": 1.9886924089574547,
      "learning_rate": 4.8149010952136544e-06,
      "loss": 0.5083,
      "step": 2726
    },
    {
      "epoch": 0.33435507601765574,
      "grad_norm": 1.9427483647177275,
      "learning_rate": 4.814709654718852e-06,
      "loss": 0.5392,
      "step": 2727
    },
    {
      "epoch": 0.3344776851397744,
      "grad_norm": 2.1690619799230646,
      "learning_rate": 4.814518119085385e-06,
      "loss": 0.5302,
      "step": 2728
    },
    {
      "epoch": 0.3346002942618931,
      "grad_norm": 2.267283679905566,
      "learning_rate": 4.8143264883211265e-06,
      "loss": 0.5909,
      "step": 2729
    },
    {
      "epoch": 0.3347229033840118,
      "grad_norm": 2.033280164997415,
      "learning_rate": 4.814134762433951e-06,
      "loss": 0.5776,
      "step": 2730
    },
    {
      "epoch": 0.3348455125061305,
      "grad_norm": 2.080201588886976,
      "learning_rate": 4.813942941431741e-06,
      "loss": 0.5481,
      "step": 2731
    },
    {
      "epoch": 0.3349681216282491,
      "grad_norm": 2.21419394403794,
      "learning_rate": 4.813751025322379e-06,
      "loss": 0.5529,
      "step": 2732
    },
    {
      "epoch": 0.3350907307503678,
      "grad_norm": 2.2619171972305923,
      "learning_rate": 4.813559014113754e-06,
      "loss": 0.5949,
      "step": 2733
    },
    {
      "epoch": 0.3352133398724865,
      "grad_norm": 2.115855076540607,
      "learning_rate": 4.813366907813758e-06,
      "loss": 0.5128,
      "step": 2734
    },
    {
      "epoch": 0.3353359489946052,
      "grad_norm": 2.1567015598026575,
      "learning_rate": 4.813174706430286e-06,
      "loss": 0.5858,
      "step": 2735
    },
    {
      "epoch": 0.33545855811672387,
      "grad_norm": 2.162595206384739,
      "learning_rate": 4.812982409971239e-06,
      "loss": 0.5414,
      "step": 2736
    },
    {
      "epoch": 0.33558116723884257,
      "grad_norm": 2.1048199347616374,
      "learning_rate": 4.8127900184445186e-06,
      "loss": 0.5578,
      "step": 2737
    },
    {
      "epoch": 0.33570377636096127,
      "grad_norm": 2.0457634554237294,
      "learning_rate": 4.812597531858035e-06,
      "loss": 0.5663,
      "step": 2738
    },
    {
      "epoch": 0.3358263854830799,
      "grad_norm": 2.180280892031595,
      "learning_rate": 4.812404950219699e-06,
      "loss": 0.5673,
      "step": 2739
    },
    {
      "epoch": 0.3359489946051986,
      "grad_norm": 1.954054850255883,
      "learning_rate": 4.812212273537426e-06,
      "loss": 0.5676,
      "step": 2740
    },
    {
      "epoch": 0.3360716037273173,
      "grad_norm": 2.0064077404682608,
      "learning_rate": 4.812019501819134e-06,
      "loss": 0.5262,
      "step": 2741
    },
    {
      "epoch": 0.336194212849436,
      "grad_norm": 1.8544508159751543,
      "learning_rate": 4.811826635072749e-06,
      "loss": 0.461,
      "step": 2742
    },
    {
      "epoch": 0.33631682197155466,
      "grad_norm": 2.3167066184586083,
      "learning_rate": 4.811633673306195e-06,
      "loss": 0.572,
      "step": 2743
    },
    {
      "epoch": 0.33643943109367336,
      "grad_norm": 2.1394742679792995,
      "learning_rate": 4.811440616527404e-06,
      "loss": 0.6096,
      "step": 2744
    },
    {
      "epoch": 0.33656204021579206,
      "grad_norm": 2.1029350640565037,
      "learning_rate": 4.811247464744313e-06,
      "loss": 0.5732,
      "step": 2745
    },
    {
      "epoch": 0.33668464933791076,
      "grad_norm": 2.175751791269463,
      "learning_rate": 4.81105421796486e-06,
      "loss": 0.5505,
      "step": 2746
    },
    {
      "epoch": 0.3368072584600294,
      "grad_norm": 2.275420006362202,
      "learning_rate": 4.810860876196986e-06,
      "loss": 0.6781,
      "step": 2747
    },
    {
      "epoch": 0.3369298675821481,
      "grad_norm": 2.100463302596484,
      "learning_rate": 4.81066743944864e-06,
      "loss": 0.5255,
      "step": 2748
    },
    {
      "epoch": 0.3370524767042668,
      "grad_norm": 2.5754672969378603,
      "learning_rate": 4.81047390772777e-06,
      "loss": 0.5536,
      "step": 2749
    },
    {
      "epoch": 0.3371750858263855,
      "grad_norm": 1.9560481360245996,
      "learning_rate": 4.810280281042333e-06,
      "loss": 0.5687,
      "step": 2750
    },
    {
      "epoch": 0.33729769494850415,
      "grad_norm": 2.0118886778299627,
      "learning_rate": 4.8100865594002866e-06,
      "loss": 0.5674,
      "step": 2751
    },
    {
      "epoch": 0.33742030407062285,
      "grad_norm": 2.0693859146929,
      "learning_rate": 4.809892742809592e-06,
      "loss": 0.5605,
      "step": 2752
    },
    {
      "epoch": 0.33754291319274154,
      "grad_norm": 2.1135065774742996,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.5909,
      "step": 2753
    },
    {
      "epoch": 0.33766552231486024,
      "grad_norm": 2.2594893887688245,
      "learning_rate": 4.809504824814132e-06,
      "loss": 0.5174,
      "step": 2754
    },
    {
      "epoch": 0.3377881314369789,
      "grad_norm": 2.113591863270934,
      "learning_rate": 4.809310723425309e-06,
      "loss": 0.5259,
      "step": 2755
    },
    {
      "epoch": 0.3379107405590976,
      "grad_norm": 2.1604544568441977,
      "learning_rate": 4.809116527119726e-06,
      "loss": 0.5382,
      "step": 2756
    },
    {
      "epoch": 0.3380333496812163,
      "grad_norm": 2.2484372805384463,
      "learning_rate": 4.8089222359053665e-06,
      "loss": 0.6144,
      "step": 2757
    },
    {
      "epoch": 0.338155958803335,
      "grad_norm": 1.97403377021205,
      "learning_rate": 4.8087278497902164e-06,
      "loss": 0.63,
      "step": 2758
    },
    {
      "epoch": 0.33827856792545363,
      "grad_norm": 2.0056099358131783,
      "learning_rate": 4.8085333687822634e-06,
      "loss": 0.6352,
      "step": 2759
    },
    {
      "epoch": 0.33840117704757233,
      "grad_norm": 1.9590991690461992,
      "learning_rate": 4.808338792889503e-06,
      "loss": 0.5178,
      "step": 2760
    },
    {
      "epoch": 0.33852378616969103,
      "grad_norm": 2.005876832581365,
      "learning_rate": 4.80814412211993e-06,
      "loss": 0.5917,
      "step": 2761
    },
    {
      "epoch": 0.33864639529180973,
      "grad_norm": 1.8750059363314884,
      "learning_rate": 4.8079493564815485e-06,
      "loss": 0.5434,
      "step": 2762
    },
    {
      "epoch": 0.3387690044139284,
      "grad_norm": 2.5697674291507173,
      "learning_rate": 4.807754495982362e-06,
      "loss": 0.6009,
      "step": 2763
    },
    {
      "epoch": 0.3388916135360471,
      "grad_norm": 1.8386040807826871,
      "learning_rate": 4.807559540630381e-06,
      "loss": 0.5728,
      "step": 2764
    },
    {
      "epoch": 0.3390142226581658,
      "grad_norm": 2.226651110490691,
      "learning_rate": 4.807364490433617e-06,
      "loss": 0.5472,
      "step": 2765
    },
    {
      "epoch": 0.3391368317802845,
      "grad_norm": 2.034868374160199,
      "learning_rate": 4.807169345400088e-06,
      "loss": 0.5835,
      "step": 2766
    },
    {
      "epoch": 0.3392594409024031,
      "grad_norm": 2.028976113842342,
      "learning_rate": 4.806974105537814e-06,
      "loss": 0.5974,
      "step": 2767
    },
    {
      "epoch": 0.3393820500245218,
      "grad_norm": 2.13939868517275,
      "learning_rate": 4.80677877085482e-06,
      "loss": 0.5869,
      "step": 2768
    },
    {
      "epoch": 0.3395046591466405,
      "grad_norm": 2.004466455554557,
      "learning_rate": 4.806583341359135e-06,
      "loss": 0.5165,
      "step": 2769
    },
    {
      "epoch": 0.3396272682687592,
      "grad_norm": 1.9953524239615872,
      "learning_rate": 4.8063878170587904e-06,
      "loss": 0.555,
      "step": 2770
    },
    {
      "epoch": 0.33974987739087786,
      "grad_norm": 2.166639387136037,
      "learning_rate": 4.806192197961824e-06,
      "loss": 0.5318,
      "step": 2771
    },
    {
      "epoch": 0.33987248651299656,
      "grad_norm": 2.1805427716828585,
      "learning_rate": 4.805996484076276e-06,
      "loss": 0.5465,
      "step": 2772
    },
    {
      "epoch": 0.33999509563511526,
      "grad_norm": 2.0442402273913682,
      "learning_rate": 4.805800675410189e-06,
      "loss": 0.5834,
      "step": 2773
    },
    {
      "epoch": 0.34011770475723396,
      "grad_norm": 2.075162057044204,
      "learning_rate": 4.8056047719716135e-06,
      "loss": 0.5673,
      "step": 2774
    },
    {
      "epoch": 0.3402403138793526,
      "grad_norm": 2.03420614715893,
      "learning_rate": 4.8054087737685995e-06,
      "loss": 0.5317,
      "step": 2775
    },
    {
      "epoch": 0.3403629230014713,
      "grad_norm": 1.9290287429681146,
      "learning_rate": 4.805212680809202e-06,
      "loss": 0.5252,
      "step": 2776
    },
    {
      "epoch": 0.34048553212359,
      "grad_norm": 2.1609801858173388,
      "learning_rate": 4.805016493101484e-06,
      "loss": 0.547,
      "step": 2777
    },
    {
      "epoch": 0.3406081412457087,
      "grad_norm": 2.0772519810384513,
      "learning_rate": 4.8048202106535075e-06,
      "loss": 0.5505,
      "step": 2778
    },
    {
      "epoch": 0.34073075036782735,
      "grad_norm": 2.018813626926367,
      "learning_rate": 4.80462383347334e-06,
      "loss": 0.5418,
      "step": 2779
    },
    {
      "epoch": 0.34085335948994605,
      "grad_norm": 2.2557447540180977,
      "learning_rate": 4.804427361569052e-06,
      "loss": 0.5696,
      "step": 2780
    },
    {
      "epoch": 0.34097596861206475,
      "grad_norm": 2.0286980577646228,
      "learning_rate": 4.80423079494872e-06,
      "loss": 0.5662,
      "step": 2781
    },
    {
      "epoch": 0.34109857773418345,
      "grad_norm": 2.2916305040579945,
      "learning_rate": 4.804034133620423e-06,
      "loss": 0.597,
      "step": 2782
    },
    {
      "epoch": 0.3412211868563021,
      "grad_norm": 2.174334539050703,
      "learning_rate": 4.803837377592245e-06,
      "loss": 0.5554,
      "step": 2783
    },
    {
      "epoch": 0.3413437959784208,
      "grad_norm": 2.0329622917558883,
      "learning_rate": 4.803640526872271e-06,
      "loss": 0.5491,
      "step": 2784
    },
    {
      "epoch": 0.3414664051005395,
      "grad_norm": 2.157534907189676,
      "learning_rate": 4.803443581468594e-06,
      "loss": 0.5289,
      "step": 2785
    },
    {
      "epoch": 0.34158901422265814,
      "grad_norm": 1.9725370237461064,
      "learning_rate": 4.803246541389307e-06,
      "loss": 0.5625,
      "step": 2786
    },
    {
      "epoch": 0.34171162334477684,
      "grad_norm": 2.3828666911872447,
      "learning_rate": 4.80304940664251e-06,
      "loss": 0.6137,
      "step": 2787
    },
    {
      "epoch": 0.34183423246689554,
      "grad_norm": 2.3665212724303095,
      "learning_rate": 4.802852177236305e-06,
      "loss": 0.5164,
      "step": 2788
    },
    {
      "epoch": 0.34195684158901424,
      "grad_norm": 2.0385087534725495,
      "learning_rate": 4.802654853178799e-06,
      "loss": 0.5367,
      "step": 2789
    },
    {
      "epoch": 0.3420794507111329,
      "grad_norm": 2.1834011245424194,
      "learning_rate": 4.8024574344781015e-06,
      "loss": 0.5326,
      "step": 2790
    },
    {
      "epoch": 0.3422020598332516,
      "grad_norm": 2.031722898198034,
      "learning_rate": 4.802259921142327e-06,
      "loss": 0.5311,
      "step": 2791
    },
    {
      "epoch": 0.3423246689553703,
      "grad_norm": 2.094122263434666,
      "learning_rate": 4.802062313179595e-06,
      "loss": 0.5667,
      "step": 2792
    },
    {
      "epoch": 0.342447278077489,
      "grad_norm": 2.212389087104708,
      "learning_rate": 4.801864610598026e-06,
      "loss": 0.5534,
      "step": 2793
    },
    {
      "epoch": 0.3425698871996076,
      "grad_norm": 1.9727525379278679,
      "learning_rate": 4.8016668134057465e-06,
      "loss": 0.5223,
      "step": 2794
    },
    {
      "epoch": 0.3426924963217263,
      "grad_norm": 2.026794371600365,
      "learning_rate": 4.801468921610886e-06,
      "loss": 0.587,
      "step": 2795
    },
    {
      "epoch": 0.342815105443845,
      "grad_norm": 2.044517003087823,
      "learning_rate": 4.801270935221578e-06,
      "loss": 0.5663,
      "step": 2796
    },
    {
      "epoch": 0.3429377145659637,
      "grad_norm": 2.097035066690043,
      "learning_rate": 4.801072854245961e-06,
      "loss": 0.5772,
      "step": 2797
    },
    {
      "epoch": 0.34306032368808237,
      "grad_norm": 2.1428244318338034,
      "learning_rate": 4.800874678692176e-06,
      "loss": 0.5342,
      "step": 2798
    },
    {
      "epoch": 0.34318293281020107,
      "grad_norm": 2.21831721661674,
      "learning_rate": 4.800676408568368e-06,
      "loss": 0.6031,
      "step": 2799
    },
    {
      "epoch": 0.34330554193231977,
      "grad_norm": 2.4202680757850707,
      "learning_rate": 4.800478043882687e-06,
      "loss": 0.6307,
      "step": 2800
    },
    {
      "epoch": 0.34342815105443847,
      "grad_norm": 2.0001818598416956,
      "learning_rate": 4.800279584643285e-06,
      "loss": 0.5256,
      "step": 2801
    },
    {
      "epoch": 0.3435507601765571,
      "grad_norm": 2.269770654602562,
      "learning_rate": 4.8000810308583205e-06,
      "loss": 0.5772,
      "step": 2802
    },
    {
      "epoch": 0.3436733692986758,
      "grad_norm": 2.4196236558636177,
      "learning_rate": 4.799882382535953e-06,
      "loss": 0.5564,
      "step": 2803
    },
    {
      "epoch": 0.3437959784207945,
      "grad_norm": 2.280977703583073,
      "learning_rate": 4.799683639684348e-06,
      "loss": 0.659,
      "step": 2804
    },
    {
      "epoch": 0.3439185875429132,
      "grad_norm": 2.029852754835976,
      "learning_rate": 4.799484802311674e-06,
      "loss": 0.5735,
      "step": 2805
    },
    {
      "epoch": 0.34404119666503186,
      "grad_norm": 1.9131947188743708,
      "learning_rate": 4.7992858704261056e-06,
      "loss": 0.5042,
      "step": 2806
    },
    {
      "epoch": 0.34416380578715056,
      "grad_norm": 2.2493000728780213,
      "learning_rate": 4.799086844035815e-06,
      "loss": 0.5991,
      "step": 2807
    },
    {
      "epoch": 0.34428641490926926,
      "grad_norm": 2.3222539835478675,
      "learning_rate": 4.798887723148986e-06,
      "loss": 0.5739,
      "step": 2808
    },
    {
      "epoch": 0.34440902403138796,
      "grad_norm": 2.0954644301195584,
      "learning_rate": 4.798688507773802e-06,
      "loss": 0.5738,
      "step": 2809
    },
    {
      "epoch": 0.3445316331535066,
      "grad_norm": 2.05149610939195,
      "learning_rate": 4.79848919791845e-06,
      "loss": 0.6067,
      "step": 2810
    },
    {
      "epoch": 0.3446542422756253,
      "grad_norm": 2.24639669326068,
      "learning_rate": 4.798289793591123e-06,
      "loss": 0.5663,
      "step": 2811
    },
    {
      "epoch": 0.344776851397744,
      "grad_norm": 2.282350750087361,
      "learning_rate": 4.798090294800017e-06,
      "loss": 0.6229,
      "step": 2812
    },
    {
      "epoch": 0.3448994605198627,
      "grad_norm": 2.084054508643681,
      "learning_rate": 4.797890701553331e-06,
      "loss": 0.5752,
      "step": 2813
    },
    {
      "epoch": 0.34502206964198134,
      "grad_norm": 2.4571843561568167,
      "learning_rate": 4.797691013859269e-06,
      "loss": 0.5736,
      "step": 2814
    },
    {
      "epoch": 0.34514467876410004,
      "grad_norm": 1.935404540262591,
      "learning_rate": 4.79749123172604e-06,
      "loss": 0.561,
      "step": 2815
    },
    {
      "epoch": 0.34526728788621874,
      "grad_norm": 2.343456700088709,
      "learning_rate": 4.797291355161853e-06,
      "loss": 0.5804,
      "step": 2816
    },
    {
      "epoch": 0.34538989700833744,
      "grad_norm": 2.113890245883015,
      "learning_rate": 4.797091384174924e-06,
      "loss": 0.5695,
      "step": 2817
    },
    {
      "epoch": 0.3455125061304561,
      "grad_norm": 1.9435677092655446,
      "learning_rate": 4.796891318773472e-06,
      "loss": 0.5309,
      "step": 2818
    },
    {
      "epoch": 0.3456351152525748,
      "grad_norm": 2.140672846187679,
      "learning_rate": 4.796691158965721e-06,
      "loss": 0.6026,
      "step": 2819
    },
    {
      "epoch": 0.3457577243746935,
      "grad_norm": 2.139665947426166,
      "learning_rate": 4.796490904759898e-06,
      "loss": 0.5669,
      "step": 2820
    },
    {
      "epoch": 0.3458803334968122,
      "grad_norm": 2.093491087225223,
      "learning_rate": 4.796290556164232e-06,
      "loss": 0.5684,
      "step": 2821
    },
    {
      "epoch": 0.34600294261893083,
      "grad_norm": 1.9977656703012823,
      "learning_rate": 4.79609011318696e-06,
      "loss": 0.5361,
      "step": 2822
    },
    {
      "epoch": 0.34612555174104953,
      "grad_norm": 2.10103994723688,
      "learning_rate": 4.795889575836319e-06,
      "loss": 0.5624,
      "step": 2823
    },
    {
      "epoch": 0.34624816086316823,
      "grad_norm": 2.137325832526662,
      "learning_rate": 4.7956889441205525e-06,
      "loss": 0.549,
      "step": 2824
    },
    {
      "epoch": 0.34637076998528693,
      "grad_norm": 2.0539294303421936,
      "learning_rate": 4.795488218047905e-06,
      "loss": 0.5313,
      "step": 2825
    },
    {
      "epoch": 0.3464933791074056,
      "grad_norm": 2.0379719571895056,
      "learning_rate": 4.795287397626629e-06,
      "loss": 0.5701,
      "step": 2826
    },
    {
      "epoch": 0.3466159882295243,
      "grad_norm": 2.1448553473917364,
      "learning_rate": 4.795086482864976e-06,
      "loss": 0.5863,
      "step": 2827
    },
    {
      "epoch": 0.346738597351643,
      "grad_norm": 2.137016301005333,
      "learning_rate": 4.794885473771207e-06,
      "loss": 0.5197,
      "step": 2828
    },
    {
      "epoch": 0.3468612064737617,
      "grad_norm": 2.3391681268380036,
      "learning_rate": 4.794684370353582e-06,
      "loss": 0.5521,
      "step": 2829
    },
    {
      "epoch": 0.3469838155958803,
      "grad_norm": 2.2560180558816354,
      "learning_rate": 4.794483172620366e-06,
      "loss": 0.6124,
      "step": 2830
    },
    {
      "epoch": 0.347106424717999,
      "grad_norm": 2.0407620895510474,
      "learning_rate": 4.79428188057983e-06,
      "loss": 0.6008,
      "step": 2831
    },
    {
      "epoch": 0.3472290338401177,
      "grad_norm": 2.0110923898015236,
      "learning_rate": 4.7940804942402475e-06,
      "loss": 0.5823,
      "step": 2832
    },
    {
      "epoch": 0.3473516429622364,
      "grad_norm": 2.1143475136863388,
      "learning_rate": 4.793879013609894e-06,
      "loss": 0.569,
      "step": 2833
    },
    {
      "epoch": 0.34747425208435506,
      "grad_norm": 2.230693054785888,
      "learning_rate": 4.793677438697054e-06,
      "loss": 0.5974,
      "step": 2834
    },
    {
      "epoch": 0.34759686120647376,
      "grad_norm": 1.8830152034467809,
      "learning_rate": 4.79347576951001e-06,
      "loss": 0.575,
      "step": 2835
    },
    {
      "epoch": 0.34771947032859246,
      "grad_norm": 2.05902209922629,
      "learning_rate": 4.7932740060570505e-06,
      "loss": 0.545,
      "step": 2836
    },
    {
      "epoch": 0.3478420794507111,
      "grad_norm": 2.007145765794899,
      "learning_rate": 4.7930721483464704e-06,
      "loss": 0.5816,
      "step": 2837
    },
    {
      "epoch": 0.3479646885728298,
      "grad_norm": 1.902608402889865,
      "learning_rate": 4.792870196386566e-06,
      "loss": 0.6048,
      "step": 2838
    },
    {
      "epoch": 0.3480872976949485,
      "grad_norm": 1.9784694989494738,
      "learning_rate": 4.7926681501856355e-06,
      "loss": 0.5707,
      "step": 2839
    },
    {
      "epoch": 0.3482099068170672,
      "grad_norm": 1.871059184142365,
      "learning_rate": 4.792466009751987e-06,
      "loss": 0.5947,
      "step": 2840
    },
    {
      "epoch": 0.34833251593918585,
      "grad_norm": 2.080594202384509,
      "learning_rate": 4.792263775093926e-06,
      "loss": 0.5368,
      "step": 2841
    },
    {
      "epoch": 0.34845512506130455,
      "grad_norm": 2.1618502487323483,
      "learning_rate": 4.792061446219766e-06,
      "loss": 0.5632,
      "step": 2842
    },
    {
      "epoch": 0.34857773418342325,
      "grad_norm": 2.332256705444855,
      "learning_rate": 4.791859023137822e-06,
      "loss": 0.569,
      "step": 2843
    },
    {
      "epoch": 0.34870034330554195,
      "grad_norm": 2.246129061343891,
      "learning_rate": 4.791656505856416e-06,
      "loss": 0.5485,
      "step": 2844
    },
    {
      "epoch": 0.3488229524276606,
      "grad_norm": 2.061489310296882,
      "learning_rate": 4.79145389438387e-06,
      "loss": 0.5748,
      "step": 2845
    },
    {
      "epoch": 0.3489455615497793,
      "grad_norm": 2.1239416280564627,
      "learning_rate": 4.791251188728512e-06,
      "loss": 0.5663,
      "step": 2846
    },
    {
      "epoch": 0.349068170671898,
      "grad_norm": 2.0605688956230765,
      "learning_rate": 4.791048388898673e-06,
      "loss": 0.5789,
      "step": 2847
    },
    {
      "epoch": 0.3491907797940167,
      "grad_norm": 2.0665376530898416,
      "learning_rate": 4.7908454949026915e-06,
      "loss": 0.5693,
      "step": 2848
    },
    {
      "epoch": 0.34931338891613534,
      "grad_norm": 2.1298597070860272,
      "learning_rate": 4.790642506748903e-06,
      "loss": 0.5246,
      "step": 2849
    },
    {
      "epoch": 0.34943599803825404,
      "grad_norm": 2.2667384113868083,
      "learning_rate": 4.790439424445653e-06,
      "loss": 0.5833,
      "step": 2850
    },
    {
      "epoch": 0.34955860716037274,
      "grad_norm": 1.917762243399785,
      "learning_rate": 4.790236248001286e-06,
      "loss": 0.5367,
      "step": 2851
    },
    {
      "epoch": 0.34968121628249144,
      "grad_norm": 2.3299272164705913,
      "learning_rate": 4.790032977424156e-06,
      "loss": 0.5965,
      "step": 2852
    },
    {
      "epoch": 0.3498038254046101,
      "grad_norm": 2.131399834106721,
      "learning_rate": 4.789829612722617e-06,
      "loss": 0.6062,
      "step": 2853
    },
    {
      "epoch": 0.3499264345267288,
      "grad_norm": 2.1257368722662524,
      "learning_rate": 4.789626153905027e-06,
      "loss": 0.5768,
      "step": 2854
    },
    {
      "epoch": 0.3500490436488475,
      "grad_norm": 2.152227240524035,
      "learning_rate": 4.789422600979747e-06,
      "loss": 0.5545,
      "step": 2855
    },
    {
      "epoch": 0.3501716527709662,
      "grad_norm": 2.1020518543890696,
      "learning_rate": 4.789218953955147e-06,
      "loss": 0.5599,
      "step": 2856
    },
    {
      "epoch": 0.3502942618930848,
      "grad_norm": 2.1901774404796233,
      "learning_rate": 4.789015212839594e-06,
      "loss": 0.565,
      "step": 2857
    },
    {
      "epoch": 0.3504168710152035,
      "grad_norm": 2.0787042544327607,
      "learning_rate": 4.788811377641464e-06,
      "loss": 0.5838,
      "step": 2858
    },
    {
      "epoch": 0.3505394801373222,
      "grad_norm": 2.081398069508745,
      "learning_rate": 4.788607448369134e-06,
      "loss": 0.5812,
      "step": 2859
    },
    {
      "epoch": 0.3506620892594409,
      "grad_norm": 2.0960317033723634,
      "learning_rate": 4.788403425030986e-06,
      "loss": 0.5267,
      "step": 2860
    },
    {
      "epoch": 0.35078469838155957,
      "grad_norm": 2.296193745397798,
      "learning_rate": 4.788199307635406e-06,
      "loss": 0.58,
      "step": 2861
    },
    {
      "epoch": 0.35090730750367827,
      "grad_norm": 2.120585747167325,
      "learning_rate": 4.787995096190784e-06,
      "loss": 0.5852,
      "step": 2862
    },
    {
      "epoch": 0.35102991662579697,
      "grad_norm": 2.2037624266300577,
      "learning_rate": 4.787790790705512e-06,
      "loss": 0.6202,
      "step": 2863
    },
    {
      "epoch": 0.35115252574791567,
      "grad_norm": 1.809843075038605,
      "learning_rate": 4.78758639118799e-06,
      "loss": 0.5466,
      "step": 2864
    },
    {
      "epoch": 0.3512751348700343,
      "grad_norm": 1.8961773761002052,
      "learning_rate": 4.787381897646616e-06,
      "loss": 0.5759,
      "step": 2865
    },
    {
      "epoch": 0.351397743992153,
      "grad_norm": 2.2334717155001282,
      "learning_rate": 4.787177310089797e-06,
      "loss": 0.6165,
      "step": 2866
    },
    {
      "epoch": 0.3515203531142717,
      "grad_norm": 1.8408268575670856,
      "learning_rate": 4.786972628525941e-06,
      "loss": 0.5207,
      "step": 2867
    },
    {
      "epoch": 0.3516429622363904,
      "grad_norm": 2.1091166594256756,
      "learning_rate": 4.786767852963462e-06,
      "loss": 0.6072,
      "step": 2868
    },
    {
      "epoch": 0.35176557135850905,
      "grad_norm": 2.0818555043633156,
      "learning_rate": 4.7865629834107755e-06,
      "loss": 0.5418,
      "step": 2869
    },
    {
      "epoch": 0.35188818048062775,
      "grad_norm": 2.3072599739672284,
      "learning_rate": 4.786358019876301e-06,
      "loss": 0.5129,
      "step": 2870
    },
    {
      "epoch": 0.35201078960274645,
      "grad_norm": 2.0531712373033164,
      "learning_rate": 4.786152962368466e-06,
      "loss": 0.5353,
      "step": 2871
    },
    {
      "epoch": 0.35213339872486515,
      "grad_norm": 2.00305477733035,
      "learning_rate": 4.785947810895697e-06,
      "loss": 0.5497,
      "step": 2872
    },
    {
      "epoch": 0.3522560078469838,
      "grad_norm": 1.7442538338018758,
      "learning_rate": 4.7857425654664245e-06,
      "loss": 0.575,
      "step": 2873
    },
    {
      "epoch": 0.3523786169691025,
      "grad_norm": 1.9963296641900683,
      "learning_rate": 4.785537226089088e-06,
      "loss": 0.5457,
      "step": 2874
    },
    {
      "epoch": 0.3525012260912212,
      "grad_norm": 2.2199902539164005,
      "learning_rate": 4.785331792772123e-06,
      "loss": 0.5535,
      "step": 2875
    },
    {
      "epoch": 0.3526238352133399,
      "grad_norm": 2.0828173878885052,
      "learning_rate": 4.785126265523977e-06,
      "loss": 0.5376,
      "step": 2876
    },
    {
      "epoch": 0.35274644433545854,
      "grad_norm": 2.1610756158711886,
      "learning_rate": 4.784920644353096e-06,
      "loss": 0.6377,
      "step": 2877
    },
    {
      "epoch": 0.35286905345757724,
      "grad_norm": 2.0292966376652086,
      "learning_rate": 4.7847149292679305e-06,
      "loss": 0.566,
      "step": 2878
    },
    {
      "epoch": 0.35299166257969594,
      "grad_norm": 1.9267648275166351,
      "learning_rate": 4.784509120276937e-06,
      "loss": 0.5629,
      "step": 2879
    },
    {
      "epoch": 0.35311427170181464,
      "grad_norm": 2.2605363298209586,
      "learning_rate": 4.784303217388575e-06,
      "loss": 0.5592,
      "step": 2880
    },
    {
      "epoch": 0.3532368808239333,
      "grad_norm": 1.9219627440595723,
      "learning_rate": 4.7840972206113055e-06,
      "loss": 0.5903,
      "step": 2881
    },
    {
      "epoch": 0.353359489946052,
      "grad_norm": 2.302405377224869,
      "learning_rate": 4.783891129953598e-06,
      "loss": 0.5193,
      "step": 2882
    },
    {
      "epoch": 0.3534820990681707,
      "grad_norm": 2.1741378772683775,
      "learning_rate": 4.78368494542392e-06,
      "loss": 0.5509,
      "step": 2883
    },
    {
      "epoch": 0.3536047081902894,
      "grad_norm": 2.364336532451585,
      "learning_rate": 4.783478667030749e-06,
      "loss": 0.5454,
      "step": 2884
    },
    {
      "epoch": 0.35372731731240803,
      "grad_norm": 2.1513852090710466,
      "learning_rate": 4.783272294782562e-06,
      "loss": 0.618,
      "step": 2885
    },
    {
      "epoch": 0.35384992643452673,
      "grad_norm": 2.4396468371735542,
      "learning_rate": 4.783065828687841e-06,
      "loss": 0.5277,
      "step": 2886
    },
    {
      "epoch": 0.35397253555664543,
      "grad_norm": 2.464028082097574,
      "learning_rate": 4.782859268755074e-06,
      "loss": 0.6119,
      "step": 2887
    },
    {
      "epoch": 0.3540951446787641,
      "grad_norm": 2.3541954374295235,
      "learning_rate": 4.782652614992747e-06,
      "loss": 0.5975,
      "step": 2888
    },
    {
      "epoch": 0.3542177538008828,
      "grad_norm": 2.0613450671044578,
      "learning_rate": 4.782445867409359e-06,
      "loss": 0.5282,
      "step": 2889
    },
    {
      "epoch": 0.3543403629230015,
      "grad_norm": 2.2246653902011704,
      "learning_rate": 4.782239026013404e-06,
      "loss": 0.6095,
      "step": 2890
    },
    {
      "epoch": 0.3544629720451202,
      "grad_norm": 1.9366463540446675,
      "learning_rate": 4.782032090813384e-06,
      "loss": 0.5122,
      "step": 2891
    },
    {
      "epoch": 0.3545855811672388,
      "grad_norm": 2.1001254228909145,
      "learning_rate": 4.781825061817807e-06,
      "loss": 0.5758,
      "step": 2892
    },
    {
      "epoch": 0.3547081902893575,
      "grad_norm": 1.8997643876389863,
      "learning_rate": 4.781617939035178e-06,
      "loss": 0.5788,
      "step": 2893
    },
    {
      "epoch": 0.3548307994114762,
      "grad_norm": 2.2828187436502985,
      "learning_rate": 4.781410722474013e-06,
      "loss": 0.5958,
      "step": 2894
    },
    {
      "epoch": 0.3549534085335949,
      "grad_norm": 2.200540276416903,
      "learning_rate": 4.781203412142829e-06,
      "loss": 0.5931,
      "step": 2895
    },
    {
      "epoch": 0.35507601765571356,
      "grad_norm": 2.1508456251486785,
      "learning_rate": 4.7809960080501464e-06,
      "loss": 0.5775,
      "step": 2896
    },
    {
      "epoch": 0.35519862677783226,
      "grad_norm": 2.0498984572179078,
      "learning_rate": 4.780788510204489e-06,
      "loss": 0.6155,
      "step": 2897
    },
    {
      "epoch": 0.35532123589995096,
      "grad_norm": 1.995034695073086,
      "learning_rate": 4.780580918614387e-06,
      "loss": 0.5208,
      "step": 2898
    },
    {
      "epoch": 0.35544384502206966,
      "grad_norm": 2.0264414549279235,
      "learning_rate": 4.780373233288371e-06,
      "loss": 0.5491,
      "step": 2899
    },
    {
      "epoch": 0.3555664541441883,
      "grad_norm": 2.147132662404967,
      "learning_rate": 4.780165454234978e-06,
      "loss": 0.563,
      "step": 2900
    },
    {
      "epoch": 0.355689063266307,
      "grad_norm": 2.2299510882301177,
      "learning_rate": 4.779957581462749e-06,
      "loss": 0.6229,
      "step": 2901
    },
    {
      "epoch": 0.3558116723884257,
      "grad_norm": 1.9462250752096544,
      "learning_rate": 4.779749614980225e-06,
      "loss": 0.5454,
      "step": 2902
    },
    {
      "epoch": 0.3559342815105444,
      "grad_norm": 2.0777725924510473,
      "learning_rate": 4.779541554795958e-06,
      "loss": 0.6034,
      "step": 2903
    },
    {
      "epoch": 0.35605689063266305,
      "grad_norm": 2.0737062238742143,
      "learning_rate": 4.7793334009184964e-06,
      "loss": 0.5392,
      "step": 2904
    },
    {
      "epoch": 0.35617949975478175,
      "grad_norm": 2.221853836684175,
      "learning_rate": 4.779125153356397e-06,
      "loss": 0.5426,
      "step": 2905
    },
    {
      "epoch": 0.35630210887690045,
      "grad_norm": 2.2366935348450183,
      "learning_rate": 4.778916812118219e-06,
      "loss": 0.5245,
      "step": 2906
    },
    {
      "epoch": 0.35642471799901915,
      "grad_norm": 2.263115394744328,
      "learning_rate": 4.7787083772125255e-06,
      "loss": 0.6312,
      "step": 2907
    },
    {
      "epoch": 0.3565473271211378,
      "grad_norm": 1.983808897369176,
      "learning_rate": 4.778499848647884e-06,
      "loss": 0.5695,
      "step": 2908
    },
    {
      "epoch": 0.3566699362432565,
      "grad_norm": 2.1251740338890244,
      "learning_rate": 4.778291226432864e-06,
      "loss": 0.5941,
      "step": 2909
    },
    {
      "epoch": 0.3567925453653752,
      "grad_norm": 1.9087051533729953,
      "learning_rate": 4.778082510576042e-06,
      "loss": 0.5744,
      "step": 2910
    },
    {
      "epoch": 0.3569151544874939,
      "grad_norm": 1.983676753407669,
      "learning_rate": 4.777873701085996e-06,
      "loss": 0.5375,
      "step": 2911
    },
    {
      "epoch": 0.35703776360961254,
      "grad_norm": 2.3333516870984496,
      "learning_rate": 4.777664797971306e-06,
      "loss": 0.5713,
      "step": 2912
    },
    {
      "epoch": 0.35716037273173123,
      "grad_norm": 2.062116762934037,
      "learning_rate": 4.777455801240563e-06,
      "loss": 0.566,
      "step": 2913
    },
    {
      "epoch": 0.35728298185384993,
      "grad_norm": 2.256995349136988,
      "learning_rate": 4.777246710902354e-06,
      "loss": 0.5136,
      "step": 2914
    },
    {
      "epoch": 0.35740559097596863,
      "grad_norm": 2.0648061503191597,
      "learning_rate": 4.777037526965274e-06,
      "loss": 0.5776,
      "step": 2915
    },
    {
      "epoch": 0.3575282000980873,
      "grad_norm": 2.1643323326106314,
      "learning_rate": 4.7768282494379195e-06,
      "loss": 0.5605,
      "step": 2916
    },
    {
      "epoch": 0.357650809220206,
      "grad_norm": 1.984972140732173,
      "learning_rate": 4.776618878328894e-06,
      "loss": 0.5365,
      "step": 2917
    },
    {
      "epoch": 0.3577734183423247,
      "grad_norm": 2.09139171556923,
      "learning_rate": 4.776409413646802e-06,
      "loss": 0.5614,
      "step": 2918
    },
    {
      "epoch": 0.3578960274644434,
      "grad_norm": 1.9323005617013151,
      "learning_rate": 4.776199855400252e-06,
      "loss": 0.5702,
      "step": 2919
    },
    {
      "epoch": 0.358018636586562,
      "grad_norm": 2.2175094959612927,
      "learning_rate": 4.7759902035978585e-06,
      "loss": 0.6048,
      "step": 2920
    },
    {
      "epoch": 0.3581412457086807,
      "grad_norm": 2.1351136718170336,
      "learning_rate": 4.775780458248239e-06,
      "loss": 0.5194,
      "step": 2921
    },
    {
      "epoch": 0.3582638548307994,
      "grad_norm": 2.093874653677714,
      "learning_rate": 4.7755706193600135e-06,
      "loss": 0.5845,
      "step": 2922
    },
    {
      "epoch": 0.3583864639529181,
      "grad_norm": 2.1526870784337935,
      "learning_rate": 4.775360686941808e-06,
      "loss": 0.5938,
      "step": 2923
    },
    {
      "epoch": 0.35850907307503677,
      "grad_norm": 2.0904842254356084,
      "learning_rate": 4.775150661002249e-06,
      "loss": 0.596,
      "step": 2924
    },
    {
      "epoch": 0.35863168219715547,
      "grad_norm": 2.0067518424034416,
      "learning_rate": 4.774940541549969e-06,
      "loss": 0.4935,
      "step": 2925
    },
    {
      "epoch": 0.35875429131927417,
      "grad_norm": 2.0848427741449886,
      "learning_rate": 4.774730328593608e-06,
      "loss": 0.5548,
      "step": 2926
    },
    {
      "epoch": 0.35887690044139287,
      "grad_norm": 2.123947531104713,
      "learning_rate": 4.774520022141801e-06,
      "loss": 0.5643,
      "step": 2927
    },
    {
      "epoch": 0.3589995095635115,
      "grad_norm": 2.0855805229015645,
      "learning_rate": 4.7743096222031945e-06,
      "loss": 0.5901,
      "step": 2928
    },
    {
      "epoch": 0.3591221186856302,
      "grad_norm": 1.9891342849089677,
      "learning_rate": 4.774099128786437e-06,
      "loss": 0.5526,
      "step": 2929
    },
    {
      "epoch": 0.3592447278077489,
      "grad_norm": 2.2163464297126643,
      "learning_rate": 4.773888541900179e-06,
      "loss": 0.5809,
      "step": 2930
    },
    {
      "epoch": 0.3593673369298676,
      "grad_norm": 2.1103646220200183,
      "learning_rate": 4.773677861553077e-06,
      "loss": 0.5815,
      "step": 2931
    },
    {
      "epoch": 0.35948994605198625,
      "grad_norm": 2.3170934957883205,
      "learning_rate": 4.773467087753789e-06,
      "loss": 0.5605,
      "step": 2932
    },
    {
      "epoch": 0.35961255517410495,
      "grad_norm": 2.07418982577219,
      "learning_rate": 4.773256220510979e-06,
      "loss": 0.5504,
      "step": 2933
    },
    {
      "epoch": 0.35973516429622365,
      "grad_norm": 2.0667750921073886,
      "learning_rate": 4.773045259833313e-06,
      "loss": 0.5141,
      "step": 2934
    },
    {
      "epoch": 0.3598577734183423,
      "grad_norm": 2.061873656581924,
      "learning_rate": 4.7728342057294645e-06,
      "loss": 0.5453,
      "step": 2935
    },
    {
      "epoch": 0.359980382540461,
      "grad_norm": 2.2135369883090408,
      "learning_rate": 4.772623058208106e-06,
      "loss": 0.64,
      "step": 2936
    },
    {
      "epoch": 0.3601029916625797,
      "grad_norm": 2.204382050043287,
      "learning_rate": 4.772411817277915e-06,
      "loss": 0.5438,
      "step": 2937
    },
    {
      "epoch": 0.3602256007846984,
      "grad_norm": 1.9983603096994205,
      "learning_rate": 4.772200482947576e-06,
      "loss": 0.5534,
      "step": 2938
    },
    {
      "epoch": 0.36034820990681704,
      "grad_norm": 2.186392977140974,
      "learning_rate": 4.771989055225775e-06,
      "loss": 0.5488,
      "step": 2939
    },
    {
      "epoch": 0.36047081902893574,
      "grad_norm": 2.011585057017848,
      "learning_rate": 4.771777534121202e-06,
      "loss": 0.5516,
      "step": 2940
    },
    {
      "epoch": 0.36059342815105444,
      "grad_norm": 1.8518310841581462,
      "learning_rate": 4.771565919642549e-06,
      "loss": 0.5672,
      "step": 2941
    },
    {
      "epoch": 0.36071603727317314,
      "grad_norm": 2.018160812768486,
      "learning_rate": 4.771354211798516e-06,
      "loss": 0.5734,
      "step": 2942
    },
    {
      "epoch": 0.3608386463952918,
      "grad_norm": 2.1650925048672707,
      "learning_rate": 4.771142410597805e-06,
      "loss": 0.5313,
      "step": 2943
    },
    {
      "epoch": 0.3609612555174105,
      "grad_norm": 2.040610640628223,
      "learning_rate": 4.770930516049118e-06,
      "loss": 0.6048,
      "step": 2944
    },
    {
      "epoch": 0.3610838646395292,
      "grad_norm": 2.163446344924786,
      "learning_rate": 4.770718528161168e-06,
      "loss": 0.5482,
      "step": 2945
    },
    {
      "epoch": 0.3612064737616479,
      "grad_norm": 2.135550257398155,
      "learning_rate": 4.770506446942666e-06,
      "loss": 0.5722,
      "step": 2946
    },
    {
      "epoch": 0.36132908288376653,
      "grad_norm": 1.8979814050252133,
      "learning_rate": 4.770294272402329e-06,
      "loss": 0.5241,
      "step": 2947
    },
    {
      "epoch": 0.36145169200588523,
      "grad_norm": 2.225476419437898,
      "learning_rate": 4.770082004548878e-06,
      "loss": 0.5835,
      "step": 2948
    },
    {
      "epoch": 0.3615743011280039,
      "grad_norm": 2.1211149640919555,
      "learning_rate": 4.769869643391039e-06,
      "loss": 0.6135,
      "step": 2949
    },
    {
      "epoch": 0.3616969102501226,
      "grad_norm": 2.2068929519568603,
      "learning_rate": 4.769657188937538e-06,
      "loss": 0.5686,
      "step": 2950
    },
    {
      "epoch": 0.36181951937224127,
      "grad_norm": 1.9941832610463026,
      "learning_rate": 4.769444641197109e-06,
      "loss": 0.5388,
      "step": 2951
    },
    {
      "epoch": 0.36194212849435997,
      "grad_norm": 1.8397979092236303,
      "learning_rate": 4.769232000178488e-06,
      "loss": 0.516,
      "step": 2952
    },
    {
      "epoch": 0.36206473761647867,
      "grad_norm": 2.116332910622892,
      "learning_rate": 4.769019265890413e-06,
      "loss": 0.6397,
      "step": 2953
    },
    {
      "epoch": 0.36218734673859737,
      "grad_norm": 1.9398760956579457,
      "learning_rate": 4.76880643834163e-06,
      "loss": 0.5733,
      "step": 2954
    },
    {
      "epoch": 0.362309955860716,
      "grad_norm": 2.216102264662129,
      "learning_rate": 4.768593517540886e-06,
      "loss": 0.523,
      "step": 2955
    },
    {
      "epoch": 0.3624325649828347,
      "grad_norm": 2.0939674312175343,
      "learning_rate": 4.768380503496931e-06,
      "loss": 0.5634,
      "step": 2956
    },
    {
      "epoch": 0.3625551741049534,
      "grad_norm": 1.7010216235295648,
      "learning_rate": 4.768167396218523e-06,
      "loss": 0.5557,
      "step": 2957
    },
    {
      "epoch": 0.3626777832270721,
      "grad_norm": 2.266046806991849,
      "learning_rate": 4.767954195714419e-06,
      "loss": 0.5942,
      "step": 2958
    },
    {
      "epoch": 0.36280039234919076,
      "grad_norm": 2.0884212565127203,
      "learning_rate": 4.767740901993383e-06,
      "loss": 0.6171,
      "step": 2959
    },
    {
      "epoch": 0.36292300147130946,
      "grad_norm": 2.393851608454192,
      "learning_rate": 4.76752751506418e-06,
      "loss": 0.5963,
      "step": 2960
    },
    {
      "epoch": 0.36304561059342816,
      "grad_norm": 2.138314510053057,
      "learning_rate": 4.7673140349355826e-06,
      "loss": 0.5647,
      "step": 2961
    },
    {
      "epoch": 0.36316821971554686,
      "grad_norm": 2.0013122955502487,
      "learning_rate": 4.767100461616364e-06,
      "loss": 0.5723,
      "step": 2962
    },
    {
      "epoch": 0.3632908288376655,
      "grad_norm": 2.2116619469394903,
      "learning_rate": 4.766886795115303e-06,
      "loss": 0.5765,
      "step": 2963
    },
    {
      "epoch": 0.3634134379597842,
      "grad_norm": 2.1391770804900747,
      "learning_rate": 4.7666730354411815e-06,
      "loss": 0.6202,
      "step": 2964
    },
    {
      "epoch": 0.3635360470819029,
      "grad_norm": 2.074495082016022,
      "learning_rate": 4.7664591826027855e-06,
      "loss": 0.5942,
      "step": 2965
    },
    {
      "epoch": 0.3636586562040216,
      "grad_norm": 2.030471318945944,
      "learning_rate": 4.7662452366089044e-06,
      "loss": 0.5033,
      "step": 2966
    },
    {
      "epoch": 0.36378126532614025,
      "grad_norm": 2.04550225032401,
      "learning_rate": 4.766031197468332e-06,
      "loss": 0.5413,
      "step": 2967
    },
    {
      "epoch": 0.36390387444825895,
      "grad_norm": 1.9628456833548005,
      "learning_rate": 4.765817065189866e-06,
      "loss": 0.4839,
      "step": 2968
    },
    {
      "epoch": 0.36402648357037765,
      "grad_norm": 1.9331670570241968,
      "learning_rate": 4.765602839782307e-06,
      "loss": 0.5238,
      "step": 2969
    },
    {
      "epoch": 0.36414909269249635,
      "grad_norm": 2.067015412932695,
      "learning_rate": 4.765388521254461e-06,
      "loss": 0.5958,
      "step": 2970
    },
    {
      "epoch": 0.364271701814615,
      "grad_norm": 2.007340287898898,
      "learning_rate": 4.765174109615136e-06,
      "loss": 0.5403,
      "step": 2971
    },
    {
      "epoch": 0.3643943109367337,
      "grad_norm": 2.4764282280179786,
      "learning_rate": 4.764959604873144e-06,
      "loss": 0.5541,
      "step": 2972
    },
    {
      "epoch": 0.3645169200588524,
      "grad_norm": 1.7978170418367325,
      "learning_rate": 4.764745007037304e-06,
      "loss": 0.5477,
      "step": 2973
    },
    {
      "epoch": 0.3646395291809711,
      "grad_norm": 2.160168542546661,
      "learning_rate": 4.764530316116433e-06,
      "loss": 0.574,
      "step": 2974
    },
    {
      "epoch": 0.36476213830308973,
      "grad_norm": 2.291984711760818,
      "learning_rate": 4.764315532119358e-06,
      "loss": 0.6004,
      "step": 2975
    },
    {
      "epoch": 0.36488474742520843,
      "grad_norm": 2.0145592153866074,
      "learning_rate": 4.7641006550549066e-06,
      "loss": 0.5164,
      "step": 2976
    },
    {
      "epoch": 0.36500735654732713,
      "grad_norm": 1.9796019754780336,
      "learning_rate": 4.7638856849319094e-06,
      "loss": 0.5376,
      "step": 2977
    },
    {
      "epoch": 0.36512996566944583,
      "grad_norm": 2.1307975578771035,
      "learning_rate": 4.763670621759202e-06,
      "loss": 0.5793,
      "step": 2978
    },
    {
      "epoch": 0.3652525747915645,
      "grad_norm": 2.199763437230651,
      "learning_rate": 4.7634554655456255e-06,
      "loss": 0.5643,
      "step": 2979
    },
    {
      "epoch": 0.3653751839136832,
      "grad_norm": 2.0564385172161237,
      "learning_rate": 4.763240216300021e-06,
      "loss": 0.5469,
      "step": 2980
    },
    {
      "epoch": 0.3654977930358019,
      "grad_norm": 1.9623948463482115,
      "learning_rate": 4.763024874031238e-06,
      "loss": 0.5723,
      "step": 2981
    },
    {
      "epoch": 0.3656204021579206,
      "grad_norm": 2.135824621592412,
      "learning_rate": 4.762809438748126e-06,
      "loss": 0.516,
      "step": 2982
    },
    {
      "epoch": 0.3657430112800392,
      "grad_norm": 1.916095597118898,
      "learning_rate": 4.76259391045954e-06,
      "loss": 0.5611,
      "step": 2983
    },
    {
      "epoch": 0.3658656204021579,
      "grad_norm": 2.295466593999566,
      "learning_rate": 4.762378289174339e-06,
      "loss": 0.5301,
      "step": 2984
    },
    {
      "epoch": 0.3659882295242766,
      "grad_norm": 2.128153424201585,
      "learning_rate": 4.762162574901384e-06,
      "loss": 0.5587,
      "step": 2985
    },
    {
      "epoch": 0.36611083864639526,
      "grad_norm": 1.9726729814408002,
      "learning_rate": 4.761946767649544e-06,
      "loss": 0.533,
      "step": 2986
    },
    {
      "epoch": 0.36623344776851396,
      "grad_norm": 2.027063981517083,
      "learning_rate": 4.761730867427686e-06,
      "loss": 0.5366,
      "step": 2987
    },
    {
      "epoch": 0.36635605689063266,
      "grad_norm": 1.827723041082406,
      "learning_rate": 4.761514874244686e-06,
      "loss": 0.4974,
      "step": 2988
    },
    {
      "epoch": 0.36647866601275136,
      "grad_norm": 2.3306874788857246,
      "learning_rate": 4.7612987881094205e-06,
      "loss": 0.6284,
      "step": 2989
    },
    {
      "epoch": 0.36660127513487,
      "grad_norm": 2.2440388502554045,
      "learning_rate": 4.761082609030773e-06,
      "loss": 0.5537,
      "step": 2990
    },
    {
      "epoch": 0.3667238842569887,
      "grad_norm": 1.9757639496065358,
      "learning_rate": 4.760866337017626e-06,
      "loss": 0.5533,
      "step": 2991
    },
    {
      "epoch": 0.3668464933791074,
      "grad_norm": 1.9183009838143006,
      "learning_rate": 4.76064997207887e-06,
      "loss": 0.5341,
      "step": 2992
    },
    {
      "epoch": 0.3669691025012261,
      "grad_norm": 2.2055754265481107,
      "learning_rate": 4.760433514223398e-06,
      "loss": 0.5382,
      "step": 2993
    },
    {
      "epoch": 0.36709171162334475,
      "grad_norm": 2.0746032740080134,
      "learning_rate": 4.760216963460106e-06,
      "loss": 0.5467,
      "step": 2994
    },
    {
      "epoch": 0.36721432074546345,
      "grad_norm": 2.1296567237155886,
      "learning_rate": 4.760000319797897e-06,
      "loss": 0.5805,
      "step": 2995
    },
    {
      "epoch": 0.36733692986758215,
      "grad_norm": 2.282371855327838,
      "learning_rate": 4.759783583245673e-06,
      "loss": 0.5616,
      "step": 2996
    },
    {
      "epoch": 0.36745953898970085,
      "grad_norm": 2.0382387386542327,
      "learning_rate": 4.759566753812343e-06,
      "loss": 0.5703,
      "step": 2997
    },
    {
      "epoch": 0.3675821481118195,
      "grad_norm": 4.08754158517215,
      "learning_rate": 4.75934983150682e-06,
      "loss": 0.5711,
      "step": 2998
    },
    {
      "epoch": 0.3677047572339382,
      "grad_norm": 2.1109515703381114,
      "learning_rate": 4.759132816338018e-06,
      "loss": 0.554,
      "step": 2999
    },
    {
      "epoch": 0.3678273663560569,
      "grad_norm": 1.941660458255488,
      "learning_rate": 4.758915708314858e-06,
      "loss": 0.5724,
      "step": 3000
    },
    {
      "epoch": 0.3679499754781756,
      "grad_norm": 1.8929713736716038,
      "learning_rate": 4.758698507446263e-06,
      "loss": 0.547,
      "step": 3001
    },
    {
      "epoch": 0.36807258460029424,
      "grad_norm": 2.0072897437131925,
      "learning_rate": 4.758481213741162e-06,
      "loss": 0.5723,
      "step": 3002
    },
    {
      "epoch": 0.36819519372241294,
      "grad_norm": 2.1535029024769963,
      "learning_rate": 4.758263827208484e-06,
      "loss": 0.6063,
      "step": 3003
    },
    {
      "epoch": 0.36831780284453164,
      "grad_norm": 2.0623170481322877,
      "learning_rate": 4.758046347857164e-06,
      "loss": 0.5498,
      "step": 3004
    },
    {
      "epoch": 0.36844041196665034,
      "grad_norm": 2.1190682139011576,
      "learning_rate": 4.757828775696143e-06,
      "loss": 0.5885,
      "step": 3005
    },
    {
      "epoch": 0.368563021088769,
      "grad_norm": 2.094007714785925,
      "learning_rate": 4.7576111107343605e-06,
      "loss": 0.5462,
      "step": 3006
    },
    {
      "epoch": 0.3686856302108877,
      "grad_norm": 1.8741810868547417,
      "learning_rate": 4.757393352980766e-06,
      "loss": 0.58,
      "step": 3007
    },
    {
      "epoch": 0.3688082393330064,
      "grad_norm": 1.9573932813586274,
      "learning_rate": 4.757175502444308e-06,
      "loss": 0.5799,
      "step": 3008
    },
    {
      "epoch": 0.3689308484551251,
      "grad_norm": 2.02429487552174,
      "learning_rate": 4.75695755913394e-06,
      "loss": 0.5244,
      "step": 3009
    },
    {
      "epoch": 0.3690534575772437,
      "grad_norm": 2.0875704315950503,
      "learning_rate": 4.7567395230586215e-06,
      "loss": 0.5836,
      "step": 3010
    },
    {
      "epoch": 0.3691760666993624,
      "grad_norm": 1.9853409598388165,
      "learning_rate": 4.756521394227313e-06,
      "loss": 0.5712,
      "step": 3011
    },
    {
      "epoch": 0.3692986758214811,
      "grad_norm": 2.2278875108086282,
      "learning_rate": 4.75630317264898e-06,
      "loss": 0.5982,
      "step": 3012
    },
    {
      "epoch": 0.3694212849435998,
      "grad_norm": 2.3014399571542725,
      "learning_rate": 4.756084858332593e-06,
      "loss": 0.5149,
      "step": 3013
    },
    {
      "epoch": 0.36954389406571847,
      "grad_norm": 2.1224584389411705,
      "learning_rate": 4.755866451287124e-06,
      "loss": 0.5535,
      "step": 3014
    },
    {
      "epoch": 0.36966650318783717,
      "grad_norm": 1.958594922245089,
      "learning_rate": 4.75564795152155e-06,
      "loss": 0.5604,
      "step": 3015
    },
    {
      "epoch": 0.36978911230995587,
      "grad_norm": 2.0430335173535714,
      "learning_rate": 4.755429359044852e-06,
      "loss": 0.548,
      "step": 3016
    },
    {
      "epoch": 0.36991172143207457,
      "grad_norm": 2.1709746846634306,
      "learning_rate": 4.755210673866014e-06,
      "loss": 0.5752,
      "step": 3017
    },
    {
      "epoch": 0.3700343305541932,
      "grad_norm": 2.1203428951542276,
      "learning_rate": 4.754991895994025e-06,
      "loss": 0.5946,
      "step": 3018
    },
    {
      "epoch": 0.3701569396763119,
      "grad_norm": 2.089692106484295,
      "learning_rate": 4.754773025437877e-06,
      "loss": 0.5771,
      "step": 3019
    },
    {
      "epoch": 0.3702795487984306,
      "grad_norm": 1.9981747590193264,
      "learning_rate": 4.754554062206566e-06,
      "loss": 0.5777,
      "step": 3020
    },
    {
      "epoch": 0.3704021579205493,
      "grad_norm": 2.282235978305992,
      "learning_rate": 4.754335006309091e-06,
      "loss": 0.5511,
      "step": 3021
    },
    {
      "epoch": 0.37052476704266796,
      "grad_norm": 1.9931554996567389,
      "learning_rate": 4.7541158577544575e-06,
      "loss": 0.5362,
      "step": 3022
    },
    {
      "epoch": 0.37064737616478666,
      "grad_norm": 2.1281606822482773,
      "learning_rate": 4.753896616551671e-06,
      "loss": 0.5893,
      "step": 3023
    },
    {
      "epoch": 0.37076998528690536,
      "grad_norm": 2.0592842445862707,
      "learning_rate": 4.753677282709743e-06,
      "loss": 0.5454,
      "step": 3024
    },
    {
      "epoch": 0.37089259440902406,
      "grad_norm": 2.195194318255548,
      "learning_rate": 4.753457856237688e-06,
      "loss": 0.5678,
      "step": 3025
    },
    {
      "epoch": 0.3710152035311427,
      "grad_norm": 2.1397856912845503,
      "learning_rate": 4.753238337144528e-06,
      "loss": 0.5815,
      "step": 3026
    },
    {
      "epoch": 0.3711378126532614,
      "grad_norm": 2.043528762408297,
      "learning_rate": 4.7530187254392816e-06,
      "loss": 0.5437,
      "step": 3027
    },
    {
      "epoch": 0.3712604217753801,
      "grad_norm": 2.321791745225574,
      "learning_rate": 4.752799021130978e-06,
      "loss": 0.5524,
      "step": 3028
    },
    {
      "epoch": 0.3713830308974988,
      "grad_norm": 2.2104742991063095,
      "learning_rate": 4.752579224228645e-06,
      "loss": 0.567,
      "step": 3029
    },
    {
      "epoch": 0.37150564001961744,
      "grad_norm": 2.1445082621041482,
      "learning_rate": 4.752359334741319e-06,
      "loss": 0.5314,
      "step": 3030
    },
    {
      "epoch": 0.37162824914173614,
      "grad_norm": 2.0061813424131105,
      "learning_rate": 4.7521393526780366e-06,
      "loss": 0.6569,
      "step": 3031
    },
    {
      "epoch": 0.37175085826385484,
      "grad_norm": 2.2008703524685362,
      "learning_rate": 4.7519192780478405e-06,
      "loss": 0.5462,
      "step": 3032
    },
    {
      "epoch": 0.37187346738597354,
      "grad_norm": 2.165047382519833,
      "learning_rate": 4.751699110859774e-06,
      "loss": 0.5265,
      "step": 3033
    },
    {
      "epoch": 0.3719960765080922,
      "grad_norm": 1.9321295137743872,
      "learning_rate": 4.751478851122888e-06,
      "loss": 0.5369,
      "step": 3034
    },
    {
      "epoch": 0.3721186856302109,
      "grad_norm": 2.1326600086872824,
      "learning_rate": 4.7512584988462365e-06,
      "loss": 0.5851,
      "step": 3035
    },
    {
      "epoch": 0.3722412947523296,
      "grad_norm": 2.2317709246959665,
      "learning_rate": 4.751038054038874e-06,
      "loss": 0.5748,
      "step": 3036
    },
    {
      "epoch": 0.37236390387444823,
      "grad_norm": 2.132858543826276,
      "learning_rate": 4.750817516709863e-06,
      "loss": 0.567,
      "step": 3037
    },
    {
      "epoch": 0.37248651299656693,
      "grad_norm": 1.9700481428746082,
      "learning_rate": 4.750596886868267e-06,
      "loss": 0.5349,
      "step": 3038
    },
    {
      "epoch": 0.37260912211868563,
      "grad_norm": 2.210900082864463,
      "learning_rate": 4.750376164523154e-06,
      "loss": 0.5793,
      "step": 3039
    },
    {
      "epoch": 0.37273173124080433,
      "grad_norm": 2.238792616970936,
      "learning_rate": 4.7501553496835975e-06,
      "loss": 0.5661,
      "step": 3040
    },
    {
      "epoch": 0.372854340362923,
      "grad_norm": 2.195371828670887,
      "learning_rate": 4.749934442358672e-06,
      "loss": 0.6196,
      "step": 3041
    },
    {
      "epoch": 0.3729769494850417,
      "grad_norm": 1.8329235714911642,
      "learning_rate": 4.7497134425574585e-06,
      "loss": 0.494,
      "step": 3042
    },
    {
      "epoch": 0.3730995586071604,
      "grad_norm": 1.970212179959365,
      "learning_rate": 4.7494923502890386e-06,
      "loss": 0.5649,
      "step": 3043
    },
    {
      "epoch": 0.3732221677292791,
      "grad_norm": 1.9400375779331063,
      "learning_rate": 4.749271165562501e-06,
      "loss": 0.527,
      "step": 3044
    },
    {
      "epoch": 0.3733447768513977,
      "grad_norm": 2.0913688148580256,
      "learning_rate": 4.749049888386938e-06,
      "loss": 0.5354,
      "step": 3045
    },
    {
      "epoch": 0.3734673859735164,
      "grad_norm": 2.1897128593569604,
      "learning_rate": 4.748828518771441e-06,
      "loss": 0.5746,
      "step": 3046
    },
    {
      "epoch": 0.3735899950956351,
      "grad_norm": 2.331082844419515,
      "learning_rate": 4.748607056725111e-06,
      "loss": 0.5723,
      "step": 3047
    },
    {
      "epoch": 0.3737126042177538,
      "grad_norm": 2.2303189071474354,
      "learning_rate": 4.74838550225705e-06,
      "loss": 0.5512,
      "step": 3048
    },
    {
      "epoch": 0.37383521333987246,
      "grad_norm": 2.141242660226257,
      "learning_rate": 4.748163855376365e-06,
      "loss": 0.5953,
      "step": 3049
    },
    {
      "epoch": 0.37395782246199116,
      "grad_norm": 2.03746921654001,
      "learning_rate": 4.7479421160921654e-06,
      "loss": 0.5521,
      "step": 3050
    },
    {
      "epoch": 0.37408043158410986,
      "grad_norm": 1.933125781842728,
      "learning_rate": 4.747720284413565e-06,
      "loss": 0.5506,
      "step": 3051
    },
    {
      "epoch": 0.37420304070622856,
      "grad_norm": 2.0257624495639415,
      "learning_rate": 4.747498360349681e-06,
      "loss": 0.5796,
      "step": 3052
    },
    {
      "epoch": 0.3743256498283472,
      "grad_norm": 2.0960882038345185,
      "learning_rate": 4.747276343909637e-06,
      "loss": 0.5613,
      "step": 3053
    },
    {
      "epoch": 0.3744482589504659,
      "grad_norm": 2.148170090232494,
      "learning_rate": 4.747054235102556e-06,
      "loss": 0.5652,
      "step": 3054
    },
    {
      "epoch": 0.3745708680725846,
      "grad_norm": 2.223533640239746,
      "learning_rate": 4.7468320339375686e-06,
      "loss": 0.5579,
      "step": 3055
    },
    {
      "epoch": 0.3746934771947033,
      "grad_norm": 2.2293820238161723,
      "learning_rate": 4.746609740423805e-06,
      "loss": 0.5657,
      "step": 3056
    },
    {
      "epoch": 0.37481608631682195,
      "grad_norm": 1.9876008029661405,
      "learning_rate": 4.746387354570405e-06,
      "loss": 0.5469,
      "step": 3057
    },
    {
      "epoch": 0.37493869543894065,
      "grad_norm": 1.9689655262471175,
      "learning_rate": 4.746164876386508e-06,
      "loss": 0.5411,
      "step": 3058
    },
    {
      "epoch": 0.37506130456105935,
      "grad_norm": 1.9596843350669195,
      "learning_rate": 4.745942305881258e-06,
      "loss": 0.5597,
      "step": 3059
    },
    {
      "epoch": 0.37518391368317805,
      "grad_norm": 2.021036279856177,
      "learning_rate": 4.745719643063803e-06,
      "loss": 0.5985,
      "step": 3060
    },
    {
      "epoch": 0.3753065228052967,
      "grad_norm": 1.781357668829744,
      "learning_rate": 4.745496887943295e-06,
      "loss": 0.5594,
      "step": 3061
    },
    {
      "epoch": 0.3754291319274154,
      "grad_norm": 2.2165682686222623,
      "learning_rate": 4.745274040528889e-06,
      "loss": 0.6305,
      "step": 3062
    },
    {
      "epoch": 0.3755517410495341,
      "grad_norm": 2.166272836158376,
      "learning_rate": 4.745051100829745e-06,
      "loss": 0.5555,
      "step": 3063
    },
    {
      "epoch": 0.3756743501716528,
      "grad_norm": 1.9978247122947257,
      "learning_rate": 4.744828068855027e-06,
      "loss": 0.5399,
      "step": 3064
    },
    {
      "epoch": 0.37579695929377144,
      "grad_norm": 1.9415699616692803,
      "learning_rate": 4.744604944613901e-06,
      "loss": 0.5686,
      "step": 3065
    },
    {
      "epoch": 0.37591956841589014,
      "grad_norm": 1.9721722361554788,
      "learning_rate": 4.7443817281155375e-06,
      "loss": 0.5082,
      "step": 3066
    },
    {
      "epoch": 0.37604217753800884,
      "grad_norm": 2.1775104502172287,
      "learning_rate": 4.744158419369112e-06,
      "loss": 0.4966,
      "step": 3067
    },
    {
      "epoch": 0.37616478666012754,
      "grad_norm": 2.082737449984552,
      "learning_rate": 4.743935018383803e-06,
      "loss": 0.5679,
      "step": 3068
    },
    {
      "epoch": 0.3762873957822462,
      "grad_norm": 2.185101679105929,
      "learning_rate": 4.743711525168791e-06,
      "loss": 0.5891,
      "step": 3069
    },
    {
      "epoch": 0.3764100049043649,
      "grad_norm": 2.2493060197224084,
      "learning_rate": 4.743487939733264e-06,
      "loss": 0.6153,
      "step": 3070
    },
    {
      "epoch": 0.3765326140264836,
      "grad_norm": 1.9928671790402992,
      "learning_rate": 4.74326426208641e-06,
      "loss": 0.542,
      "step": 3071
    },
    {
      "epoch": 0.3766552231486023,
      "grad_norm": 2.325727458344896,
      "learning_rate": 4.743040492237424e-06,
      "loss": 0.5813,
      "step": 3072
    },
    {
      "epoch": 0.3767778322707209,
      "grad_norm": 2.281424492686809,
      "learning_rate": 4.742816630195503e-06,
      "loss": 0.5897,
      "step": 3073
    },
    {
      "epoch": 0.3769004413928396,
      "grad_norm": 2.1863132427986,
      "learning_rate": 4.742592675969848e-06,
      "loss": 0.5751,
      "step": 3074
    },
    {
      "epoch": 0.3770230505149583,
      "grad_norm": 2.145791513075352,
      "learning_rate": 4.742368629569664e-06,
      "loss": 0.5523,
      "step": 3075
    },
    {
      "epoch": 0.377145659637077,
      "grad_norm": 2.041107575748355,
      "learning_rate": 4.742144491004158e-06,
      "loss": 0.577,
      "step": 3076
    },
    {
      "epoch": 0.37726826875919567,
      "grad_norm": 2.1892678323096044,
      "learning_rate": 4.741920260282545e-06,
      "loss": 0.6045,
      "step": 3077
    },
    {
      "epoch": 0.37739087788131437,
      "grad_norm": 1.936631862174079,
      "learning_rate": 4.7416959374140405e-06,
      "loss": 0.5626,
      "step": 3078
    },
    {
      "epoch": 0.37751348700343307,
      "grad_norm": 1.9273012593282346,
      "learning_rate": 4.741471522407864e-06,
      "loss": 0.5608,
      "step": 3079
    },
    {
      "epoch": 0.37763609612555177,
      "grad_norm": 2.0715555225662943,
      "learning_rate": 4.7412470152732405e-06,
      "loss": 0.6029,
      "step": 3080
    },
    {
      "epoch": 0.3777587052476704,
      "grad_norm": 2.0948168294825495,
      "learning_rate": 4.741022416019396e-06,
      "loss": 0.608,
      "step": 3081
    },
    {
      "epoch": 0.3778813143697891,
      "grad_norm": 2.0493538904920814,
      "learning_rate": 4.740797724655564e-06,
      "loss": 0.5102,
      "step": 3082
    },
    {
      "epoch": 0.3780039234919078,
      "grad_norm": 1.9518308332996959,
      "learning_rate": 4.740572941190977e-06,
      "loss": 0.5238,
      "step": 3083
    },
    {
      "epoch": 0.37812653261402646,
      "grad_norm": 1.9915951425691978,
      "learning_rate": 4.740348065634876e-06,
      "loss": 0.5189,
      "step": 3084
    },
    {
      "epoch": 0.37824914173614516,
      "grad_norm": 1.8056304775320224,
      "learning_rate": 4.740123097996504e-06,
      "loss": 0.5211,
      "step": 3085
    },
    {
      "epoch": 0.37837175085826386,
      "grad_norm": 2.219870847666406,
      "learning_rate": 4.739898038285105e-06,
      "loss": 0.5095,
      "step": 3086
    },
    {
      "epoch": 0.37849435998038256,
      "grad_norm": 2.039563338004496,
      "learning_rate": 4.739672886509933e-06,
      "loss": 0.5943,
      "step": 3087
    },
    {
      "epoch": 0.3786169691025012,
      "grad_norm": 1.9529133142800956,
      "learning_rate": 4.73944764268024e-06,
      "loss": 0.5194,
      "step": 3088
    },
    {
      "epoch": 0.3787395782246199,
      "grad_norm": 2.0430120905418088,
      "learning_rate": 4.739222306805284e-06,
      "loss": 0.5364,
      "step": 3089
    },
    {
      "epoch": 0.3788621873467386,
      "grad_norm": 2.2293501776813085,
      "learning_rate": 4.738996878894327e-06,
      "loss": 0.5864,
      "step": 3090
    },
    {
      "epoch": 0.3789847964688573,
      "grad_norm": 2.0989450799655502,
      "learning_rate": 4.738771358956634e-06,
      "loss": 0.5607,
      "step": 3091
    },
    {
      "epoch": 0.37910740559097594,
      "grad_norm": 2.2190838540243174,
      "learning_rate": 4.738545747001475e-06,
      "loss": 0.5232,
      "step": 3092
    },
    {
      "epoch": 0.37923001471309464,
      "grad_norm": 2.2025237094651353,
      "learning_rate": 4.738320043038123e-06,
      "loss": 0.6019,
      "step": 3093
    },
    {
      "epoch": 0.37935262383521334,
      "grad_norm": 2.265074877908346,
      "learning_rate": 4.738094247075855e-06,
      "loss": 0.588,
      "step": 3094
    },
    {
      "epoch": 0.37947523295733204,
      "grad_norm": 2.2298166582178527,
      "learning_rate": 4.737868359123951e-06,
      "loss": 0.5041,
      "step": 3095
    },
    {
      "epoch": 0.3795978420794507,
      "grad_norm": 1.9613359075967587,
      "learning_rate": 4.737642379191694e-06,
      "loss": 0.5042,
      "step": 3096
    },
    {
      "epoch": 0.3797204512015694,
      "grad_norm": 2.202141085921748,
      "learning_rate": 4.737416307288376e-06,
      "loss": 0.5623,
      "step": 3097
    },
    {
      "epoch": 0.3798430603236881,
      "grad_norm": 2.152955576571735,
      "learning_rate": 4.737190143423286e-06,
      "loss": 0.5721,
      "step": 3098
    },
    {
      "epoch": 0.3799656694458068,
      "grad_norm": 2.1392227251713556,
      "learning_rate": 4.73696388760572e-06,
      "loss": 0.5229,
      "step": 3099
    },
    {
      "epoch": 0.38008827856792543,
      "grad_norm": 1.9274559343216773,
      "learning_rate": 4.736737539844978e-06,
      "loss": 0.5425,
      "step": 3100
    },
    {
      "epoch": 0.38021088769004413,
      "grad_norm": 1.9607848508212335,
      "learning_rate": 4.736511100150364e-06,
      "loss": 0.535,
      "step": 3101
    },
    {
      "epoch": 0.38033349681216283,
      "grad_norm": 2.008414669130566,
      "learning_rate": 4.7362845685311835e-06,
      "loss": 0.556,
      "step": 3102
    },
    {
      "epoch": 0.38045610593428153,
      "grad_norm": 2.104081412394667,
      "learning_rate": 4.736057944996749e-06,
      "loss": 0.5946,
      "step": 3103
    },
    {
      "epoch": 0.3805787150564002,
      "grad_norm": 2.0491841582551853,
      "learning_rate": 4.735831229556374e-06,
      "loss": 0.5417,
      "step": 3104
    },
    {
      "epoch": 0.3807013241785189,
      "grad_norm": 1.9860728949268147,
      "learning_rate": 4.735604422219377e-06,
      "loss": 0.5289,
      "step": 3105
    },
    {
      "epoch": 0.3808239333006376,
      "grad_norm": 2.153509421297733,
      "learning_rate": 4.735377522995081e-06,
      "loss": 0.5743,
      "step": 3106
    },
    {
      "epoch": 0.3809465424227563,
      "grad_norm": 2.139342884231883,
      "learning_rate": 4.73515053189281e-06,
      "loss": 0.6158,
      "step": 3107
    },
    {
      "epoch": 0.3810691515448749,
      "grad_norm": 2.1011637997041563,
      "learning_rate": 4.734923448921897e-06,
      "loss": 0.5186,
      "step": 3108
    },
    {
      "epoch": 0.3811917606669936,
      "grad_norm": 2.26682787458064,
      "learning_rate": 4.734696274091672e-06,
      "loss": 0.5611,
      "step": 3109
    },
    {
      "epoch": 0.3813143697891123,
      "grad_norm": 2.1296441975536498,
      "learning_rate": 4.734469007411475e-06,
      "loss": 0.5436,
      "step": 3110
    },
    {
      "epoch": 0.381436978911231,
      "grad_norm": 2.1292129771633066,
      "learning_rate": 4.734241648890645e-06,
      "loss": 0.595,
      "step": 3111
    },
    {
      "epoch": 0.38155958803334966,
      "grad_norm": 2.1543801299326377,
      "learning_rate": 4.734014198538529e-06,
      "loss": 0.5803,
      "step": 3112
    },
    {
      "epoch": 0.38168219715546836,
      "grad_norm": 1.9170219122057355,
      "learning_rate": 4.733786656364474e-06,
      "loss": 0.5652,
      "step": 3113
    },
    {
      "epoch": 0.38180480627758706,
      "grad_norm": 2.2383015665000436,
      "learning_rate": 4.733559022377833e-06,
      "loss": 0.5528,
      "step": 3114
    },
    {
      "epoch": 0.38192741539970576,
      "grad_norm": 1.936083245379472,
      "learning_rate": 4.73333129658796e-06,
      "loss": 0.5377,
      "step": 3115
    },
    {
      "epoch": 0.3820500245218244,
      "grad_norm": 2.129552339942418,
      "learning_rate": 4.73310347900422e-06,
      "loss": 0.6124,
      "step": 3116
    },
    {
      "epoch": 0.3821726336439431,
      "grad_norm": 2.03867681681008,
      "learning_rate": 4.732875569635972e-06,
      "loss": 0.5588,
      "step": 3117
    },
    {
      "epoch": 0.3822952427660618,
      "grad_norm": 1.997710302931446,
      "learning_rate": 4.732647568492585e-06,
      "loss": 0.5938,
      "step": 3118
    },
    {
      "epoch": 0.3824178518881805,
      "grad_norm": 2.1321435319556206,
      "learning_rate": 4.732419475583431e-06,
      "loss": 0.5399,
      "step": 3119
    },
    {
      "epoch": 0.38254046101029915,
      "grad_norm": 1.9641045100393104,
      "learning_rate": 4.732191290917884e-06,
      "loss": 0.5119,
      "step": 3120
    },
    {
      "epoch": 0.38266307013241785,
      "grad_norm": 2.2955943035043953,
      "learning_rate": 4.731963014505323e-06,
      "loss": 0.6113,
      "step": 3121
    },
    {
      "epoch": 0.38278567925453655,
      "grad_norm": 1.9728530274176768,
      "learning_rate": 4.731734646355131e-06,
      "loss": 0.5222,
      "step": 3122
    },
    {
      "epoch": 0.38290828837665525,
      "grad_norm": 2.0456918088014087,
      "learning_rate": 4.731506186476694e-06,
      "loss": 0.5446,
      "step": 3123
    },
    {
      "epoch": 0.3830308974987739,
      "grad_norm": 2.040559931587615,
      "learning_rate": 4.731277634879401e-06,
      "loss": 0.5662,
      "step": 3124
    },
    {
      "epoch": 0.3831535066208926,
      "grad_norm": 1.9955637404137145,
      "learning_rate": 4.731048991572649e-06,
      "loss": 0.5487,
      "step": 3125
    },
    {
      "epoch": 0.3832761157430113,
      "grad_norm": 2.0374491239080483,
      "learning_rate": 4.730820256565833e-06,
      "loss": 0.6124,
      "step": 3126
    },
    {
      "epoch": 0.38339872486513,
      "grad_norm": 2.276587702375193,
      "learning_rate": 4.730591429868354e-06,
      "loss": 0.5092,
      "step": 3127
    },
    {
      "epoch": 0.38352133398724864,
      "grad_norm": 2.1487305204226463,
      "learning_rate": 4.7303625114896195e-06,
      "loss": 0.5826,
      "step": 3128
    },
    {
      "epoch": 0.38364394310936734,
      "grad_norm": 2.154811976310912,
      "learning_rate": 4.730133501439037e-06,
      "loss": 0.562,
      "step": 3129
    },
    {
      "epoch": 0.38376655223148604,
      "grad_norm": 2.0039930430585393,
      "learning_rate": 4.72990439972602e-06,
      "loss": 0.5031,
      "step": 3130
    },
    {
      "epoch": 0.38388916135360474,
      "grad_norm": 2.135113686623802,
      "learning_rate": 4.729675206359983e-06,
      "loss": 0.5795,
      "step": 3131
    },
    {
      "epoch": 0.3840117704757234,
      "grad_norm": 2.057415318693967,
      "learning_rate": 4.729445921350349e-06,
      "loss": 0.5919,
      "step": 3132
    },
    {
      "epoch": 0.3841343795978421,
      "grad_norm": 2.0011657739581272,
      "learning_rate": 4.72921654470654e-06,
      "loss": 0.5539,
      "step": 3133
    },
    {
      "epoch": 0.3842569887199608,
      "grad_norm": 2.173872521123026,
      "learning_rate": 4.728987076437985e-06,
      "loss": 0.5827,
      "step": 3134
    },
    {
      "epoch": 0.3843795978420794,
      "grad_norm": 2.0708127762506807,
      "learning_rate": 4.7287575165541145e-06,
      "loss": 0.5623,
      "step": 3135
    },
    {
      "epoch": 0.3845022069641981,
      "grad_norm": 2.0180250539962254,
      "learning_rate": 4.728527865064365e-06,
      "loss": 0.519,
      "step": 3136
    },
    {
      "epoch": 0.3846248160863168,
      "grad_norm": 1.979222915833149,
      "learning_rate": 4.728298121978174e-06,
      "loss": 0.5827,
      "step": 3137
    },
    {
      "epoch": 0.3847474252084355,
      "grad_norm": 2.0543118536665683,
      "learning_rate": 4.728068287304986e-06,
      "loss": 0.5558,
      "step": 3138
    },
    {
      "epoch": 0.38487003433055417,
      "grad_norm": 2.3080116350476487,
      "learning_rate": 4.727838361054247e-06,
      "loss": 0.5929,
      "step": 3139
    },
    {
      "epoch": 0.38499264345267287,
      "grad_norm": 2.1587408238343953,
      "learning_rate": 4.727608343235407e-06,
      "loss": 0.5941,
      "step": 3140
    },
    {
      "epoch": 0.38511525257479157,
      "grad_norm": 1.981149827119973,
      "learning_rate": 4.72737823385792e-06,
      "loss": 0.5309,
      "step": 3141
    },
    {
      "epoch": 0.38523786169691027,
      "grad_norm": 2.2716934587399678,
      "learning_rate": 4.727148032931245e-06,
      "loss": 0.5735,
      "step": 3142
    },
    {
      "epoch": 0.3853604708190289,
      "grad_norm": 1.934470654088506,
      "learning_rate": 4.726917740464842e-06,
      "loss": 0.5234,
      "step": 3143
    },
    {
      "epoch": 0.3854830799411476,
      "grad_norm": 2.0220619721229367,
      "learning_rate": 4.726687356468179e-06,
      "loss": 0.564,
      "step": 3144
    },
    {
      "epoch": 0.3856056890632663,
      "grad_norm": 2.061146283528838,
      "learning_rate": 4.726456880950723e-06,
      "loss": 0.5827,
      "step": 3145
    },
    {
      "epoch": 0.385728298185385,
      "grad_norm": 2.0494336386337144,
      "learning_rate": 4.726226313921947e-06,
      "loss": 0.5967,
      "step": 3146
    },
    {
      "epoch": 0.38585090730750365,
      "grad_norm": 2.013750611774036,
      "learning_rate": 4.7259956553913285e-06,
      "loss": 0.5489,
      "step": 3147
    },
    {
      "epoch": 0.38597351642962235,
      "grad_norm": 2.033753927724842,
      "learning_rate": 4.725764905368348e-06,
      "loss": 0.5874,
      "step": 3148
    },
    {
      "epoch": 0.38609612555174105,
      "grad_norm": 1.9913835951343468,
      "learning_rate": 4.725534063862489e-06,
      "loss": 0.6092,
      "step": 3149
    },
    {
      "epoch": 0.38621873467385975,
      "grad_norm": 2.2525346725431046,
      "learning_rate": 4.725303130883241e-06,
      "loss": 0.6202,
      "step": 3150
    },
    {
      "epoch": 0.3863413437959784,
      "grad_norm": 1.9619606987188192,
      "learning_rate": 4.725072106440094e-06,
      "loss": 0.5401,
      "step": 3151
    },
    {
      "epoch": 0.3864639529180971,
      "grad_norm": 2.170689978164708,
      "learning_rate": 4.724840990542544e-06,
      "loss": 0.601,
      "step": 3152
    },
    {
      "epoch": 0.3865865620402158,
      "grad_norm": 2.0664187870166706,
      "learning_rate": 4.724609783200091e-06,
      "loss": 0.548,
      "step": 3153
    },
    {
      "epoch": 0.3867091711623345,
      "grad_norm": 2.113099591995908,
      "learning_rate": 4.724378484422237e-06,
      "loss": 0.5917,
      "step": 3154
    },
    {
      "epoch": 0.38683178028445314,
      "grad_norm": 2.2256631797827593,
      "learning_rate": 4.72414709421849e-06,
      "loss": 0.5859,
      "step": 3155
    },
    {
      "epoch": 0.38695438940657184,
      "grad_norm": 2.2458698435936895,
      "learning_rate": 4.72391561259836e-06,
      "loss": 0.609,
      "step": 3156
    },
    {
      "epoch": 0.38707699852869054,
      "grad_norm": 2.133712984893548,
      "learning_rate": 4.7236840395713606e-06,
      "loss": 0.5719,
      "step": 3157
    },
    {
      "epoch": 0.38719960765080924,
      "grad_norm": 2.0529192537748178,
      "learning_rate": 4.7234523751470105e-06,
      "loss": 0.5556,
      "step": 3158
    },
    {
      "epoch": 0.3873222167729279,
      "grad_norm": 1.9265524726970513,
      "learning_rate": 4.723220619334832e-06,
      "loss": 0.5767,
      "step": 3159
    },
    {
      "epoch": 0.3874448258950466,
      "grad_norm": 2.2711953974824803,
      "learning_rate": 4.7229887721443495e-06,
      "loss": 0.5741,
      "step": 3160
    },
    {
      "epoch": 0.3875674350171653,
      "grad_norm": 1.8882398355754644,
      "learning_rate": 4.7227568335850934e-06,
      "loss": 0.518,
      "step": 3161
    },
    {
      "epoch": 0.387690044139284,
      "grad_norm": 2.1229471442558796,
      "learning_rate": 4.722524803666596e-06,
      "loss": 0.5379,
      "step": 3162
    },
    {
      "epoch": 0.38781265326140263,
      "grad_norm": 2.0862603597118614,
      "learning_rate": 4.722292682398395e-06,
      "loss": 0.508,
      "step": 3163
    },
    {
      "epoch": 0.38793526238352133,
      "grad_norm": 2.291123681653485,
      "learning_rate": 4.72206046979003e-06,
      "loss": 0.5515,
      "step": 3164
    },
    {
      "epoch": 0.38805787150564003,
      "grad_norm": 2.2063733073878753,
      "learning_rate": 4.721828165851046e-06,
      "loss": 0.5902,
      "step": 3165
    },
    {
      "epoch": 0.38818048062775873,
      "grad_norm": 2.209987630357585,
      "learning_rate": 4.721595770590991e-06,
      "loss": 0.5584,
      "step": 3166
    },
    {
      "epoch": 0.3883030897498774,
      "grad_norm": 2.1387792882575027,
      "learning_rate": 4.721363284019417e-06,
      "loss": 0.5888,
      "step": 3167
    },
    {
      "epoch": 0.3884256988719961,
      "grad_norm": 2.1162097521197776,
      "learning_rate": 4.721130706145879e-06,
      "loss": 0.5484,
      "step": 3168
    },
    {
      "epoch": 0.38854830799411477,
      "grad_norm": 2.0330515515103835,
      "learning_rate": 4.7208980369799376e-06,
      "loss": 0.611,
      "step": 3169
    },
    {
      "epoch": 0.38867091711623347,
      "grad_norm": 2.158963448094596,
      "learning_rate": 4.720665276531153e-06,
      "loss": 0.5398,
      "step": 3170
    },
    {
      "epoch": 0.3887935262383521,
      "grad_norm": 2.0964973803791227,
      "learning_rate": 4.720432424809096e-06,
      "loss": 0.6052,
      "step": 3171
    },
    {
      "epoch": 0.3889161353604708,
      "grad_norm": 1.946533102633546,
      "learning_rate": 4.720199481823336e-06,
      "loss": 0.5705,
      "step": 3172
    },
    {
      "epoch": 0.3890387444825895,
      "grad_norm": 2.205204756046535,
      "learning_rate": 4.719966447583445e-06,
      "loss": 0.5597,
      "step": 3173
    },
    {
      "epoch": 0.3891613536047082,
      "grad_norm": 2.3498351733703857,
      "learning_rate": 4.719733322099004e-06,
      "loss": 0.5676,
      "step": 3174
    },
    {
      "epoch": 0.38928396272682686,
      "grad_norm": 2.0684706150046055,
      "learning_rate": 4.719500105379593e-06,
      "loss": 0.5542,
      "step": 3175
    },
    {
      "epoch": 0.38940657184894556,
      "grad_norm": 2.139697742842126,
      "learning_rate": 4.7192667974347985e-06,
      "loss": 0.5736,
      "step": 3176
    },
    {
      "epoch": 0.38952918097106426,
      "grad_norm": 2.352427650907481,
      "learning_rate": 4.719033398274209e-06,
      "loss": 0.6393,
      "step": 3177
    },
    {
      "epoch": 0.38965179009318296,
      "grad_norm": 2.1483789159015774,
      "learning_rate": 4.718799907907421e-06,
      "loss": 0.5746,
      "step": 3178
    },
    {
      "epoch": 0.3897743992153016,
      "grad_norm": 2.1713636410921753,
      "learning_rate": 4.718566326344026e-06,
      "loss": 0.5321,
      "step": 3179
    },
    {
      "epoch": 0.3898970083374203,
      "grad_norm": 2.1225985455536893,
      "learning_rate": 4.718332653593629e-06,
      "loss": 0.5102,
      "step": 3180
    },
    {
      "epoch": 0.390019617459539,
      "grad_norm": 2.021274344374406,
      "learning_rate": 4.718098889665832e-06,
      "loss": 0.5447,
      "step": 3181
    },
    {
      "epoch": 0.3901422265816577,
      "grad_norm": 1.9896217381464214,
      "learning_rate": 4.717865034570243e-06,
      "loss": 0.5295,
      "step": 3182
    },
    {
      "epoch": 0.39026483570377635,
      "grad_norm": 1.9590210946992117,
      "learning_rate": 4.717631088316476e-06,
      "loss": 0.502,
      "step": 3183
    },
    {
      "epoch": 0.39038744482589505,
      "grad_norm": 1.8412952215406073,
      "learning_rate": 4.717397050914144e-06,
      "loss": 0.489,
      "step": 3184
    },
    {
      "epoch": 0.39051005394801375,
      "grad_norm": 2.184784850388716,
      "learning_rate": 4.7171629223728684e-06,
      "loss": 0.5818,
      "step": 3185
    },
    {
      "epoch": 0.3906326630701324,
      "grad_norm": 2.109756079322164,
      "learning_rate": 4.7169287027022715e-06,
      "loss": 0.5299,
      "step": 3186
    },
    {
      "epoch": 0.3907552721922511,
      "grad_norm": 2.1379643527232495,
      "learning_rate": 4.71669439191198e-06,
      "loss": 0.5506,
      "step": 3187
    },
    {
      "epoch": 0.3908778813143698,
      "grad_norm": 2.2911477871646797,
      "learning_rate": 4.716459990011625e-06,
      "loss": 0.5935,
      "step": 3188
    },
    {
      "epoch": 0.3910004904364885,
      "grad_norm": 2.1127635230968265,
      "learning_rate": 4.71622549701084e-06,
      "loss": 0.612,
      "step": 3189
    },
    {
      "epoch": 0.39112309955860713,
      "grad_norm": 2.175545718907433,
      "learning_rate": 4.715990912919264e-06,
      "loss": 0.5329,
      "step": 3190
    },
    {
      "epoch": 0.39124570868072583,
      "grad_norm": 2.1025529820523907,
      "learning_rate": 4.715756237746539e-06,
      "loss": 0.5356,
      "step": 3191
    },
    {
      "epoch": 0.39136831780284453,
      "grad_norm": 2.2344241739338573,
      "learning_rate": 4.715521471502308e-06,
      "loss": 0.557,
      "step": 3192
    },
    {
      "epoch": 0.39149092692496323,
      "grad_norm": 2.1687265470651704,
      "learning_rate": 4.715286614196223e-06,
      "loss": 0.5569,
      "step": 3193
    },
    {
      "epoch": 0.3916135360470819,
      "grad_norm": 2.1813887330784847,
      "learning_rate": 4.715051665837937e-06,
      "loss": 0.4969,
      "step": 3194
    },
    {
      "epoch": 0.3917361451692006,
      "grad_norm": 1.9464148611894034,
      "learning_rate": 4.7148166264371055e-06,
      "loss": 0.5553,
      "step": 3195
    },
    {
      "epoch": 0.3918587542913193,
      "grad_norm": 2.070335129272076,
      "learning_rate": 4.71458149600339e-06,
      "loss": 0.5936,
      "step": 3196
    },
    {
      "epoch": 0.391981363413438,
      "grad_norm": 2.4187561169702496,
      "learning_rate": 4.714346274546454e-06,
      "loss": 0.5705,
      "step": 3197
    },
    {
      "epoch": 0.3921039725355566,
      "grad_norm": 2.0898257740876383,
      "learning_rate": 4.714110962075966e-06,
      "loss": 0.5383,
      "step": 3198
    },
    {
      "epoch": 0.3922265816576753,
      "grad_norm": 2.0755492395532156,
      "learning_rate": 4.713875558601598e-06,
      "loss": 0.5263,
      "step": 3199
    },
    {
      "epoch": 0.392349190779794,
      "grad_norm": 2.004821104063912,
      "learning_rate": 4.7136400641330245e-06,
      "loss": 0.5755,
      "step": 3200
    },
    {
      "epoch": 0.3924717999019127,
      "grad_norm": 2.2034277405794374,
      "learning_rate": 4.713404478679927e-06,
      "loss": 0.5982,
      "step": 3201
    },
    {
      "epoch": 0.39259440902403137,
      "grad_norm": 2.0639309479305363,
      "learning_rate": 4.713168802251985e-06,
      "loss": 0.5397,
      "step": 3202
    },
    {
      "epoch": 0.39271701814615007,
      "grad_norm": 2.260296675944287,
      "learning_rate": 4.7129330348588884e-06,
      "loss": 0.6009,
      "step": 3203
    },
    {
      "epoch": 0.39283962726826877,
      "grad_norm": 2.2015736817509746,
      "learning_rate": 4.712697176510326e-06,
      "loss": 0.5597,
      "step": 3204
    },
    {
      "epoch": 0.39296223639038746,
      "grad_norm": 2.388083478859123,
      "learning_rate": 4.712461227215992e-06,
      "loss": 0.5767,
      "step": 3205
    },
    {
      "epoch": 0.3930848455125061,
      "grad_norm": 2.169018937181542,
      "learning_rate": 4.712225186985585e-06,
      "loss": 0.5442,
      "step": 3206
    },
    {
      "epoch": 0.3932074546346248,
      "grad_norm": 2.0742504545263536,
      "learning_rate": 4.711989055828807e-06,
      "loss": 0.5219,
      "step": 3207
    },
    {
      "epoch": 0.3933300637567435,
      "grad_norm": 2.1037991394859614,
      "learning_rate": 4.711752833755362e-06,
      "loss": 0.5893,
      "step": 3208
    },
    {
      "epoch": 0.3934526728788622,
      "grad_norm": 2.010845832526278,
      "learning_rate": 4.71151652077496e-06,
      "loss": 0.5472,
      "step": 3209
    },
    {
      "epoch": 0.39357528200098085,
      "grad_norm": 2.172544198064754,
      "learning_rate": 4.711280116897314e-06,
      "loss": 0.6005,
      "step": 3210
    },
    {
      "epoch": 0.39369789112309955,
      "grad_norm": 2.119746227619439,
      "learning_rate": 4.71104362213214e-06,
      "loss": 0.5725,
      "step": 3211
    },
    {
      "epoch": 0.39382050024521825,
      "grad_norm": 2.034187745895064,
      "learning_rate": 4.710807036489159e-06,
      "loss": 0.564,
      "step": 3212
    },
    {
      "epoch": 0.39394310936733695,
      "grad_norm": 2.0418922099938217,
      "learning_rate": 4.710570359978095e-06,
      "loss": 0.5455,
      "step": 3213
    },
    {
      "epoch": 0.3940657184894556,
      "grad_norm": 2.0565305299517798,
      "learning_rate": 4.7103335926086765e-06,
      "loss": 0.6099,
      "step": 3214
    },
    {
      "epoch": 0.3941883276115743,
      "grad_norm": 2.1279092967066027,
      "learning_rate": 4.710096734390633e-06,
      "loss": 0.6067,
      "step": 3215
    },
    {
      "epoch": 0.394310936733693,
      "grad_norm": 2.213671034914412,
      "learning_rate": 4.709859785333702e-06,
      "loss": 0.5653,
      "step": 3216
    },
    {
      "epoch": 0.3944335458558117,
      "grad_norm": 2.171920888263399,
      "learning_rate": 4.709622745447621e-06,
      "loss": 0.574,
      "step": 3217
    },
    {
      "epoch": 0.39455615497793034,
      "grad_norm": 2.050428864190355,
      "learning_rate": 4.709385614742134e-06,
      "loss": 0.5138,
      "step": 3218
    },
    {
      "epoch": 0.39467876410004904,
      "grad_norm": 2.183092903746878,
      "learning_rate": 4.709148393226985e-06,
      "loss": 0.555,
      "step": 3219
    },
    {
      "epoch": 0.39480137322216774,
      "grad_norm": 2.0426763731591655,
      "learning_rate": 4.708911080911928e-06,
      "loss": 0.5816,
      "step": 3220
    },
    {
      "epoch": 0.39492398234428644,
      "grad_norm": 2.3039759758115927,
      "learning_rate": 4.708673677806715e-06,
      "loss": 0.5574,
      "step": 3221
    },
    {
      "epoch": 0.3950465914664051,
      "grad_norm": 2.0411980663490508,
      "learning_rate": 4.708436183921103e-06,
      "loss": 0.5815,
      "step": 3222
    },
    {
      "epoch": 0.3951692005885238,
      "grad_norm": 1.7860412714676623,
      "learning_rate": 4.708198599264853e-06,
      "loss": 0.5831,
      "step": 3223
    },
    {
      "epoch": 0.3952918097106425,
      "grad_norm": 2.1558885452453356,
      "learning_rate": 4.707960923847733e-06,
      "loss": 0.5679,
      "step": 3224
    },
    {
      "epoch": 0.3954144188327612,
      "grad_norm": 1.9214163723649125,
      "learning_rate": 4.707723157679509e-06,
      "loss": 0.483,
      "step": 3225
    },
    {
      "epoch": 0.3955370279548798,
      "grad_norm": 1.9261868946456204,
      "learning_rate": 4.707485300769955e-06,
      "loss": 0.483,
      "step": 3226
    },
    {
      "epoch": 0.3956596370769985,
      "grad_norm": 2.2289925085726194,
      "learning_rate": 4.707247353128848e-06,
      "loss": 0.5716,
      "step": 3227
    },
    {
      "epoch": 0.3957822461991172,
      "grad_norm": 2.1798455071970424,
      "learning_rate": 4.7070093147659665e-06,
      "loss": 0.5122,
      "step": 3228
    },
    {
      "epoch": 0.3959048553212359,
      "grad_norm": 2.081493784517991,
      "learning_rate": 4.7067711856910946e-06,
      "loss": 0.6145,
      "step": 3229
    },
    {
      "epoch": 0.39602746444335457,
      "grad_norm": 1.8826108283835943,
      "learning_rate": 4.70653296591402e-06,
      "loss": 0.5284,
      "step": 3230
    },
    {
      "epoch": 0.39615007356547327,
      "grad_norm": 2.3357001065710277,
      "learning_rate": 4.7062946554445355e-06,
      "loss": 0.59,
      "step": 3231
    },
    {
      "epoch": 0.39627268268759197,
      "grad_norm": 2.120174107030589,
      "learning_rate": 4.7060562542924335e-06,
      "loss": 0.6059,
      "step": 3232
    },
    {
      "epoch": 0.3963952918097106,
      "grad_norm": 2.2335755078490074,
      "learning_rate": 4.705817762467515e-06,
      "loss": 0.5916,
      "step": 3233
    },
    {
      "epoch": 0.3965179009318293,
      "grad_norm": 2.3058974662892164,
      "learning_rate": 4.70557917997958e-06,
      "loss": 0.6214,
      "step": 3234
    },
    {
      "epoch": 0.396640510053948,
      "grad_norm": 2.2435281613107882,
      "learning_rate": 4.705340506838437e-06,
      "loss": 0.5297,
      "step": 3235
    },
    {
      "epoch": 0.3967631191760667,
      "grad_norm": 2.06625652481536,
      "learning_rate": 4.705101743053895e-06,
      "loss": 0.5995,
      "step": 3236
    },
    {
      "epoch": 0.39688572829818536,
      "grad_norm": 2.1919540379863065,
      "learning_rate": 4.704862888635767e-06,
      "loss": 0.5712,
      "step": 3237
    },
    {
      "epoch": 0.39700833742030406,
      "grad_norm": 2.1211074515196895,
      "learning_rate": 4.704623943593871e-06,
      "loss": 0.5562,
      "step": 3238
    },
    {
      "epoch": 0.39713094654242276,
      "grad_norm": 2.268895019090642,
      "learning_rate": 4.704384907938029e-06,
      "loss": 0.5814,
      "step": 3239
    },
    {
      "epoch": 0.39725355566454146,
      "grad_norm": 1.9864488738698836,
      "learning_rate": 4.704145781678063e-06,
      "loss": 0.4728,
      "step": 3240
    },
    {
      "epoch": 0.3973761647866601,
      "grad_norm": 2.085497338845527,
      "learning_rate": 4.703906564823804e-06,
      "loss": 0.5857,
      "step": 3241
    },
    {
      "epoch": 0.3974987739087788,
      "grad_norm": 2.1205429051500935,
      "learning_rate": 4.703667257385084e-06,
      "loss": 0.5012,
      "step": 3242
    },
    {
      "epoch": 0.3976213830308975,
      "grad_norm": 2.1072218607584343,
      "learning_rate": 4.7034278593717374e-06,
      "loss": 0.5725,
      "step": 3243
    },
    {
      "epoch": 0.3977439921530162,
      "grad_norm": 1.9954219301114704,
      "learning_rate": 4.703188370793606e-06,
      "loss": 0.5504,
      "step": 3244
    },
    {
      "epoch": 0.39786660127513485,
      "grad_norm": 1.9128985608485067,
      "learning_rate": 4.702948791660531e-06,
      "loss": 0.5548,
      "step": 3245
    },
    {
      "epoch": 0.39798921039725355,
      "grad_norm": 2.1746061963893806,
      "learning_rate": 4.702709121982362e-06,
      "loss": 0.5243,
      "step": 3246
    },
    {
      "epoch": 0.39811181951937225,
      "grad_norm": 2.122923037451591,
      "learning_rate": 4.702469361768947e-06,
      "loss": 0.539,
      "step": 3247
    },
    {
      "epoch": 0.39823442864149095,
      "grad_norm": 2.062430401332022,
      "learning_rate": 4.702229511030143e-06,
      "loss": 0.5543,
      "step": 3248
    },
    {
      "epoch": 0.3983570377636096,
      "grad_norm": 2.430175636828952,
      "learning_rate": 4.701989569775807e-06,
      "loss": 0.5571,
      "step": 3249
    },
    {
      "epoch": 0.3984796468857283,
      "grad_norm": 1.7054134224176687,
      "learning_rate": 4.7017495380158015e-06,
      "loss": 0.504,
      "step": 3250
    },
    {
      "epoch": 0.398602256007847,
      "grad_norm": 1.9858356366422873,
      "learning_rate": 4.7015094157599924e-06,
      "loss": 0.5817,
      "step": 3251
    },
    {
      "epoch": 0.3987248651299657,
      "grad_norm": 2.22170490374211,
      "learning_rate": 4.701269203018248e-06,
      "loss": 0.5304,
      "step": 3252
    },
    {
      "epoch": 0.39884747425208433,
      "grad_norm": 2.1665784443445086,
      "learning_rate": 4.701028899800443e-06,
      "loss": 0.5973,
      "step": 3253
    },
    {
      "epoch": 0.39897008337420303,
      "grad_norm": 2.1347352449863393,
      "learning_rate": 4.700788506116454e-06,
      "loss": 0.6425,
      "step": 3254
    },
    {
      "epoch": 0.39909269249632173,
      "grad_norm": 1.9115547790564376,
      "learning_rate": 4.70054802197616e-06,
      "loss": 0.5498,
      "step": 3255
    },
    {
      "epoch": 0.39921530161844043,
      "grad_norm": 2.1254385674963916,
      "learning_rate": 4.700307447389447e-06,
      "loss": 0.5327,
      "step": 3256
    },
    {
      "epoch": 0.3993379107405591,
      "grad_norm": 2.198545166223846,
      "learning_rate": 4.700066782366203e-06,
      "loss": 0.555,
      "step": 3257
    },
    {
      "epoch": 0.3994605198626778,
      "grad_norm": 2.157044891746772,
      "learning_rate": 4.699826026916319e-06,
      "loss": 0.5852,
      "step": 3258
    },
    {
      "epoch": 0.3995831289847965,
      "grad_norm": 2.083527358734546,
      "learning_rate": 4.699585181049691e-06,
      "loss": 0.5373,
      "step": 3259
    },
    {
      "epoch": 0.3997057381069152,
      "grad_norm": 2.126111527987555,
      "learning_rate": 4.6993442447762185e-06,
      "loss": 0.5811,
      "step": 3260
    },
    {
      "epoch": 0.3998283472290338,
      "grad_norm": 1.9709296968345618,
      "learning_rate": 4.699103218105803e-06,
      "loss": 0.5286,
      "step": 3261
    },
    {
      "epoch": 0.3999509563511525,
      "grad_norm": 2.279770311135898,
      "learning_rate": 4.6988621010483525e-06,
      "loss": 0.5632,
      "step": 3262
    },
    {
      "epoch": 0.4000735654732712,
      "grad_norm": 2.3778071138640615,
      "learning_rate": 4.698620893613777e-06,
      "loss": 0.5206,
      "step": 3263
    },
    {
      "epoch": 0.4001961745953899,
      "grad_norm": 2.1865407855073227,
      "learning_rate": 4.698379595811989e-06,
      "loss": 0.5706,
      "step": 3264
    },
    {
      "epoch": 0.40031878371750856,
      "grad_norm": 2.122266746321062,
      "learning_rate": 4.698138207652909e-06,
      "loss": 0.5222,
      "step": 3265
    },
    {
      "epoch": 0.40044139283962726,
      "grad_norm": 2.206885841316605,
      "learning_rate": 4.697896729146457e-06,
      "loss": 0.5664,
      "step": 3266
    },
    {
      "epoch": 0.40056400196174596,
      "grad_norm": 2.1277258840026585,
      "learning_rate": 4.697655160302559e-06,
      "loss": 0.572,
      "step": 3267
    },
    {
      "epoch": 0.40068661108386466,
      "grad_norm": 1.9390161807709057,
      "learning_rate": 4.697413501131143e-06,
      "loss": 0.5742,
      "step": 3268
    },
    {
      "epoch": 0.4008092202059833,
      "grad_norm": 2.2377390650870312,
      "learning_rate": 4.697171751642141e-06,
      "loss": 0.5458,
      "step": 3269
    },
    {
      "epoch": 0.400931829328102,
      "grad_norm": 2.498417485948073,
      "learning_rate": 4.69692991184549e-06,
      "loss": 0.5765,
      "step": 3270
    },
    {
      "epoch": 0.4010544384502207,
      "grad_norm": 1.9715420144174485,
      "learning_rate": 4.696687981751132e-06,
      "loss": 0.5669,
      "step": 3271
    },
    {
      "epoch": 0.4011770475723394,
      "grad_norm": 2.2197121101101134,
      "learning_rate": 4.696445961369008e-06,
      "loss": 0.5833,
      "step": 3272
    },
    {
      "epoch": 0.40129965669445805,
      "grad_norm": 2.1421110260333243,
      "learning_rate": 4.696203850709066e-06,
      "loss": 0.5609,
      "step": 3273
    },
    {
      "epoch": 0.40142226581657675,
      "grad_norm": 1.9742152345795758,
      "learning_rate": 4.695961649781258e-06,
      "loss": 0.519,
      "step": 3274
    },
    {
      "epoch": 0.40154487493869545,
      "grad_norm": 2.1316385049583157,
      "learning_rate": 4.6957193585955395e-06,
      "loss": 0.5047,
      "step": 3275
    },
    {
      "epoch": 0.40166748406081415,
      "grad_norm": 2.009119401139509,
      "learning_rate": 4.695476977161867e-06,
      "loss": 0.5268,
      "step": 3276
    },
    {
      "epoch": 0.4017900931829328,
      "grad_norm": 2.364903633305011,
      "learning_rate": 4.695234505490205e-06,
      "loss": 0.58,
      "step": 3277
    },
    {
      "epoch": 0.4019127023050515,
      "grad_norm": 2.096758133252479,
      "learning_rate": 4.694991943590517e-06,
      "loss": 0.5554,
      "step": 3278
    },
    {
      "epoch": 0.4020353114271702,
      "grad_norm": 2.220200584347517,
      "learning_rate": 4.694749291472775e-06,
      "loss": 0.5459,
      "step": 3279
    },
    {
      "epoch": 0.4021579205492889,
      "grad_norm": 1.9283284874843176,
      "learning_rate": 4.694506549146951e-06,
      "loss": 0.5361,
      "step": 3280
    },
    {
      "epoch": 0.40228052967140754,
      "grad_norm": 1.993383172239444,
      "learning_rate": 4.694263716623024e-06,
      "loss": 0.5864,
      "step": 3281
    },
    {
      "epoch": 0.40240313879352624,
      "grad_norm": 1.8484954159702105,
      "learning_rate": 4.694020793910973e-06,
      "loss": 0.51,
      "step": 3282
    },
    {
      "epoch": 0.40252574791564494,
      "grad_norm": 2.1314284759397673,
      "learning_rate": 4.693777781020783e-06,
      "loss": 0.5629,
      "step": 3283
    },
    {
      "epoch": 0.4026483570377636,
      "grad_norm": 2.2565287476241362,
      "learning_rate": 4.693534677962442e-06,
      "loss": 0.5792,
      "step": 3284
    },
    {
      "epoch": 0.4027709661598823,
      "grad_norm": 1.9867532647515402,
      "learning_rate": 4.6932914847459434e-06,
      "loss": 0.5553,
      "step": 3285
    },
    {
      "epoch": 0.402893575282001,
      "grad_norm": 1.960197287150962,
      "learning_rate": 4.693048201381281e-06,
      "loss": 0.5347,
      "step": 3286
    },
    {
      "epoch": 0.4030161844041197,
      "grad_norm": 1.9411123328674114,
      "learning_rate": 4.692804827878456e-06,
      "loss": 0.5339,
      "step": 3287
    },
    {
      "epoch": 0.4031387935262383,
      "grad_norm": 2.062265004293305,
      "learning_rate": 4.69256136424747e-06,
      "loss": 0.5701,
      "step": 3288
    },
    {
      "epoch": 0.403261402648357,
      "grad_norm": 2.144248040693785,
      "learning_rate": 4.692317810498331e-06,
      "loss": 0.6301,
      "step": 3289
    },
    {
      "epoch": 0.4033840117704757,
      "grad_norm": 2.3365543203438564,
      "learning_rate": 4.692074166641047e-06,
      "loss": 0.593,
      "step": 3290
    },
    {
      "epoch": 0.4035066208925944,
      "grad_norm": 2.147906310802431,
      "learning_rate": 4.691830432685636e-06,
      "loss": 0.5797,
      "step": 3291
    },
    {
      "epoch": 0.40362923001471307,
      "grad_norm": 2.2692806374824226,
      "learning_rate": 4.691586608642113e-06,
      "loss": 0.5981,
      "step": 3292
    },
    {
      "epoch": 0.40375183913683177,
      "grad_norm": 2.111086088604424,
      "learning_rate": 4.6913426945205e-06,
      "loss": 0.5721,
      "step": 3293
    },
    {
      "epoch": 0.40387444825895047,
      "grad_norm": 2.091388635504393,
      "learning_rate": 4.691098690330823e-06,
      "loss": 0.5887,
      "step": 3294
    },
    {
      "epoch": 0.40399705738106917,
      "grad_norm": 1.9703645860520809,
      "learning_rate": 4.690854596083111e-06,
      "loss": 0.5472,
      "step": 3295
    },
    {
      "epoch": 0.4041196665031878,
      "grad_norm": 2.133285382266898,
      "learning_rate": 4.690610411787396e-06,
      "loss": 0.5858,
      "step": 3296
    },
    {
      "epoch": 0.4042422756253065,
      "grad_norm": 2.107931745661043,
      "learning_rate": 4.690366137453716e-06,
      "loss": 0.5733,
      "step": 3297
    },
    {
      "epoch": 0.4043648847474252,
      "grad_norm": 2.05862365048563,
      "learning_rate": 4.690121773092109e-06,
      "loss": 0.5481,
      "step": 3298
    },
    {
      "epoch": 0.4044874938695439,
      "grad_norm": 2.2682514872070727,
      "learning_rate": 4.68987731871262e-06,
      "loss": 0.5347,
      "step": 3299
    },
    {
      "epoch": 0.40461010299166256,
      "grad_norm": 2.0633110800093437,
      "learning_rate": 4.689632774325297e-06,
      "loss": 0.5858,
      "step": 3300
    },
    {
      "epoch": 0.40473271211378126,
      "grad_norm": 2.0021074376886214,
      "learning_rate": 4.6893881399401894e-06,
      "loss": 0.5655,
      "step": 3301
    },
    {
      "epoch": 0.40485532123589996,
      "grad_norm": 1.972688377346128,
      "learning_rate": 4.689143415567355e-06,
      "loss": 0.518,
      "step": 3302
    },
    {
      "epoch": 0.40497793035801866,
      "grad_norm": 2.070510866592338,
      "learning_rate": 4.688898601216849e-06,
      "loss": 0.5632,
      "step": 3303
    },
    {
      "epoch": 0.4051005394801373,
      "grad_norm": 2.1570783595681435,
      "learning_rate": 4.688653696898736e-06,
      "loss": 0.5422,
      "step": 3304
    },
    {
      "epoch": 0.405223148602256,
      "grad_norm": 2.0087233454796345,
      "learning_rate": 4.688408702623081e-06,
      "loss": 0.5683,
      "step": 3305
    },
    {
      "epoch": 0.4053457577243747,
      "grad_norm": 2.0646881082816257,
      "learning_rate": 4.688163618399955e-06,
      "loss": 0.5595,
      "step": 3306
    },
    {
      "epoch": 0.4054683668464934,
      "grad_norm": 2.0350227795423046,
      "learning_rate": 4.687918444239429e-06,
      "loss": 0.5284,
      "step": 3307
    },
    {
      "epoch": 0.40559097596861204,
      "grad_norm": 1.9215150004683847,
      "learning_rate": 4.687673180151583e-06,
      "loss": 0.5448,
      "step": 3308
    },
    {
      "epoch": 0.40571358509073074,
      "grad_norm": 2.3545526416804266,
      "learning_rate": 4.687427826146495e-06,
      "loss": 0.5611,
      "step": 3309
    },
    {
      "epoch": 0.40583619421284944,
      "grad_norm": 1.9062174798638207,
      "learning_rate": 4.6871823822342525e-06,
      "loss": 0.5174,
      "step": 3310
    },
    {
      "epoch": 0.40595880333496814,
      "grad_norm": 1.7819583257296003,
      "learning_rate": 4.68693684842494e-06,
      "loss": 0.56,
      "step": 3311
    },
    {
      "epoch": 0.4060814124570868,
      "grad_norm": 1.88932642682849,
      "learning_rate": 4.686691224728652e-06,
      "loss": 0.5065,
      "step": 3312
    },
    {
      "epoch": 0.4062040215792055,
      "grad_norm": 1.941706618911615,
      "learning_rate": 4.686445511155484e-06,
      "loss": 0.5481,
      "step": 3313
    },
    {
      "epoch": 0.4063266307013242,
      "grad_norm": 2.2799728560857777,
      "learning_rate": 4.6861997077155345e-06,
      "loss": 0.5562,
      "step": 3314
    },
    {
      "epoch": 0.4064492398234429,
      "grad_norm": 2.1661732344211337,
      "learning_rate": 4.685953814418906e-06,
      "loss": 0.5212,
      "step": 3315
    },
    {
      "epoch": 0.40657184894556153,
      "grad_norm": 2.106300674118267,
      "learning_rate": 4.685707831275707e-06,
      "loss": 0.5927,
      "step": 3316
    },
    {
      "epoch": 0.40669445806768023,
      "grad_norm": 2.0136576608859427,
      "learning_rate": 4.685461758296046e-06,
      "loss": 0.5458,
      "step": 3317
    },
    {
      "epoch": 0.40681706718979893,
      "grad_norm": 2.025401852575781,
      "learning_rate": 4.685215595490038e-06,
      "loss": 0.5627,
      "step": 3318
    },
    {
      "epoch": 0.40693967631191763,
      "grad_norm": 2.124015735652726,
      "learning_rate": 4.6849693428678e-06,
      "loss": 0.5467,
      "step": 3319
    },
    {
      "epoch": 0.4070622854340363,
      "grad_norm": 2.1157727825590054,
      "learning_rate": 4.684723000439454e-06,
      "loss": 0.5347,
      "step": 3320
    },
    {
      "epoch": 0.407184894556155,
      "grad_norm": 2.0327088383089342,
      "learning_rate": 4.684476568215125e-06,
      "loss": 0.5433,
      "step": 3321
    },
    {
      "epoch": 0.4073075036782737,
      "grad_norm": 1.8636795111140312,
      "learning_rate": 4.684230046204941e-06,
      "loss": 0.5078,
      "step": 3322
    },
    {
      "epoch": 0.4074301128003924,
      "grad_norm": 2.2825776872693098,
      "learning_rate": 4.683983434419036e-06,
      "loss": 0.6157,
      "step": 3323
    },
    {
      "epoch": 0.407552721922511,
      "grad_norm": 2.3058514765962648,
      "learning_rate": 4.683736732867545e-06,
      "loss": 0.5705,
      "step": 3324
    },
    {
      "epoch": 0.4076753310446297,
      "grad_norm": 2.1072568511025214,
      "learning_rate": 4.683489941560609e-06,
      "loss": 0.5285,
      "step": 3325
    },
    {
      "epoch": 0.4077979401667484,
      "grad_norm": 2.363493028731597,
      "learning_rate": 4.68324306050837e-06,
      "loss": 0.5964,
      "step": 3326
    },
    {
      "epoch": 0.4079205492888671,
      "grad_norm": 2.1804505951990216,
      "learning_rate": 4.682996089720976e-06,
      "loss": 0.5776,
      "step": 3327
    },
    {
      "epoch": 0.40804315841098576,
      "grad_norm": 2.2730342668402046,
      "learning_rate": 4.682749029208578e-06,
      "loss": 0.5622,
      "step": 3328
    },
    {
      "epoch": 0.40816576753310446,
      "grad_norm": 1.9567074033315108,
      "learning_rate": 4.682501878981332e-06,
      "loss": 0.558,
      "step": 3329
    },
    {
      "epoch": 0.40828837665522316,
      "grad_norm": 2.2591647319369232,
      "learning_rate": 4.682254639049394e-06,
      "loss": 0.5719,
      "step": 3330
    },
    {
      "epoch": 0.40841098577734186,
      "grad_norm": 2.0408454723721383,
      "learning_rate": 4.6820073094229265e-06,
      "loss": 0.5791,
      "step": 3331
    },
    {
      "epoch": 0.4085335948994605,
      "grad_norm": 1.9855526644944148,
      "learning_rate": 4.681759890112096e-06,
      "loss": 0.5296,
      "step": 3332
    },
    {
      "epoch": 0.4086562040215792,
      "grad_norm": 2.0791887101053472,
      "learning_rate": 4.681512381127073e-06,
      "loss": 0.503,
      "step": 3333
    },
    {
      "epoch": 0.4087788131436979,
      "grad_norm": 2.411770559367751,
      "learning_rate": 4.681264782478027e-06,
      "loss": 0.5742,
      "step": 3334
    },
    {
      "epoch": 0.40890142226581655,
      "grad_norm": 2.223213322316418,
      "learning_rate": 4.681017094175138e-06,
      "loss": 0.5851,
      "step": 3335
    },
    {
      "epoch": 0.40902403138793525,
      "grad_norm": 2.010677003622843,
      "learning_rate": 4.680769316228585e-06,
      "loss": 0.5382,
      "step": 3336
    },
    {
      "epoch": 0.40914664051005395,
      "grad_norm": 2.1405562816875645,
      "learning_rate": 4.680521448648551e-06,
      "loss": 0.4964,
      "step": 3337
    },
    {
      "epoch": 0.40926924963217265,
      "grad_norm": 2.0854044064739266,
      "learning_rate": 4.680273491445227e-06,
      "loss": 0.5854,
      "step": 3338
    },
    {
      "epoch": 0.4093918587542913,
      "grad_norm": 2.065639359201795,
      "learning_rate": 4.680025444628802e-06,
      "loss": 0.6075,
      "step": 3339
    },
    {
      "epoch": 0.40951446787641,
      "grad_norm": 2.1011366356797425,
      "learning_rate": 4.679777308209471e-06,
      "loss": 0.5367,
      "step": 3340
    },
    {
      "epoch": 0.4096370769985287,
      "grad_norm": 2.007061390845266,
      "learning_rate": 4.679529082197435e-06,
      "loss": 0.5817,
      "step": 3341
    },
    {
      "epoch": 0.4097596861206474,
      "grad_norm": 1.914294081341713,
      "learning_rate": 4.6792807666028945e-06,
      "loss": 0.552,
      "step": 3342
    },
    {
      "epoch": 0.40988229524276604,
      "grad_norm": 2.230249850996275,
      "learning_rate": 4.679032361436056e-06,
      "loss": 0.5931,
      "step": 3343
    },
    {
      "epoch": 0.41000490436488474,
      "grad_norm": 1.9604828732819612,
      "learning_rate": 4.678783866707131e-06,
      "loss": 0.5354,
      "step": 3344
    },
    {
      "epoch": 0.41012751348700344,
      "grad_norm": 2.03747993201356,
      "learning_rate": 4.678535282426331e-06,
      "loss": 0.5578,
      "step": 3345
    },
    {
      "epoch": 0.41025012260912214,
      "grad_norm": 1.979101507518659,
      "learning_rate": 4.678286608603874e-06,
      "loss": 0.5696,
      "step": 3346
    },
    {
      "epoch": 0.4103727317312408,
      "grad_norm": 2.2231018059879477,
      "learning_rate": 4.6780378452499805e-06,
      "loss": 0.5468,
      "step": 3347
    },
    {
      "epoch": 0.4104953408533595,
      "grad_norm": 2.0450827302953196,
      "learning_rate": 4.677788992374877e-06,
      "loss": 0.5404,
      "step": 3348
    },
    {
      "epoch": 0.4106179499754782,
      "grad_norm": 2.0289966019432284,
      "learning_rate": 4.6775400499887894e-06,
      "loss": 0.542,
      "step": 3349
    },
    {
      "epoch": 0.4107405590975969,
      "grad_norm": 2.0658131489002,
      "learning_rate": 4.677291018101951e-06,
      "loss": 0.5597,
      "step": 3350
    },
    {
      "epoch": 0.4108631682197155,
      "grad_norm": 2.1737691513588335,
      "learning_rate": 4.6770418967245975e-06,
      "loss": 0.5643,
      "step": 3351
    },
    {
      "epoch": 0.4109857773418342,
      "grad_norm": 2.0021349933290917,
      "learning_rate": 4.676792685866967e-06,
      "loss": 0.5744,
      "step": 3352
    },
    {
      "epoch": 0.4111083864639529,
      "grad_norm": 1.902678446777951,
      "learning_rate": 4.676543385539304e-06,
      "loss": 0.5436,
      "step": 3353
    },
    {
      "epoch": 0.4112309955860716,
      "grad_norm": 2.2077112885623205,
      "learning_rate": 4.676293995751855e-06,
      "loss": 0.5558,
      "step": 3354
    },
    {
      "epoch": 0.41135360470819027,
      "grad_norm": 1.9089475227421169,
      "learning_rate": 4.67604451651487e-06,
      "loss": 0.5559,
      "step": 3355
    },
    {
      "epoch": 0.41147621383030897,
      "grad_norm": 2.0876853720266526,
      "learning_rate": 4.675794947838602e-06,
      "loss": 0.5296,
      "step": 3356
    },
    {
      "epoch": 0.41159882295242767,
      "grad_norm": 2.2112840724239193,
      "learning_rate": 4.67554528973331e-06,
      "loss": 0.5451,
      "step": 3357
    },
    {
      "epoch": 0.41172143207454637,
      "grad_norm": 2.2018323894828167,
      "learning_rate": 4.675295542209256e-06,
      "loss": 0.5107,
      "step": 3358
    },
    {
      "epoch": 0.411844041196665,
      "grad_norm": 2.0947114545397856,
      "learning_rate": 4.675045705276703e-06,
      "loss": 0.5216,
      "step": 3359
    },
    {
      "epoch": 0.4119666503187837,
      "grad_norm": 1.80621506711525,
      "learning_rate": 4.674795778945922e-06,
      "loss": 0.4827,
      "step": 3360
    },
    {
      "epoch": 0.4120892594409024,
      "grad_norm": 2.192731378426004,
      "learning_rate": 4.674545763227183e-06,
      "loss": 0.5693,
      "step": 3361
    },
    {
      "epoch": 0.4122118685630211,
      "grad_norm": 2.087543113486589,
      "learning_rate": 4.6742956581307645e-06,
      "loss": 0.5614,
      "step": 3362
    },
    {
      "epoch": 0.41233447768513976,
      "grad_norm": 2.2722361203193655,
      "learning_rate": 4.674045463666944e-06,
      "loss": 0.6044,
      "step": 3363
    },
    {
      "epoch": 0.41245708680725846,
      "grad_norm": 2.1660392577771974,
      "learning_rate": 4.673795179846008e-06,
      "loss": 0.5421,
      "step": 3364
    },
    {
      "epoch": 0.41257969592937715,
      "grad_norm": 1.9140149072476813,
      "learning_rate": 4.67354480667824e-06,
      "loss": 0.5001,
      "step": 3365
    },
    {
      "epoch": 0.41270230505149585,
      "grad_norm": 2.523239319875167,
      "learning_rate": 4.673294344173933e-06,
      "loss": 0.6324,
      "step": 3366
    },
    {
      "epoch": 0.4128249141736145,
      "grad_norm": 2.316487594851582,
      "learning_rate": 4.673043792343382e-06,
      "loss": 0.6034,
      "step": 3367
    },
    {
      "epoch": 0.4129475232957332,
      "grad_norm": 2.4460601610601187,
      "learning_rate": 4.6727931511968824e-06,
      "loss": 0.5803,
      "step": 3368
    },
    {
      "epoch": 0.4130701324178519,
      "grad_norm": 2.189087448206088,
      "learning_rate": 4.6725424207447375e-06,
      "loss": 0.5976,
      "step": 3369
    },
    {
      "epoch": 0.4131927415399706,
      "grad_norm": 1.9321530288938693,
      "learning_rate": 4.672291600997254e-06,
      "loss": 0.5806,
      "step": 3370
    },
    {
      "epoch": 0.41331535066208924,
      "grad_norm": 2.1995743028644617,
      "learning_rate": 4.67204069196474e-06,
      "loss": 0.5917,
      "step": 3371
    },
    {
      "epoch": 0.41343795978420794,
      "grad_norm": 2.4253376190521365,
      "learning_rate": 4.671789693657508e-06,
      "loss": 0.5397,
      "step": 3372
    },
    {
      "epoch": 0.41356056890632664,
      "grad_norm": 2.4319843345191896,
      "learning_rate": 4.6715386060858745e-06,
      "loss": 0.5769,
      "step": 3373
    },
    {
      "epoch": 0.41368317802844534,
      "grad_norm": 2.37325262655483,
      "learning_rate": 4.671287429260161e-06,
      "loss": 0.5397,
      "step": 3374
    },
    {
      "epoch": 0.413805787150564,
      "grad_norm": 2.2180608606456524,
      "learning_rate": 4.671036163190689e-06,
      "loss": 0.5793,
      "step": 3375
    },
    {
      "epoch": 0.4139283962726827,
      "grad_norm": 2.138060851247992,
      "learning_rate": 4.670784807887787e-06,
      "loss": 0.533,
      "step": 3376
    },
    {
      "epoch": 0.4140510053948014,
      "grad_norm": 1.9201177965165424,
      "learning_rate": 4.670533363361787e-06,
      "loss": 0.5409,
      "step": 3377
    },
    {
      "epoch": 0.4141736145169201,
      "grad_norm": 2.102887719360065,
      "learning_rate": 4.670281829623023e-06,
      "loss": 0.543,
      "step": 3378
    },
    {
      "epoch": 0.41429622363903873,
      "grad_norm": 2.222132844783542,
      "learning_rate": 4.670030206681833e-06,
      "loss": 0.5592,
      "step": 3379
    },
    {
      "epoch": 0.41441883276115743,
      "grad_norm": 2.0978450252921963,
      "learning_rate": 4.6697784945485604e-06,
      "loss": 0.5451,
      "step": 3380
    },
    {
      "epoch": 0.41454144188327613,
      "grad_norm": 2.2910276421449822,
      "learning_rate": 4.66952669323355e-06,
      "loss": 0.5301,
      "step": 3381
    },
    {
      "epoch": 0.4146640510053948,
      "grad_norm": 2.1445216525078465,
      "learning_rate": 4.669274802747153e-06,
      "loss": 0.6102,
      "step": 3382
    },
    {
      "epoch": 0.4147866601275135,
      "grad_norm": 2.141611046153773,
      "learning_rate": 4.6690228230997195e-06,
      "loss": 0.5595,
      "step": 3383
    },
    {
      "epoch": 0.4149092692496322,
      "grad_norm": 2.247808047149825,
      "learning_rate": 4.6687707543016095e-06,
      "loss": 0.5754,
      "step": 3384
    },
    {
      "epoch": 0.4150318783717509,
      "grad_norm": 1.9709072534448013,
      "learning_rate": 4.668518596363181e-06,
      "loss": 0.5562,
      "step": 3385
    },
    {
      "epoch": 0.4151544874938695,
      "grad_norm": 2.008192798941864,
      "learning_rate": 4.6682663492947984e-06,
      "loss": 0.5184,
      "step": 3386
    },
    {
      "epoch": 0.4152770966159882,
      "grad_norm": 2.393737365753565,
      "learning_rate": 4.668014013106832e-06,
      "loss": 0.5369,
      "step": 3387
    },
    {
      "epoch": 0.4153997057381069,
      "grad_norm": 2.0163264451151344,
      "learning_rate": 4.66776158780965e-06,
      "loss": 0.5254,
      "step": 3388
    },
    {
      "epoch": 0.4155223148602256,
      "grad_norm": 2.1445011704403103,
      "learning_rate": 4.66750907341363e-06,
      "loss": 0.5835,
      "step": 3389
    },
    {
      "epoch": 0.41564492398234426,
      "grad_norm": 1.9769099293162533,
      "learning_rate": 4.667256469929149e-06,
      "loss": 0.5571,
      "step": 3390
    },
    {
      "epoch": 0.41576753310446296,
      "grad_norm": 2.241481779562706,
      "learning_rate": 4.667003777366591e-06,
      "loss": 0.5103,
      "step": 3391
    },
    {
      "epoch": 0.41589014222658166,
      "grad_norm": 1.9449954714480293,
      "learning_rate": 4.666750995736341e-06,
      "loss": 0.5353,
      "step": 3392
    },
    {
      "epoch": 0.41601275134870036,
      "grad_norm": 3.2702702912021535,
      "learning_rate": 4.666498125048789e-06,
      "loss": 0.585,
      "step": 3393
    },
    {
      "epoch": 0.416135360470819,
      "grad_norm": 2.266385896340605,
      "learning_rate": 4.666245165314329e-06,
      "loss": 0.5403,
      "step": 3394
    },
    {
      "epoch": 0.4162579695929377,
      "grad_norm": 2.0151934647546454,
      "learning_rate": 4.665992116543358e-06,
      "loss": 0.5277,
      "step": 3395
    },
    {
      "epoch": 0.4163805787150564,
      "grad_norm": 2.0859889694651934,
      "learning_rate": 4.665738978746276e-06,
      "loss": 0.5688,
      "step": 3396
    },
    {
      "epoch": 0.4165031878371751,
      "grad_norm": 2.117698542686676,
      "learning_rate": 4.665485751933487e-06,
      "loss": 0.5362,
      "step": 3397
    },
    {
      "epoch": 0.41662579695929375,
      "grad_norm": 1.8964607071241013,
      "learning_rate": 4.665232436115401e-06,
      "loss": 0.556,
      "step": 3398
    },
    {
      "epoch": 0.41674840608141245,
      "grad_norm": 2.0872248865633045,
      "learning_rate": 4.664979031302428e-06,
      "loss": 0.589,
      "step": 3399
    },
    {
      "epoch": 0.41687101520353115,
      "grad_norm": 2.1866339991252897,
      "learning_rate": 4.664725537504985e-06,
      "loss": 0.5573,
      "step": 3400
    },
    {
      "epoch": 0.41699362432564985,
      "grad_norm": 1.9732366097556437,
      "learning_rate": 4.6644719547334895e-06,
      "loss": 0.553,
      "step": 3401
    },
    {
      "epoch": 0.4171162334477685,
      "grad_norm": 2.1610769959911225,
      "learning_rate": 4.664218282998364e-06,
      "loss": 0.5887,
      "step": 3402
    },
    {
      "epoch": 0.4172388425698872,
      "grad_norm": 2.025479216573328,
      "learning_rate": 4.663964522310036e-06,
      "loss": 0.545,
      "step": 3403
    },
    {
      "epoch": 0.4173614516920059,
      "grad_norm": 2.2130342842474664,
      "learning_rate": 4.6637106726789355e-06,
      "loss": 0.5158,
      "step": 3404
    },
    {
      "epoch": 0.4174840608141246,
      "grad_norm": 2.130428092103285,
      "learning_rate": 4.663456734115496e-06,
      "loss": 0.6234,
      "step": 3405
    },
    {
      "epoch": 0.41760666993624324,
      "grad_norm": 2.1772941914770514,
      "learning_rate": 4.663202706630154e-06,
      "loss": 0.5711,
      "step": 3406
    },
    {
      "epoch": 0.41772927905836194,
      "grad_norm": 1.8795083701734936,
      "learning_rate": 4.662948590233352e-06,
      "loss": 0.5242,
      "step": 3407
    },
    {
      "epoch": 0.41785188818048064,
      "grad_norm": 1.8943964433100988,
      "learning_rate": 4.662694384935533e-06,
      "loss": 0.5504,
      "step": 3408
    },
    {
      "epoch": 0.41797449730259933,
      "grad_norm": 2.2766144253622147,
      "learning_rate": 4.662440090747146e-06,
      "loss": 0.5681,
      "step": 3409
    },
    {
      "epoch": 0.418097106424718,
      "grad_norm": 2.2308831197729537,
      "learning_rate": 4.662185707678644e-06,
      "loss": 0.5498,
      "step": 3410
    },
    {
      "epoch": 0.4182197155468367,
      "grad_norm": 1.7688922475704274,
      "learning_rate": 4.66193123574048e-06,
      "loss": 0.535,
      "step": 3411
    },
    {
      "epoch": 0.4183423246689554,
      "grad_norm": 1.945973825493986,
      "learning_rate": 4.661676674943115e-06,
      "loss": 0.5208,
      "step": 3412
    },
    {
      "epoch": 0.4184649337910741,
      "grad_norm": 1.9457397034431698,
      "learning_rate": 4.661422025297012e-06,
      "loss": 0.552,
      "step": 3413
    },
    {
      "epoch": 0.4185875429131927,
      "grad_norm": 2.012365547906927,
      "learning_rate": 4.661167286812638e-06,
      "loss": 0.5197,
      "step": 3414
    },
    {
      "epoch": 0.4187101520353114,
      "grad_norm": 2.0710642628078384,
      "learning_rate": 4.660912459500462e-06,
      "loss": 0.5793,
      "step": 3415
    },
    {
      "epoch": 0.4188327611574301,
      "grad_norm": 2.2176557953205904,
      "learning_rate": 4.660657543370958e-06,
      "loss": 0.6336,
      "step": 3416
    },
    {
      "epoch": 0.4189553702795488,
      "grad_norm": 2.0628995834766832,
      "learning_rate": 4.660402538434605e-06,
      "loss": 0.5591,
      "step": 3417
    },
    {
      "epoch": 0.41907797940166747,
      "grad_norm": 1.8866988647675953,
      "learning_rate": 4.660147444701881e-06,
      "loss": 0.5664,
      "step": 3418
    },
    {
      "epoch": 0.41920058852378617,
      "grad_norm": 2.1267322780355427,
      "learning_rate": 4.659892262183274e-06,
      "loss": 0.5926,
      "step": 3419
    },
    {
      "epoch": 0.41932319764590487,
      "grad_norm": 2.0691204862409354,
      "learning_rate": 4.659636990889271e-06,
      "loss": 0.5596,
      "step": 3420
    },
    {
      "epoch": 0.41944580676802357,
      "grad_norm": 2.0477874789197155,
      "learning_rate": 4.659381630830365e-06,
      "loss": 0.5401,
      "step": 3421
    },
    {
      "epoch": 0.4195684158901422,
      "grad_norm": 2.121741483021424,
      "learning_rate": 4.659126182017051e-06,
      "loss": 0.5887,
      "step": 3422
    },
    {
      "epoch": 0.4196910250122609,
      "grad_norm": 2.088799876729286,
      "learning_rate": 4.658870644459827e-06,
      "loss": 0.6234,
      "step": 3423
    },
    {
      "epoch": 0.4198136341343796,
      "grad_norm": 2.05866773990815,
      "learning_rate": 4.658615018169199e-06,
      "loss": 0.5602,
      "step": 3424
    },
    {
      "epoch": 0.4199362432564983,
      "grad_norm": 1.9194537009922479,
      "learning_rate": 4.658359303155672e-06,
      "loss": 0.5135,
      "step": 3425
    },
    {
      "epoch": 0.42005885237861695,
      "grad_norm": 1.9982856819477715,
      "learning_rate": 4.658103499429757e-06,
      "loss": 0.5451,
      "step": 3426
    },
    {
      "epoch": 0.42018146150073565,
      "grad_norm": 1.9312968077007464,
      "learning_rate": 4.657847607001966e-06,
      "loss": 0.5736,
      "step": 3427
    },
    {
      "epoch": 0.42030407062285435,
      "grad_norm": 2.2173848437078343,
      "learning_rate": 4.65759162588282e-06,
      "loss": 0.5351,
      "step": 3428
    },
    {
      "epoch": 0.42042667974497305,
      "grad_norm": 2.048096638515399,
      "learning_rate": 4.657335556082837e-06,
      "loss": 0.5705,
      "step": 3429
    },
    {
      "epoch": 0.4205492888670917,
      "grad_norm": 2.0519326855470896,
      "learning_rate": 4.657079397612544e-06,
      "loss": 0.5504,
      "step": 3430
    },
    {
      "epoch": 0.4206718979892104,
      "grad_norm": 2.071740591360204,
      "learning_rate": 4.656823150482469e-06,
      "loss": 0.6106,
      "step": 3431
    },
    {
      "epoch": 0.4207945071113291,
      "grad_norm": 2.3462004409994153,
      "learning_rate": 4.656566814703144e-06,
      "loss": 0.5874,
      "step": 3432
    },
    {
      "epoch": 0.42091711623344774,
      "grad_norm": 1.9542944578928614,
      "learning_rate": 4.656310390285105e-06,
      "loss": 0.5486,
      "step": 3433
    },
    {
      "epoch": 0.42103972535556644,
      "grad_norm": 2.1663970823552,
      "learning_rate": 4.656053877238892e-06,
      "loss": 0.5414,
      "step": 3434
    },
    {
      "epoch": 0.42116233447768514,
      "grad_norm": 2.064467813163212,
      "learning_rate": 4.655797275575047e-06,
      "loss": 0.5969,
      "step": 3435
    },
    {
      "epoch": 0.42128494359980384,
      "grad_norm": 1.9891161634372743,
      "learning_rate": 4.655540585304118e-06,
      "loss": 0.5534,
      "step": 3436
    },
    {
      "epoch": 0.4214075527219225,
      "grad_norm": 2.0880530247097693,
      "learning_rate": 4.655283806436654e-06,
      "loss": 0.5674,
      "step": 3437
    },
    {
      "epoch": 0.4215301618440412,
      "grad_norm": 2.033046071813895,
      "learning_rate": 4.65502693898321e-06,
      "loss": 0.5479,
      "step": 3438
    },
    {
      "epoch": 0.4216527709661599,
      "grad_norm": 2.065902193530228,
      "learning_rate": 4.654769982954345e-06,
      "loss": 0.563,
      "step": 3439
    },
    {
      "epoch": 0.4217753800882786,
      "grad_norm": 2.0121780726163268,
      "learning_rate": 4.654512938360618e-06,
      "loss": 0.6307,
      "step": 3440
    },
    {
      "epoch": 0.42189798921039723,
      "grad_norm": 2.1637273060014732,
      "learning_rate": 4.654255805212594e-06,
      "loss": 0.6333,
      "step": 3441
    },
    {
      "epoch": 0.42202059833251593,
      "grad_norm": 2.2292847773571487,
      "learning_rate": 4.653998583520844e-06,
      "loss": 0.5315,
      "step": 3442
    },
    {
      "epoch": 0.42214320745463463,
      "grad_norm": 2.0760523476191013,
      "learning_rate": 4.653741273295938e-06,
      "loss": 0.5416,
      "step": 3443
    },
    {
      "epoch": 0.42226581657675333,
      "grad_norm": 2.1860717005580224,
      "learning_rate": 4.653483874548454e-06,
      "loss": 0.5481,
      "step": 3444
    },
    {
      "epoch": 0.42238842569887197,
      "grad_norm": 2.023688775332933,
      "learning_rate": 4.653226387288969e-06,
      "loss": 0.528,
      "step": 3445
    },
    {
      "epoch": 0.42251103482099067,
      "grad_norm": 2.1115749257490153,
      "learning_rate": 4.652968811528069e-06,
      "loss": 0.5522,
      "step": 3446
    },
    {
      "epoch": 0.42263364394310937,
      "grad_norm": 1.7912102361404427,
      "learning_rate": 4.652711147276338e-06,
      "loss": 0.5246,
      "step": 3447
    },
    {
      "epoch": 0.42275625306522807,
      "grad_norm": 2.0402366615565324,
      "learning_rate": 4.652453394544369e-06,
      "loss": 0.5213,
      "step": 3448
    },
    {
      "epoch": 0.4228788621873467,
      "grad_norm": 2.1168736953767824,
      "learning_rate": 4.652195553342753e-06,
      "loss": 0.5764,
      "step": 3449
    },
    {
      "epoch": 0.4230014713094654,
      "grad_norm": 2.128613671624858,
      "learning_rate": 4.651937623682091e-06,
      "loss": 0.5677,
      "step": 3450
    },
    {
      "epoch": 0.4231240804315841,
      "grad_norm": 2.0760340700197886,
      "learning_rate": 4.6516796055729825e-06,
      "loss": 0.556,
      "step": 3451
    },
    {
      "epoch": 0.4232466895537028,
      "grad_norm": 2.1166840450097695,
      "learning_rate": 4.651421499026033e-06,
      "loss": 0.5687,
      "step": 3452
    },
    {
      "epoch": 0.42336929867582146,
      "grad_norm": 2.137041217662521,
      "learning_rate": 4.6511633040518515e-06,
      "loss": 0.5482,
      "step": 3453
    },
    {
      "epoch": 0.42349190779794016,
      "grad_norm": 2.270776163981297,
      "learning_rate": 4.65090502066105e-06,
      "loss": 0.607,
      "step": 3454
    },
    {
      "epoch": 0.42361451692005886,
      "grad_norm": 2.214129120500861,
      "learning_rate": 4.650646648864244e-06,
      "loss": 0.5557,
      "step": 3455
    },
    {
      "epoch": 0.42373712604217756,
      "grad_norm": 2.0576765613921677,
      "learning_rate": 4.650388188672052e-06,
      "loss": 0.571,
      "step": 3456
    },
    {
      "epoch": 0.4238597351642962,
      "grad_norm": 1.9418173782749208,
      "learning_rate": 4.6501296400950995e-06,
      "loss": 0.4833,
      "step": 3457
    },
    {
      "epoch": 0.4239823442864149,
      "grad_norm": 2.2634272817216625,
      "learning_rate": 4.649871003144013e-06,
      "loss": 0.5837,
      "step": 3458
    },
    {
      "epoch": 0.4241049534085336,
      "grad_norm": 2.0502111390639195,
      "learning_rate": 4.6496122778294205e-06,
      "loss": 0.5439,
      "step": 3459
    },
    {
      "epoch": 0.4242275625306523,
      "grad_norm": 2.077609622623519,
      "learning_rate": 4.649353464161959e-06,
      "loss": 0.5524,
      "step": 3460
    },
    {
      "epoch": 0.42435017165277095,
      "grad_norm": 2.0201572105303565,
      "learning_rate": 4.649094562152266e-06,
      "loss": 0.5413,
      "step": 3461
    },
    {
      "epoch": 0.42447278077488965,
      "grad_norm": 1.8862898032145665,
      "learning_rate": 4.6488355718109805e-06,
      "loss": 0.5613,
      "step": 3462
    },
    {
      "epoch": 0.42459538989700835,
      "grad_norm": 2.0458666530895115,
      "learning_rate": 4.648576493148748e-06,
      "loss": 0.5795,
      "step": 3463
    },
    {
      "epoch": 0.42471799901912705,
      "grad_norm": 1.9500456792612246,
      "learning_rate": 4.64831732617622e-06,
      "loss": 0.5643,
      "step": 3464
    },
    {
      "epoch": 0.4248406081412457,
      "grad_norm": 1.8757579658841699,
      "learning_rate": 4.648058070904044e-06,
      "loss": 0.5447,
      "step": 3465
    },
    {
      "epoch": 0.4249632172633644,
      "grad_norm": 2.1562987117179184,
      "learning_rate": 4.6477987273428806e-06,
      "loss": 0.5277,
      "step": 3466
    },
    {
      "epoch": 0.4250858263854831,
      "grad_norm": 2.067985117603793,
      "learning_rate": 4.647539295503386e-06,
      "loss": 0.5816,
      "step": 3467
    },
    {
      "epoch": 0.4252084355076018,
      "grad_norm": 1.955765176560867,
      "learning_rate": 4.6472797753962255e-06,
      "loss": 0.5289,
      "step": 3468
    },
    {
      "epoch": 0.42533104462972043,
      "grad_norm": 2.0878905564315726,
      "learning_rate": 4.647020167032063e-06,
      "loss": 0.5519,
      "step": 3469
    },
    {
      "epoch": 0.42545365375183913,
      "grad_norm": 1.8430322068127936,
      "learning_rate": 4.646760470421573e-06,
      "loss": 0.5623,
      "step": 3470
    },
    {
      "epoch": 0.42557626287395783,
      "grad_norm": 2.1502859550431,
      "learning_rate": 4.646500685575426e-06,
      "loss": 0.5363,
      "step": 3471
    },
    {
      "epoch": 0.42569887199607653,
      "grad_norm": 1.9260027382484302,
      "learning_rate": 4.6462408125043004e-06,
      "loss": 0.5573,
      "step": 3472
    },
    {
      "epoch": 0.4258214811181952,
      "grad_norm": 2.0304419092834474,
      "learning_rate": 4.645980851218878e-06,
      "loss": 0.5296,
      "step": 3473
    },
    {
      "epoch": 0.4259440902403139,
      "grad_norm": 1.9094364816078666,
      "learning_rate": 4.645720801729843e-06,
      "loss": 0.5904,
      "step": 3474
    },
    {
      "epoch": 0.4260666993624326,
      "grad_norm": 2.1108899633736042,
      "learning_rate": 4.645460664047885e-06,
      "loss": 0.5317,
      "step": 3475
    },
    {
      "epoch": 0.4261893084845513,
      "grad_norm": 2.115190865535291,
      "learning_rate": 4.645200438183695e-06,
      "loss": 0.5388,
      "step": 3476
    },
    {
      "epoch": 0.4263119176066699,
      "grad_norm": 1.9593340603686553,
      "learning_rate": 4.6449401241479695e-06,
      "loss": 0.5487,
      "step": 3477
    },
    {
      "epoch": 0.4264345267287886,
      "grad_norm": 2.141720405621178,
      "learning_rate": 4.644679721951408e-06,
      "loss": 0.5436,
      "step": 3478
    },
    {
      "epoch": 0.4265571358509073,
      "grad_norm": 2.09488535445868,
      "learning_rate": 4.644419231604713e-06,
      "loss": 0.5764,
      "step": 3479
    },
    {
      "epoch": 0.426679744973026,
      "grad_norm": 2.1684014355023824,
      "learning_rate": 4.64415865311859e-06,
      "loss": 0.5729,
      "step": 3480
    },
    {
      "epoch": 0.42680235409514466,
      "grad_norm": 1.8936175113353295,
      "learning_rate": 4.6438979865037515e-06,
      "loss": 0.5215,
      "step": 3481
    },
    {
      "epoch": 0.42692496321726336,
      "grad_norm": 1.7474471968332423,
      "learning_rate": 4.64363723177091e-06,
      "loss": 0.5083,
      "step": 3482
    },
    {
      "epoch": 0.42704757233938206,
      "grad_norm": 2.043664524056626,
      "learning_rate": 4.643376388930784e-06,
      "loss": 0.5428,
      "step": 3483
    },
    {
      "epoch": 0.4271701814615007,
      "grad_norm": 1.9909874179519678,
      "learning_rate": 4.643115457994093e-06,
      "loss": 0.5429,
      "step": 3484
    },
    {
      "epoch": 0.4272927905836194,
      "grad_norm": 1.8816045605647111,
      "learning_rate": 4.642854438971562e-06,
      "loss": 0.5591,
      "step": 3485
    },
    {
      "epoch": 0.4274153997057381,
      "grad_norm": 1.9774679594676507,
      "learning_rate": 4.642593331873921e-06,
      "loss": 0.585,
      "step": 3486
    },
    {
      "epoch": 0.4275380088278568,
      "grad_norm": 2.1515939638837676,
      "learning_rate": 4.642332136711901e-06,
      "loss": 0.5666,
      "step": 3487
    },
    {
      "epoch": 0.42766061794997545,
      "grad_norm": 2.082821548870457,
      "learning_rate": 4.642070853496237e-06,
      "loss": 0.5473,
      "step": 3488
    },
    {
      "epoch": 0.42778322707209415,
      "grad_norm": 1.9407531334805643,
      "learning_rate": 4.641809482237668e-06,
      "loss": 0.5742,
      "step": 3489
    },
    {
      "epoch": 0.42790583619421285,
      "grad_norm": 1.966481618494102,
      "learning_rate": 4.641548022946939e-06,
      "loss": 0.5617,
      "step": 3490
    },
    {
      "epoch": 0.42802844531633155,
      "grad_norm": 2.015206986060532,
      "learning_rate": 4.6412864756347945e-06,
      "loss": 0.5629,
      "step": 3491
    },
    {
      "epoch": 0.4281510544384502,
      "grad_norm": 2.1192221756451484,
      "learning_rate": 4.641024840311984e-06,
      "loss": 0.5617,
      "step": 3492
    },
    {
      "epoch": 0.4282736635605689,
      "grad_norm": 1.9909916092897295,
      "learning_rate": 4.6407631169892635e-06,
      "loss": 0.591,
      "step": 3493
    },
    {
      "epoch": 0.4283962726826876,
      "grad_norm": 2.0525540568058256,
      "learning_rate": 4.640501305677387e-06,
      "loss": 0.5588,
      "step": 3494
    },
    {
      "epoch": 0.4285188818048063,
      "grad_norm": 1.9591323968140169,
      "learning_rate": 4.6402394063871195e-06,
      "loss": 0.5406,
      "step": 3495
    },
    {
      "epoch": 0.42864149092692494,
      "grad_norm": 2.0349069570923373,
      "learning_rate": 4.6399774191292225e-06,
      "loss": 0.5697,
      "step": 3496
    },
    {
      "epoch": 0.42876410004904364,
      "grad_norm": 2.0628825164941844,
      "learning_rate": 4.639715343914465e-06,
      "loss": 0.5498,
      "step": 3497
    },
    {
      "epoch": 0.42888670917116234,
      "grad_norm": 2.059631257149244,
      "learning_rate": 4.639453180753619e-06,
      "loss": 0.5993,
      "step": 3498
    },
    {
      "epoch": 0.42900931829328104,
      "grad_norm": 1.9385615178825555,
      "learning_rate": 4.63919092965746e-06,
      "loss": 0.5368,
      "step": 3499
    },
    {
      "epoch": 0.4291319274153997,
      "grad_norm": 2.1089556098478632,
      "learning_rate": 4.638928590636767e-06,
      "loss": 0.5324,
      "step": 3500
    },
    {
      "epoch": 0.4292545365375184,
      "grad_norm": 1.9395377714213324,
      "learning_rate": 4.638666163702321e-06,
      "loss": 0.5709,
      "step": 3501
    },
    {
      "epoch": 0.4293771456596371,
      "grad_norm": 1.857926829688806,
      "learning_rate": 4.63840364886491e-06,
      "loss": 0.5736,
      "step": 3502
    },
    {
      "epoch": 0.4294997547817558,
      "grad_norm": 1.9602556258213741,
      "learning_rate": 4.6381410461353225e-06,
      "loss": 0.5267,
      "step": 3503
    },
    {
      "epoch": 0.4296223639038744,
      "grad_norm": 2.159205804293527,
      "learning_rate": 4.637878355524354e-06,
      "loss": 0.5698,
      "step": 3504
    },
    {
      "epoch": 0.4297449730259931,
      "grad_norm": 2.0502082050256965,
      "learning_rate": 4.6376155770428e-06,
      "loss": 0.5613,
      "step": 3505
    },
    {
      "epoch": 0.4298675821481118,
      "grad_norm": 2.129328305456904,
      "learning_rate": 4.637352710701461e-06,
      "loss": 0.6057,
      "step": 3506
    },
    {
      "epoch": 0.4299901912702305,
      "grad_norm": 2.061226823047102,
      "learning_rate": 4.637089756511142e-06,
      "loss": 0.5336,
      "step": 3507
    },
    {
      "epoch": 0.43011280039234917,
      "grad_norm": 2.229428285046801,
      "learning_rate": 4.63682671448265e-06,
      "loss": 0.5359,
      "step": 3508
    },
    {
      "epoch": 0.43023540951446787,
      "grad_norm": 2.158242135474844,
      "learning_rate": 4.6365635846267974e-06,
      "loss": 0.5627,
      "step": 3509
    },
    {
      "epoch": 0.43035801863658657,
      "grad_norm": 2.004409835150981,
      "learning_rate": 4.636300366954399e-06,
      "loss": 0.5409,
      "step": 3510
    },
    {
      "epoch": 0.43048062775870527,
      "grad_norm": 1.9627006824967246,
      "learning_rate": 4.636037061476274e-06,
      "loss": 0.5409,
      "step": 3511
    },
    {
      "epoch": 0.4306032368808239,
      "grad_norm": 1.9368048674004077,
      "learning_rate": 4.635773668203244e-06,
      "loss": 0.5114,
      "step": 3512
    },
    {
      "epoch": 0.4307258460029426,
      "grad_norm": 2.093861803799361,
      "learning_rate": 4.635510187146135e-06,
      "loss": 0.5435,
      "step": 3513
    },
    {
      "epoch": 0.4308484551250613,
      "grad_norm": 1.8490640794734652,
      "learning_rate": 4.635246618315777e-06,
      "loss": 0.5435,
      "step": 3514
    },
    {
      "epoch": 0.43097106424718,
      "grad_norm": 1.9631131867793203,
      "learning_rate": 4.634982961723003e-06,
      "loss": 0.5099,
      "step": 3515
    },
    {
      "epoch": 0.43109367336929866,
      "grad_norm": 1.9476574991875957,
      "learning_rate": 4.634719217378648e-06,
      "loss": 0.5664,
      "step": 3516
    },
    {
      "epoch": 0.43121628249141736,
      "grad_norm": 2.010394965229585,
      "learning_rate": 4.634455385293556e-06,
      "loss": 0.5351,
      "step": 3517
    },
    {
      "epoch": 0.43133889161353606,
      "grad_norm": 2.1629275622443536,
      "learning_rate": 4.634191465478567e-06,
      "loss": 0.5664,
      "step": 3518
    },
    {
      "epoch": 0.43146150073565476,
      "grad_norm": 1.943240349737302,
      "learning_rate": 4.633927457944532e-06,
      "loss": 0.4966,
      "step": 3519
    },
    {
      "epoch": 0.4315841098577734,
      "grad_norm": 1.9217955095871395,
      "learning_rate": 4.6336633627023e-06,
      "loss": 0.5292,
      "step": 3520
    },
    {
      "epoch": 0.4317067189798921,
      "grad_norm": 2.0289318017533353,
      "learning_rate": 4.633399179762726e-06,
      "loss": 0.5558,
      "step": 3521
    },
    {
      "epoch": 0.4318293281020108,
      "grad_norm": 2.2889246905079212,
      "learning_rate": 4.63313490913667e-06,
      "loss": 0.6182,
      "step": 3522
    },
    {
      "epoch": 0.4319519372241295,
      "grad_norm": 2.070148767619939,
      "learning_rate": 4.632870550834993e-06,
      "loss": 0.6047,
      "step": 3523
    },
    {
      "epoch": 0.43207454634624815,
      "grad_norm": 1.8870960354516,
      "learning_rate": 4.632606104868559e-06,
      "loss": 0.4767,
      "step": 3524
    },
    {
      "epoch": 0.43219715546836684,
      "grad_norm": 1.8853181922629598,
      "learning_rate": 4.632341571248239e-06,
      "loss": 0.5404,
      "step": 3525
    },
    {
      "epoch": 0.43231976459048554,
      "grad_norm": 2.1491074173203173,
      "learning_rate": 4.6320769499849056e-06,
      "loss": 0.5469,
      "step": 3526
    },
    {
      "epoch": 0.43244237371260424,
      "grad_norm": 2.0154825111714207,
      "learning_rate": 4.631812241089435e-06,
      "loss": 0.5368,
      "step": 3527
    },
    {
      "epoch": 0.4325649828347229,
      "grad_norm": 2.1942478324524766,
      "learning_rate": 4.631547444572707e-06,
      "loss": 0.6511,
      "step": 3528
    },
    {
      "epoch": 0.4326875919568416,
      "grad_norm": 2.1376894386435854,
      "learning_rate": 4.631282560445606e-06,
      "loss": 0.5469,
      "step": 3529
    },
    {
      "epoch": 0.4328102010789603,
      "grad_norm": 2.040180786697539,
      "learning_rate": 4.6310175887190175e-06,
      "loss": 0.5356,
      "step": 3530
    },
    {
      "epoch": 0.43293281020107893,
      "grad_norm": 2.0557194332226403,
      "learning_rate": 4.630752529403835e-06,
      "loss": 0.602,
      "step": 3531
    },
    {
      "epoch": 0.43305541932319763,
      "grad_norm": 1.8985968614545088,
      "learning_rate": 4.63048738251095e-06,
      "loss": 0.568,
      "step": 3532
    },
    {
      "epoch": 0.43317802844531633,
      "grad_norm": 2.08411842684583,
      "learning_rate": 4.630222148051263e-06,
      "loss": 0.5554,
      "step": 3533
    },
    {
      "epoch": 0.43330063756743503,
      "grad_norm": 2.006306177666297,
      "learning_rate": 4.629956826035673e-06,
      "loss": 0.551,
      "step": 3534
    },
    {
      "epoch": 0.4334232466895537,
      "grad_norm": 1.9167189628089598,
      "learning_rate": 4.629691416475088e-06,
      "loss": 0.5342,
      "step": 3535
    },
    {
      "epoch": 0.4335458558116724,
      "grad_norm": 1.9630201378383116,
      "learning_rate": 4.629425919380415e-06,
      "loss": 0.5685,
      "step": 3536
    },
    {
      "epoch": 0.4336684649337911,
      "grad_norm": 1.9859921613072846,
      "learning_rate": 4.629160334762567e-06,
      "loss": 0.5765,
      "step": 3537
    },
    {
      "epoch": 0.4337910740559098,
      "grad_norm": 1.9033401965220171,
      "learning_rate": 4.628894662632459e-06,
      "loss": 0.553,
      "step": 3538
    },
    {
      "epoch": 0.4339136831780284,
      "grad_norm": 1.935887127258317,
      "learning_rate": 4.628628903001013e-06,
      "loss": 0.5295,
      "step": 3539
    },
    {
      "epoch": 0.4340362923001471,
      "grad_norm": 2.074857044791904,
      "learning_rate": 4.628363055879149e-06,
      "loss": 0.5237,
      "step": 3540
    },
    {
      "epoch": 0.4341589014222658,
      "grad_norm": 2.193485550155588,
      "learning_rate": 4.628097121277797e-06,
      "loss": 0.6109,
      "step": 3541
    },
    {
      "epoch": 0.4342815105443845,
      "grad_norm": 2.190051899942963,
      "learning_rate": 4.627831099207884e-06,
      "loss": 0.5648,
      "step": 3542
    },
    {
      "epoch": 0.43440411966650316,
      "grad_norm": 2.0527431588058356,
      "learning_rate": 4.627564989680347e-06,
      "loss": 0.5867,
      "step": 3543
    },
    {
      "epoch": 0.43452672878862186,
      "grad_norm": 2.1466781744161203,
      "learning_rate": 4.627298792706123e-06,
      "loss": 0.5877,
      "step": 3544
    },
    {
      "epoch": 0.43464933791074056,
      "grad_norm": 2.1914441716236417,
      "learning_rate": 4.627032508296152e-06,
      "loss": 0.5592,
      "step": 3545
    },
    {
      "epoch": 0.43477194703285926,
      "grad_norm": 1.9867496289734599,
      "learning_rate": 4.626766136461378e-06,
      "loss": 0.5259,
      "step": 3546
    },
    {
      "epoch": 0.4348945561549779,
      "grad_norm": 2.0920685686747613,
      "learning_rate": 4.626499677212753e-06,
      "loss": 0.5592,
      "step": 3547
    },
    {
      "epoch": 0.4350171652770966,
      "grad_norm": 2.2406229249731657,
      "learning_rate": 4.626233130561225e-06,
      "loss": 0.5325,
      "step": 3548
    },
    {
      "epoch": 0.4351397743992153,
      "grad_norm": 2.0272923711677007,
      "learning_rate": 4.625966496517751e-06,
      "loss": 0.605,
      "step": 3549
    },
    {
      "epoch": 0.435262383521334,
      "grad_norm": 2.173413418230353,
      "learning_rate": 4.625699775093291e-06,
      "loss": 0.5545,
      "step": 3550
    },
    {
      "epoch": 0.43538499264345265,
      "grad_norm": 2.061409133142813,
      "learning_rate": 4.625432966298806e-06,
      "loss": 0.5674,
      "step": 3551
    },
    {
      "epoch": 0.43550760176557135,
      "grad_norm": 2.2052819691159997,
      "learning_rate": 4.625166070145265e-06,
      "loss": 0.6181,
      "step": 3552
    },
    {
      "epoch": 0.43563021088769005,
      "grad_norm": 1.8825501182977786,
      "learning_rate": 4.624899086643635e-06,
      "loss": 0.518,
      "step": 3553
    },
    {
      "epoch": 0.43575282000980875,
      "grad_norm": 2.1217355010038226,
      "learning_rate": 4.624632015804891e-06,
      "loss": 0.5532,
      "step": 3554
    },
    {
      "epoch": 0.4358754291319274,
      "grad_norm": 2.130376283381607,
      "learning_rate": 4.624364857640009e-06,
      "loss": 0.5541,
      "step": 3555
    },
    {
      "epoch": 0.4359980382540461,
      "grad_norm": 2.130511278843452,
      "learning_rate": 4.624097612159971e-06,
      "loss": 0.5179,
      "step": 3556
    },
    {
      "epoch": 0.4361206473761648,
      "grad_norm": 1.963836476217954,
      "learning_rate": 4.623830279375761e-06,
      "loss": 0.5784,
      "step": 3557
    },
    {
      "epoch": 0.4362432564982835,
      "grad_norm": 2.023633092527979,
      "learning_rate": 4.623562859298366e-06,
      "loss": 0.5112,
      "step": 3558
    },
    {
      "epoch": 0.43636586562040214,
      "grad_norm": 1.963540576870741,
      "learning_rate": 4.623295351938779e-06,
      "loss": 0.5674,
      "step": 3559
    },
    {
      "epoch": 0.43648847474252084,
      "grad_norm": 1.8745169439426466,
      "learning_rate": 4.623027757307993e-06,
      "loss": 0.5096,
      "step": 3560
    },
    {
      "epoch": 0.43661108386463954,
      "grad_norm": 2.2947489922673996,
      "learning_rate": 4.622760075417008e-06,
      "loss": 0.5744,
      "step": 3561
    },
    {
      "epoch": 0.43673369298675824,
      "grad_norm": 1.9299338865861675,
      "learning_rate": 4.6224923062768265e-06,
      "loss": 0.5044,
      "step": 3562
    },
    {
      "epoch": 0.4368563021088769,
      "grad_norm": 2.3078181731048444,
      "learning_rate": 4.6222244498984526e-06,
      "loss": 0.5942,
      "step": 3563
    },
    {
      "epoch": 0.4369789112309956,
      "grad_norm": 2.353321669824655,
      "learning_rate": 4.621956506292898e-06,
      "loss": 0.5813,
      "step": 3564
    },
    {
      "epoch": 0.4371015203531143,
      "grad_norm": 1.9403681982448782,
      "learning_rate": 4.621688475471173e-06,
      "loss": 0.5728,
      "step": 3565
    },
    {
      "epoch": 0.437224129475233,
      "grad_norm": 2.0397529308983273,
      "learning_rate": 4.621420357444296e-06,
      "loss": 0.5112,
      "step": 3566
    },
    {
      "epoch": 0.4373467385973516,
      "grad_norm": 1.9180029109812302,
      "learning_rate": 4.621152152223286e-06,
      "loss": 0.5626,
      "step": 3567
    },
    {
      "epoch": 0.4374693477194703,
      "grad_norm": 2.0784530812871997,
      "learning_rate": 4.620883859819168e-06,
      "loss": 0.5468,
      "step": 3568
    },
    {
      "epoch": 0.437591956841589,
      "grad_norm": 2.10339179086082,
      "learning_rate": 4.620615480242969e-06,
      "loss": 0.5691,
      "step": 3569
    },
    {
      "epoch": 0.4377145659637077,
      "grad_norm": 2.1314205613493247,
      "learning_rate": 4.620347013505719e-06,
      "loss": 0.5876,
      "step": 3570
    },
    {
      "epoch": 0.43783717508582637,
      "grad_norm": 2.110210023765099,
      "learning_rate": 4.620078459618454e-06,
      "loss": 0.5713,
      "step": 3571
    },
    {
      "epoch": 0.43795978420794507,
      "grad_norm": 2.0020438759887353,
      "learning_rate": 4.61980981859221e-06,
      "loss": 0.5904,
      "step": 3572
    },
    {
      "epoch": 0.43808239333006377,
      "grad_norm": 1.905132800779403,
      "learning_rate": 4.61954109043803e-06,
      "loss": 0.5517,
      "step": 3573
    },
    {
      "epoch": 0.43820500245218247,
      "grad_norm": 1.961468273550217,
      "learning_rate": 4.619272275166958e-06,
      "loss": 0.5366,
      "step": 3574
    },
    {
      "epoch": 0.4383276115743011,
      "grad_norm": 1.7938404544956288,
      "learning_rate": 4.619003372790045e-06,
      "loss": 0.5174,
      "step": 3575
    },
    {
      "epoch": 0.4384502206964198,
      "grad_norm": 1.9801726461618894,
      "learning_rate": 4.618734383318341e-06,
      "loss": 0.5695,
      "step": 3576
    },
    {
      "epoch": 0.4385728298185385,
      "grad_norm": 2.076981717086092,
      "learning_rate": 4.618465306762902e-06,
      "loss": 0.5033,
      "step": 3577
    },
    {
      "epoch": 0.4386954389406572,
      "grad_norm": 2.135477911829218,
      "learning_rate": 4.61819614313479e-06,
      "loss": 0.5141,
      "step": 3578
    },
    {
      "epoch": 0.43881804806277586,
      "grad_norm": 1.8895427839245242,
      "learning_rate": 4.617926892445067e-06,
      "loss": 0.469,
      "step": 3579
    },
    {
      "epoch": 0.43894065718489456,
      "grad_norm": 2.3078500160688358,
      "learning_rate": 4.617657554704797e-06,
      "loss": 0.6411,
      "step": 3580
    },
    {
      "epoch": 0.43906326630701326,
      "grad_norm": 2.073059783461285,
      "learning_rate": 4.6173881299250545e-06,
      "loss": 0.5693,
      "step": 3581
    },
    {
      "epoch": 0.4391858754291319,
      "grad_norm": 2.177695358968385,
      "learning_rate": 4.617118618116911e-06,
      "loss": 0.5454,
      "step": 3582
    },
    {
      "epoch": 0.4393084845512506,
      "grad_norm": 2.05753572551154,
      "learning_rate": 4.616849019291444e-06,
      "loss": 0.5677,
      "step": 3583
    },
    {
      "epoch": 0.4394310936733693,
      "grad_norm": 2.243304172699548,
      "learning_rate": 4.616579333459734e-06,
      "loss": 0.5659,
      "step": 3584
    },
    {
      "epoch": 0.439553702795488,
      "grad_norm": 2.0610909100275525,
      "learning_rate": 4.616309560632867e-06,
      "loss": 0.6078,
      "step": 3585
    },
    {
      "epoch": 0.43967631191760664,
      "grad_norm": 1.9788707712720917,
      "learning_rate": 4.61603970082193e-06,
      "loss": 0.5719,
      "step": 3586
    },
    {
      "epoch": 0.43979892103972534,
      "grad_norm": 2.0561953819238896,
      "learning_rate": 4.615769754038016e-06,
      "loss": 0.5412,
      "step": 3587
    },
    {
      "epoch": 0.43992153016184404,
      "grad_norm": 2.1155393440033166,
      "learning_rate": 4.615499720292219e-06,
      "loss": 0.5394,
      "step": 3588
    },
    {
      "epoch": 0.44004413928396274,
      "grad_norm": 2.2174820589862794,
      "learning_rate": 4.615229599595638e-06,
      "loss": 0.6249,
      "step": 3589
    },
    {
      "epoch": 0.4401667484060814,
      "grad_norm": 1.7725284619137769,
      "learning_rate": 4.614959391959376e-06,
      "loss": 0.5282,
      "step": 3590
    },
    {
      "epoch": 0.4402893575282001,
      "grad_norm": 1.963511181143349,
      "learning_rate": 4.6146890973945376e-06,
      "loss": 0.6042,
      "step": 3591
    },
    {
      "epoch": 0.4404119666503188,
      "grad_norm": 2.0515741322205874,
      "learning_rate": 4.6144187159122355e-06,
      "loss": 0.5529,
      "step": 3592
    },
    {
      "epoch": 0.4405345757724375,
      "grad_norm": 2.0212320307067384,
      "learning_rate": 4.61414824752358e-06,
      "loss": 0.5665,
      "step": 3593
    },
    {
      "epoch": 0.44065718489455613,
      "grad_norm": 1.9990597602581566,
      "learning_rate": 4.613877692239689e-06,
      "loss": 0.5485,
      "step": 3594
    },
    {
      "epoch": 0.44077979401667483,
      "grad_norm": 2.0701870065861656,
      "learning_rate": 4.613607050071682e-06,
      "loss": 0.544,
      "step": 3595
    },
    {
      "epoch": 0.44090240313879353,
      "grad_norm": 2.1634163703031586,
      "learning_rate": 4.613336321030683e-06,
      "loss": 0.5556,
      "step": 3596
    },
    {
      "epoch": 0.44102501226091223,
      "grad_norm": 1.7766336830069505,
      "learning_rate": 4.61306550512782e-06,
      "loss": 0.4869,
      "step": 3597
    },
    {
      "epoch": 0.4411476213830309,
      "grad_norm": 2.3103031654839894,
      "learning_rate": 4.612794602374226e-06,
      "loss": 0.5708,
      "step": 3598
    },
    {
      "epoch": 0.4412702305051496,
      "grad_norm": 1.995272238938819,
      "learning_rate": 4.612523612781031e-06,
      "loss": 0.5532,
      "step": 3599
    },
    {
      "epoch": 0.4413928396272683,
      "grad_norm": 2.0624974502843765,
      "learning_rate": 4.612252536359376e-06,
      "loss": 0.5407,
      "step": 3600
    },
    {
      "epoch": 0.441515448749387,
      "grad_norm": 2.1939726896737746,
      "learning_rate": 4.6119813731204025e-06,
      "loss": 0.5718,
      "step": 3601
    },
    {
      "epoch": 0.4416380578715056,
      "grad_norm": 2.2996810095826437,
      "learning_rate": 4.611710123075256e-06,
      "loss": 0.564,
      "step": 3602
    },
    {
      "epoch": 0.4417606669936243,
      "grad_norm": 2.140609409602015,
      "learning_rate": 4.611438786235085e-06,
      "loss": 0.5362,
      "step": 3603
    },
    {
      "epoch": 0.441883276115743,
      "grad_norm": 2.04907152857529,
      "learning_rate": 4.611167362611041e-06,
      "loss": 0.5277,
      "step": 3604
    },
    {
      "epoch": 0.4420058852378617,
      "grad_norm": 2.2893340738202324,
      "learning_rate": 4.6108958522142815e-06,
      "loss": 0.5459,
      "step": 3605
    },
    {
      "epoch": 0.44212849435998036,
      "grad_norm": 2.3073285485410393,
      "learning_rate": 4.610624255055965e-06,
      "loss": 0.5951,
      "step": 3606
    },
    {
      "epoch": 0.44225110348209906,
      "grad_norm": 1.9390989433936783,
      "learning_rate": 4.610352571147257e-06,
      "loss": 0.5362,
      "step": 3607
    },
    {
      "epoch": 0.44237371260421776,
      "grad_norm": 2.088005763177236,
      "learning_rate": 4.61008080049932e-06,
      "loss": 0.569,
      "step": 3608
    },
    {
      "epoch": 0.44249632172633646,
      "grad_norm": 2.1738353399726513,
      "learning_rate": 4.6098089431233266e-06,
      "loss": 0.5496,
      "step": 3609
    },
    {
      "epoch": 0.4426189308484551,
      "grad_norm": 2.059400529352292,
      "learning_rate": 4.609536999030451e-06,
      "loss": 0.5859,
      "step": 3610
    },
    {
      "epoch": 0.4427415399705738,
      "grad_norm": 2.105980023809846,
      "learning_rate": 4.60926496823187e-06,
      "loss": 0.6111,
      "step": 3611
    },
    {
      "epoch": 0.4428641490926925,
      "grad_norm": 2.132112171363945,
      "learning_rate": 4.608992850738765e-06,
      "loss": 0.5752,
      "step": 3612
    },
    {
      "epoch": 0.4429867582148112,
      "grad_norm": 2.2180539059119235,
      "learning_rate": 4.60872064656232e-06,
      "loss": 0.593,
      "step": 3613
    },
    {
      "epoch": 0.44310936733692985,
      "grad_norm": 1.9833554625319452,
      "learning_rate": 4.6084483557137235e-06,
      "loss": 0.5554,
      "step": 3614
    },
    {
      "epoch": 0.44323197645904855,
      "grad_norm": 2.129975442934878,
      "learning_rate": 4.608175978204166e-06,
      "loss": 0.5478,
      "step": 3615
    },
    {
      "epoch": 0.44335458558116725,
      "grad_norm": 2.2080890946641882,
      "learning_rate": 4.607903514044845e-06,
      "loss": 0.5299,
      "step": 3616
    },
    {
      "epoch": 0.44347719470328595,
      "grad_norm": 2.062814501474807,
      "learning_rate": 4.607630963246956e-06,
      "loss": 0.5724,
      "step": 3617
    },
    {
      "epoch": 0.4435998038254046,
      "grad_norm": 2.0474471949244517,
      "learning_rate": 4.607358325821705e-06,
      "loss": 0.5745,
      "step": 3618
    },
    {
      "epoch": 0.4437224129475233,
      "grad_norm": 1.9836323197514851,
      "learning_rate": 4.6070856017802945e-06,
      "loss": 0.5546,
      "step": 3619
    },
    {
      "epoch": 0.443845022069642,
      "grad_norm": 2.25570736448378,
      "learning_rate": 4.606812791133936e-06,
      "loss": 0.5564,
      "step": 3620
    },
    {
      "epoch": 0.4439676311917607,
      "grad_norm": 2.011169646447024,
      "learning_rate": 4.606539893893842e-06,
      "loss": 0.5086,
      "step": 3621
    },
    {
      "epoch": 0.44409024031387934,
      "grad_norm": 2.0769787715414347,
      "learning_rate": 4.606266910071229e-06,
      "loss": 0.5712,
      "step": 3622
    },
    {
      "epoch": 0.44421284943599804,
      "grad_norm": 2.1397844609176313,
      "learning_rate": 4.605993839677317e-06,
      "loss": 0.549,
      "step": 3623
    },
    {
      "epoch": 0.44433545855811674,
      "grad_norm": 1.9127729588512108,
      "learning_rate": 4.605720682723331e-06,
      "loss": 0.5156,
      "step": 3624
    },
    {
      "epoch": 0.44445806768023544,
      "grad_norm": 1.9072645118822864,
      "learning_rate": 4.605447439220496e-06,
      "loss": 0.5436,
      "step": 3625
    },
    {
      "epoch": 0.4445806768023541,
      "grad_norm": 1.8522335406988424,
      "learning_rate": 4.605174109180044e-06,
      "loss": 0.4964,
      "step": 3626
    },
    {
      "epoch": 0.4447032859244728,
      "grad_norm": 1.8698107422734291,
      "learning_rate": 4.604900692613209e-06,
      "loss": 0.5225,
      "step": 3627
    },
    {
      "epoch": 0.4448258950465915,
      "grad_norm": 1.949101654445614,
      "learning_rate": 4.604627189531229e-06,
      "loss": 0.4628,
      "step": 3628
    },
    {
      "epoch": 0.4449485041687102,
      "grad_norm": 1.9884167775178792,
      "learning_rate": 4.604353599945346e-06,
      "loss": 0.5737,
      "step": 3629
    },
    {
      "epoch": 0.4450711132908288,
      "grad_norm": 2.0598810172623945,
      "learning_rate": 4.604079923866804e-06,
      "loss": 0.5569,
      "step": 3630
    },
    {
      "epoch": 0.4451937224129475,
      "grad_norm": 1.9599467201343317,
      "learning_rate": 4.603806161306853e-06,
      "loss": 0.5528,
      "step": 3631
    },
    {
      "epoch": 0.4453163315350662,
      "grad_norm": 2.2938908360803185,
      "learning_rate": 4.603532312276743e-06,
      "loss": 0.6005,
      "step": 3632
    },
    {
      "epoch": 0.44543894065718487,
      "grad_norm": 1.9976457659243603,
      "learning_rate": 4.603258376787732e-06,
      "loss": 0.5249,
      "step": 3633
    },
    {
      "epoch": 0.44556154977930357,
      "grad_norm": 2.0821911398927218,
      "learning_rate": 4.602984354851078e-06,
      "loss": 0.5526,
      "step": 3634
    },
    {
      "epoch": 0.44568415890142227,
      "grad_norm": 2.020950316901276,
      "learning_rate": 4.6027102464780435e-06,
      "loss": 0.5261,
      "step": 3635
    },
    {
      "epoch": 0.44580676802354097,
      "grad_norm": 2.216579318408334,
      "learning_rate": 4.602436051679896e-06,
      "loss": 0.5769,
      "step": 3636
    },
    {
      "epoch": 0.4459293771456596,
      "grad_norm": 2.0464389826398253,
      "learning_rate": 4.602161770467904e-06,
      "loss": 0.5664,
      "step": 3637
    },
    {
      "epoch": 0.4460519862677783,
      "grad_norm": 2.0090415471690655,
      "learning_rate": 4.601887402853342e-06,
      "loss": 0.5902,
      "step": 3638
    },
    {
      "epoch": 0.446174595389897,
      "grad_norm": 1.956774266669028,
      "learning_rate": 4.601612948847487e-06,
      "loss": 0.5873,
      "step": 3639
    },
    {
      "epoch": 0.4462972045120157,
      "grad_norm": 1.9469890433226664,
      "learning_rate": 4.60133840846162e-06,
      "loss": 0.54,
      "step": 3640
    },
    {
      "epoch": 0.44641981363413435,
      "grad_norm": 2.0001036867980595,
      "learning_rate": 4.601063781707022e-06,
      "loss": 0.5625,
      "step": 3641
    },
    {
      "epoch": 0.44654242275625305,
      "grad_norm": 1.9548014504248439,
      "learning_rate": 4.6007890685949845e-06,
      "loss": 0.5974,
      "step": 3642
    },
    {
      "epoch": 0.44666503187837175,
      "grad_norm": 2.237487835000766,
      "learning_rate": 4.600514269136797e-06,
      "loss": 0.5274,
      "step": 3643
    },
    {
      "epoch": 0.44678764100049045,
      "grad_norm": 2.0849241609663633,
      "learning_rate": 4.600239383343755e-06,
      "loss": 0.5886,
      "step": 3644
    },
    {
      "epoch": 0.4469102501226091,
      "grad_norm": 2.0562910566991266,
      "learning_rate": 4.5999644112271545e-06,
      "loss": 0.5608,
      "step": 3645
    },
    {
      "epoch": 0.4470328592447278,
      "grad_norm": 2.058486183066064,
      "learning_rate": 4.5996893527983e-06,
      "loss": 0.5358,
      "step": 3646
    },
    {
      "epoch": 0.4471554683668465,
      "grad_norm": 2.313673533443129,
      "learning_rate": 4.5994142080684956e-06,
      "loss": 0.5582,
      "step": 3647
    },
    {
      "epoch": 0.4472780774889652,
      "grad_norm": 2.0817813887910392,
      "learning_rate": 4.59913897704905e-06,
      "loss": 0.6003,
      "step": 3648
    },
    {
      "epoch": 0.44740068661108384,
      "grad_norm": 2.020922737661403,
      "learning_rate": 4.598863659751277e-06,
      "loss": 0.5503,
      "step": 3649
    },
    {
      "epoch": 0.44752329573320254,
      "grad_norm": 1.894916100814167,
      "learning_rate": 4.598588256186491e-06,
      "loss": 0.599,
      "step": 3650
    },
    {
      "epoch": 0.44764590485532124,
      "grad_norm": 2.0077802656414896,
      "learning_rate": 4.5983127663660135e-06,
      "loss": 0.5953,
      "step": 3651
    },
    {
      "epoch": 0.44776851397743994,
      "grad_norm": 2.2372050996095876,
      "learning_rate": 4.598037190301166e-06,
      "loss": 0.5531,
      "step": 3652
    },
    {
      "epoch": 0.4478911230995586,
      "grad_norm": 1.9869487693071972,
      "learning_rate": 4.597761528003276e-06,
      "loss": 0.578,
      "step": 3653
    },
    {
      "epoch": 0.4480137322216773,
      "grad_norm": 2.208715648237573,
      "learning_rate": 4.597485779483673e-06,
      "loss": 0.5966,
      "step": 3654
    },
    {
      "epoch": 0.448136341343796,
      "grad_norm": 2.0434032816418073,
      "learning_rate": 4.597209944753692e-06,
      "loss": 0.5256,
      "step": 3655
    },
    {
      "epoch": 0.4482589504659147,
      "grad_norm": 2.3258703396339606,
      "learning_rate": 4.596934023824668e-06,
      "loss": 0.5539,
      "step": 3656
    },
    {
      "epoch": 0.44838155958803333,
      "grad_norm": 1.9213908949546443,
      "learning_rate": 4.596658016707945e-06,
      "loss": 0.5076,
      "step": 3657
    },
    {
      "epoch": 0.44850416871015203,
      "grad_norm": 2.114164722739402,
      "learning_rate": 4.596381923414864e-06,
      "loss": 0.5583,
      "step": 3658
    },
    {
      "epoch": 0.44862677783227073,
      "grad_norm": 2.2051030654978407,
      "learning_rate": 4.5961057439567755e-06,
      "loss": 0.619,
      "step": 3659
    },
    {
      "epoch": 0.44874938695438943,
      "grad_norm": 1.9782764326917488,
      "learning_rate": 4.59582947834503e-06,
      "loss": 0.6149,
      "step": 3660
    },
    {
      "epoch": 0.4488719960765081,
      "grad_norm": 2.0426476721994797,
      "learning_rate": 4.595553126590983e-06,
      "loss": 0.6027,
      "step": 3661
    },
    {
      "epoch": 0.4489946051986268,
      "grad_norm": 2.0297583863356445,
      "learning_rate": 4.595276688705992e-06,
      "loss": 0.5406,
      "step": 3662
    },
    {
      "epoch": 0.4491172143207455,
      "grad_norm": 2.0667531735926494,
      "learning_rate": 4.59500016470142e-06,
      "loss": 0.6009,
      "step": 3663
    },
    {
      "epoch": 0.4492398234428642,
      "grad_norm": 1.9539955264885978,
      "learning_rate": 4.594723554588632e-06,
      "loss": 0.534,
      "step": 3664
    },
    {
      "epoch": 0.4493624325649828,
      "grad_norm": 2.19548298061302,
      "learning_rate": 4.594446858378997e-06,
      "loss": 0.5456,
      "step": 3665
    },
    {
      "epoch": 0.4494850416871015,
      "grad_norm": 2.1544340351538724,
      "learning_rate": 4.59417007608389e-06,
      "loss": 0.5599,
      "step": 3666
    },
    {
      "epoch": 0.4496076508092202,
      "grad_norm": 1.967629373012543,
      "learning_rate": 4.593893207714684e-06,
      "loss": 0.5448,
      "step": 3667
    },
    {
      "epoch": 0.4497302599313389,
      "grad_norm": 2.078207018497663,
      "learning_rate": 4.59361625328276e-06,
      "loss": 0.5802,
      "step": 3668
    },
    {
      "epoch": 0.44985286905345756,
      "grad_norm": 1.9162655395479928,
      "learning_rate": 4.593339212799502e-06,
      "loss": 0.562,
      "step": 3669
    },
    {
      "epoch": 0.44997547817557626,
      "grad_norm": 2.266942224520068,
      "learning_rate": 4.593062086276296e-06,
      "loss": 0.5532,
      "step": 3670
    },
    {
      "epoch": 0.45009808729769496,
      "grad_norm": 1.7950518780979576,
      "learning_rate": 4.592784873724533e-06,
      "loss": 0.5243,
      "step": 3671
    },
    {
      "epoch": 0.45022069641981366,
      "grad_norm": 2.0588423196240035,
      "learning_rate": 4.592507575155607e-06,
      "loss": 0.5839,
      "step": 3672
    },
    {
      "epoch": 0.4503433055419323,
      "grad_norm": 2.1226387911064295,
      "learning_rate": 4.592230190580915e-06,
      "loss": 0.574,
      "step": 3673
    },
    {
      "epoch": 0.450465914664051,
      "grad_norm": 2.0415762093384977,
      "learning_rate": 4.591952720011858e-06,
      "loss": 0.5637,
      "step": 3674
    },
    {
      "epoch": 0.4505885237861697,
      "grad_norm": 2.049954307156107,
      "learning_rate": 4.591675163459841e-06,
      "loss": 0.5013,
      "step": 3675
    },
    {
      "epoch": 0.4507111329082884,
      "grad_norm": 2.245082505167453,
      "learning_rate": 4.591397520936271e-06,
      "loss": 0.5751,
      "step": 3676
    },
    {
      "epoch": 0.45083374203040705,
      "grad_norm": 2.011210778398496,
      "learning_rate": 4.591119792452562e-06,
      "loss": 0.5447,
      "step": 3677
    },
    {
      "epoch": 0.45095635115252575,
      "grad_norm": 2.062907994103553,
      "learning_rate": 4.590841978020126e-06,
      "loss": 0.546,
      "step": 3678
    },
    {
      "epoch": 0.45107896027464445,
      "grad_norm": 2.3969599205450907,
      "learning_rate": 4.590564077650384e-06,
      "loss": 0.5716,
      "step": 3679
    },
    {
      "epoch": 0.45120156939676315,
      "grad_norm": 1.897897583275991,
      "learning_rate": 4.590286091354757e-06,
      "loss": 0.5241,
      "step": 3680
    },
    {
      "epoch": 0.4513241785188818,
      "grad_norm": 2.2024903170074146,
      "learning_rate": 4.590008019144671e-06,
      "loss": 0.5818,
      "step": 3681
    },
    {
      "epoch": 0.4514467876410005,
      "grad_norm": 2.1119952211176707,
      "learning_rate": 4.5897298610315554e-06,
      "loss": 0.5794,
      "step": 3682
    },
    {
      "epoch": 0.4515693967631192,
      "grad_norm": 1.889365262609263,
      "learning_rate": 4.589451617026843e-06,
      "loss": 0.537,
      "step": 3683
    },
    {
      "epoch": 0.45169200588523784,
      "grad_norm": 1.8337980635539997,
      "learning_rate": 4.5891732871419706e-06,
      "loss": 0.5551,
      "step": 3684
    },
    {
      "epoch": 0.45181461500735653,
      "grad_norm": 1.9110671651552609,
      "learning_rate": 4.588894871388376e-06,
      "loss": 0.5203,
      "step": 3685
    },
    {
      "epoch": 0.45193722412947523,
      "grad_norm": 2.476741403474541,
      "learning_rate": 4.588616369777505e-06,
      "loss": 0.5745,
      "step": 3686
    },
    {
      "epoch": 0.45205983325159393,
      "grad_norm": 1.9180851899837599,
      "learning_rate": 4.588337782320804e-06,
      "loss": 0.521,
      "step": 3687
    },
    {
      "epoch": 0.4521824423737126,
      "grad_norm": 2.1331630057408706,
      "learning_rate": 4.588059109029723e-06,
      "loss": 0.54,
      "step": 3688
    },
    {
      "epoch": 0.4523050514958313,
      "grad_norm": 2.021516339504389,
      "learning_rate": 4.587780349915716e-06,
      "loss": 0.5452,
      "step": 3689
    },
    {
      "epoch": 0.45242766061795,
      "grad_norm": 2.447955377925707,
      "learning_rate": 4.58750150499024e-06,
      "loss": 0.5813,
      "step": 3690
    },
    {
      "epoch": 0.4525502697400687,
      "grad_norm": 2.1697198826690367,
      "learning_rate": 4.587222574264757e-06,
      "loss": 0.5793,
      "step": 3691
    },
    {
      "epoch": 0.4526728788621873,
      "grad_norm": 2.182508884439122,
      "learning_rate": 4.586943557750731e-06,
      "loss": 0.5864,
      "step": 3692
    },
    {
      "epoch": 0.452795487984306,
      "grad_norm": 2.227560518687857,
      "learning_rate": 4.58666445545963e-06,
      "loss": 0.6084,
      "step": 3693
    },
    {
      "epoch": 0.4529180971064247,
      "grad_norm": 2.061285807746104,
      "learning_rate": 4.586385267402925e-06,
      "loss": 0.5891,
      "step": 3694
    },
    {
      "epoch": 0.4530407062285434,
      "grad_norm": 2.0411763718773424,
      "learning_rate": 4.586105993592093e-06,
      "loss": 0.5814,
      "step": 3695
    },
    {
      "epoch": 0.45316331535066207,
      "grad_norm": 2.026118231206082,
      "learning_rate": 4.58582663403861e-06,
      "loss": 0.547,
      "step": 3696
    },
    {
      "epoch": 0.45328592447278077,
      "grad_norm": 2.1538896308966167,
      "learning_rate": 4.585547188753961e-06,
      "loss": 0.5231,
      "step": 3697
    },
    {
      "epoch": 0.45340853359489947,
      "grad_norm": 1.993783507557236,
      "learning_rate": 4.58526765774963e-06,
      "loss": 0.4978,
      "step": 3698
    },
    {
      "epoch": 0.45353114271701817,
      "grad_norm": 2.2181630583202705,
      "learning_rate": 4.584988041037106e-06,
      "loss": 0.5195,
      "step": 3699
    },
    {
      "epoch": 0.4536537518391368,
      "grad_norm": 1.9824025375542327,
      "learning_rate": 4.584708338627883e-06,
      "loss": 0.5889,
      "step": 3700
    },
    {
      "epoch": 0.4537763609612555,
      "grad_norm": 2.0002826077485807,
      "learning_rate": 4.584428550533457e-06,
      "loss": 0.5576,
      "step": 3701
    },
    {
      "epoch": 0.4538989700833742,
      "grad_norm": 1.993819298920871,
      "learning_rate": 4.584148676765327e-06,
      "loss": 0.5598,
      "step": 3702
    },
    {
      "epoch": 0.4540215792054929,
      "grad_norm": 2.1982456989769483,
      "learning_rate": 4.583868717334997e-06,
      "loss": 0.5734,
      "step": 3703
    },
    {
      "epoch": 0.45414418832761155,
      "grad_norm": 2.0092250532644176,
      "learning_rate": 4.583588672253973e-06,
      "loss": 0.628,
      "step": 3704
    },
    {
      "epoch": 0.45426679744973025,
      "grad_norm": 2.2481601496577817,
      "learning_rate": 4.583308541533766e-06,
      "loss": 0.53,
      "step": 3705
    },
    {
      "epoch": 0.45438940657184895,
      "grad_norm": 2.1632014040341576,
      "learning_rate": 4.583028325185889e-06,
      "loss": 0.5231,
      "step": 3706
    },
    {
      "epoch": 0.45451201569396765,
      "grad_norm": 2.079544297411378,
      "learning_rate": 4.582748023221861e-06,
      "loss": 0.5524,
      "step": 3707
    },
    {
      "epoch": 0.4546346248160863,
      "grad_norm": 2.060098017377206,
      "learning_rate": 4.582467635653202e-06,
      "loss": 0.5507,
      "step": 3708
    },
    {
      "epoch": 0.454757233938205,
      "grad_norm": 1.9672785344558688,
      "learning_rate": 4.582187162491436e-06,
      "loss": 0.5155,
      "step": 3709
    },
    {
      "epoch": 0.4548798430603237,
      "grad_norm": 2.048521143114474,
      "learning_rate": 4.581906603748092e-06,
      "loss": 0.4959,
      "step": 3710
    },
    {
      "epoch": 0.4550024521824424,
      "grad_norm": 2.330263417342888,
      "learning_rate": 4.581625959434701e-06,
      "loss": 0.6195,
      "step": 3711
    },
    {
      "epoch": 0.45512506130456104,
      "grad_norm": 2.1105495439802966,
      "learning_rate": 4.581345229562797e-06,
      "loss": 0.5354,
      "step": 3712
    },
    {
      "epoch": 0.45524767042667974,
      "grad_norm": 1.999752563855024,
      "learning_rate": 4.58106441414392e-06,
      "loss": 0.5423,
      "step": 3713
    },
    {
      "epoch": 0.45537027954879844,
      "grad_norm": 1.8840048696169172,
      "learning_rate": 4.580783513189611e-06,
      "loss": 0.5542,
      "step": 3714
    },
    {
      "epoch": 0.45549288867091714,
      "grad_norm": 2.1363407789636724,
      "learning_rate": 4.580502526711416e-06,
      "loss": 0.5469,
      "step": 3715
    },
    {
      "epoch": 0.4556154977930358,
      "grad_norm": 2.0290487226310985,
      "learning_rate": 4.580221454720884e-06,
      "loss": 0.561,
      "step": 3716
    },
    {
      "epoch": 0.4557381069151545,
      "grad_norm": 1.7652187409047588,
      "learning_rate": 4.579940297229567e-06,
      "loss": 0.5962,
      "step": 3717
    },
    {
      "epoch": 0.4558607160372732,
      "grad_norm": 2.0549129276497133,
      "learning_rate": 4.579659054249023e-06,
      "loss": 0.555,
      "step": 3718
    },
    {
      "epoch": 0.4559833251593919,
      "grad_norm": 2.11938782444385,
      "learning_rate": 4.579377725790809e-06,
      "loss": 0.5531,
      "step": 3719
    },
    {
      "epoch": 0.4561059342815105,
      "grad_norm": 1.954561828778789,
      "learning_rate": 4.579096311866489e-06,
      "loss": 0.5762,
      "step": 3720
    },
    {
      "epoch": 0.4562285434036292,
      "grad_norm": 2.1870977341397038,
      "learning_rate": 4.578814812487629e-06,
      "loss": 0.5467,
      "step": 3721
    },
    {
      "epoch": 0.4563511525257479,
      "grad_norm": 1.9675448762552672,
      "learning_rate": 4.578533227665802e-06,
      "loss": 0.5219,
      "step": 3722
    },
    {
      "epoch": 0.4564737616478666,
      "grad_norm": 2.000725355282433,
      "learning_rate": 4.578251557412579e-06,
      "loss": 0.579,
      "step": 3723
    },
    {
      "epoch": 0.45659637076998527,
      "grad_norm": 2.2091662300752333,
      "learning_rate": 4.577969801739537e-06,
      "loss": 0.5834,
      "step": 3724
    },
    {
      "epoch": 0.45671897989210397,
      "grad_norm": 1.9104141630355256,
      "learning_rate": 4.577687960658257e-06,
      "loss": 0.5304,
      "step": 3725
    },
    {
      "epoch": 0.45684158901422267,
      "grad_norm": 2.116551522755367,
      "learning_rate": 4.577406034180325e-06,
      "loss": 0.6609,
      "step": 3726
    },
    {
      "epoch": 0.45696419813634137,
      "grad_norm": 1.9742889958760212,
      "learning_rate": 4.577124022317326e-06,
      "loss": 0.5343,
      "step": 3727
    },
    {
      "epoch": 0.45708680725846,
      "grad_norm": 1.9577104448234244,
      "learning_rate": 4.576841925080853e-06,
      "loss": 0.5566,
      "step": 3728
    },
    {
      "epoch": 0.4572094163805787,
      "grad_norm": 2.2082940728878917,
      "learning_rate": 4.576559742482501e-06,
      "loss": 0.5526,
      "step": 3729
    },
    {
      "epoch": 0.4573320255026974,
      "grad_norm": 2.000052290080148,
      "learning_rate": 4.576277474533865e-06,
      "loss": 0.5316,
      "step": 3730
    },
    {
      "epoch": 0.45745463462481606,
      "grad_norm": 2.060899959137036,
      "learning_rate": 4.575995121246551e-06,
      "loss": 0.5615,
      "step": 3731
    },
    {
      "epoch": 0.45757724374693476,
      "grad_norm": 2.274951099760222,
      "learning_rate": 4.575712682632162e-06,
      "loss": 0.5793,
      "step": 3732
    },
    {
      "epoch": 0.45769985286905346,
      "grad_norm": 1.9407012707670719,
      "learning_rate": 4.575430158702306e-06,
      "loss": 0.5034,
      "step": 3733
    },
    {
      "epoch": 0.45782246199117216,
      "grad_norm": 1.9503750025200475,
      "learning_rate": 4.575147549468597e-06,
      "loss": 0.5874,
      "step": 3734
    },
    {
      "epoch": 0.4579450711132908,
      "grad_norm": 1.9670886249751995,
      "learning_rate": 4.57486485494265e-06,
      "loss": 0.5438,
      "step": 3735
    },
    {
      "epoch": 0.4580676802354095,
      "grad_norm": 1.9169072975461399,
      "learning_rate": 4.574582075136084e-06,
      "loss": 0.5446,
      "step": 3736
    },
    {
      "epoch": 0.4581902893575282,
      "grad_norm": 1.9734795245967256,
      "learning_rate": 4.574299210060522e-06,
      "loss": 0.5712,
      "step": 3737
    },
    {
      "epoch": 0.4583128984796469,
      "grad_norm": 1.970953665330994,
      "learning_rate": 4.574016259727591e-06,
      "loss": 0.5966,
      "step": 3738
    },
    {
      "epoch": 0.45843550760176555,
      "grad_norm": 1.9377697325663905,
      "learning_rate": 4.573733224148919e-06,
      "loss": 0.604,
      "step": 3739
    },
    {
      "epoch": 0.45855811672388425,
      "grad_norm": 2.082551169535372,
      "learning_rate": 4.57345010333614e-06,
      "loss": 0.5301,
      "step": 3740
    },
    {
      "epoch": 0.45868072584600295,
      "grad_norm": 1.905814015081867,
      "learning_rate": 4.573166897300892e-06,
      "loss": 0.512,
      "step": 3741
    },
    {
      "epoch": 0.45880333496812165,
      "grad_norm": 1.9800073645991982,
      "learning_rate": 4.572883606054813e-06,
      "loss": 0.6017,
      "step": 3742
    },
    {
      "epoch": 0.4589259440902403,
      "grad_norm": 2.239894514970921,
      "learning_rate": 4.572600229609549e-06,
      "loss": 0.5846,
      "step": 3743
    },
    {
      "epoch": 0.459048553212359,
      "grad_norm": 2.181841517982813,
      "learning_rate": 4.5723167679767465e-06,
      "loss": 0.5406,
      "step": 3744
    },
    {
      "epoch": 0.4591711623344777,
      "grad_norm": 1.9714135338064585,
      "learning_rate": 4.572033221168055e-06,
      "loss": 0.5796,
      "step": 3745
    },
    {
      "epoch": 0.4592937714565964,
      "grad_norm": 2.1537781514739267,
      "learning_rate": 4.571749589195131e-06,
      "loss": 0.5463,
      "step": 3746
    },
    {
      "epoch": 0.45941638057871503,
      "grad_norm": 1.993168120500438,
      "learning_rate": 4.57146587206963e-06,
      "loss": 0.5892,
      "step": 3747
    },
    {
      "epoch": 0.45953898970083373,
      "grad_norm": 2.0275985122141513,
      "learning_rate": 4.5711820698032155e-06,
      "loss": 0.5838,
      "step": 3748
    },
    {
      "epoch": 0.45966159882295243,
      "grad_norm": 1.9484712537956252,
      "learning_rate": 4.570898182407551e-06,
      "loss": 0.5577,
      "step": 3749
    },
    {
      "epoch": 0.45978420794507113,
      "grad_norm": 1.919240446285211,
      "learning_rate": 4.570614209894304e-06,
      "loss": 0.5799,
      "step": 3750
    },
    {
      "epoch": 0.4599068170671898,
      "grad_norm": 2.2117006892697297,
      "learning_rate": 4.570330152275149e-06,
      "loss": 0.6059,
      "step": 3751
    },
    {
      "epoch": 0.4600294261893085,
      "grad_norm": 1.9386463322960308,
      "learning_rate": 4.570046009561758e-06,
      "loss": 0.528,
      "step": 3752
    },
    {
      "epoch": 0.4601520353114272,
      "grad_norm": 1.9019243819729095,
      "learning_rate": 4.569761781765811e-06,
      "loss": 0.484,
      "step": 3753
    },
    {
      "epoch": 0.4602746444335459,
      "grad_norm": 1.9503436127565619,
      "learning_rate": 4.569477468898992e-06,
      "loss": 0.5355,
      "step": 3754
    },
    {
      "epoch": 0.4603972535556645,
      "grad_norm": 1.8864193111753715,
      "learning_rate": 4.569193070972984e-06,
      "loss": 0.5477,
      "step": 3755
    },
    {
      "epoch": 0.4605198626777832,
      "grad_norm": 1.9908996486109336,
      "learning_rate": 4.568908587999479e-06,
      "loss": 0.5289,
      "step": 3756
    },
    {
      "epoch": 0.4606424717999019,
      "grad_norm": 1.9953661656417303,
      "learning_rate": 4.568624019990167e-06,
      "loss": 0.5808,
      "step": 3757
    },
    {
      "epoch": 0.4607650809220206,
      "grad_norm": 1.9445669652463198,
      "learning_rate": 4.568339366956746e-06,
      "loss": 0.5589,
      "step": 3758
    },
    {
      "epoch": 0.46088769004413926,
      "grad_norm": 1.9468552517435147,
      "learning_rate": 4.568054628910915e-06,
      "loss": 0.5152,
      "step": 3759
    },
    {
      "epoch": 0.46101029916625796,
      "grad_norm": 2.0027093129727884,
      "learning_rate": 4.567769805864378e-06,
      "loss": 0.5479,
      "step": 3760
    },
    {
      "epoch": 0.46113290828837666,
      "grad_norm": 1.9333673565903573,
      "learning_rate": 4.567484897828842e-06,
      "loss": 0.5416,
      "step": 3761
    },
    {
      "epoch": 0.46125551741049536,
      "grad_norm": 1.9412802230582304,
      "learning_rate": 4.567199904816015e-06,
      "loss": 0.5431,
      "step": 3762
    },
    {
      "epoch": 0.461378126532614,
      "grad_norm": 1.9245859220262131,
      "learning_rate": 4.566914826837613e-06,
      "loss": 0.5926,
      "step": 3763
    },
    {
      "epoch": 0.4615007356547327,
      "grad_norm": 2.006982953745791,
      "learning_rate": 4.566629663905352e-06,
      "loss": 0.5643,
      "step": 3764
    },
    {
      "epoch": 0.4616233447768514,
      "grad_norm": 2.151583546892285,
      "learning_rate": 4.566344416030954e-06,
      "loss": 0.5556,
      "step": 3765
    },
    {
      "epoch": 0.4617459538989701,
      "grad_norm": 2.17051845352008,
      "learning_rate": 4.566059083226142e-06,
      "loss": 0.5709,
      "step": 3766
    },
    {
      "epoch": 0.46186856302108875,
      "grad_norm": 2.1038417973750887,
      "learning_rate": 4.565773665502644e-06,
      "loss": 0.5321,
      "step": 3767
    },
    {
      "epoch": 0.46199117214320745,
      "grad_norm": 1.9164685899161487,
      "learning_rate": 4.5654881628721905e-06,
      "loss": 0.501,
      "step": 3768
    },
    {
      "epoch": 0.46211378126532615,
      "grad_norm": 1.9902895479532852,
      "learning_rate": 4.5652025753465174e-06,
      "loss": 0.5659,
      "step": 3769
    },
    {
      "epoch": 0.46223639038744485,
      "grad_norm": 2.0027860814000635,
      "learning_rate": 4.564916902937362e-06,
      "loss": 0.5349,
      "step": 3770
    },
    {
      "epoch": 0.4623589995095635,
      "grad_norm": 2.045851427673892,
      "learning_rate": 4.564631145656467e-06,
      "loss": 0.5428,
      "step": 3771
    },
    {
      "epoch": 0.4624816086316822,
      "grad_norm": 2.1475963227166535,
      "learning_rate": 4.5643453035155764e-06,
      "loss": 0.5526,
      "step": 3772
    },
    {
      "epoch": 0.4626042177538009,
      "grad_norm": 2.0083258533666273,
      "learning_rate": 4.564059376526439e-06,
      "loss": 0.5072,
      "step": 3773
    },
    {
      "epoch": 0.4627268268759196,
      "grad_norm": 2.2511433802896947,
      "learning_rate": 4.563773364700807e-06,
      "loss": 0.562,
      "step": 3774
    },
    {
      "epoch": 0.46284943599803824,
      "grad_norm": 2.2002084557787676,
      "learning_rate": 4.563487268050436e-06,
      "loss": 0.5534,
      "step": 3775
    },
    {
      "epoch": 0.46297204512015694,
      "grad_norm": 1.9881762001822545,
      "learning_rate": 4.563201086587086e-06,
      "loss": 0.5625,
      "step": 3776
    },
    {
      "epoch": 0.46309465424227564,
      "grad_norm": 1.8722499670622037,
      "learning_rate": 4.562914820322517e-06,
      "loss": 0.5063,
      "step": 3777
    },
    {
      "epoch": 0.46321726336439434,
      "grad_norm": 1.9299421478542278,
      "learning_rate": 4.562628469268498e-06,
      "loss": 0.5177,
      "step": 3778
    },
    {
      "epoch": 0.463339872486513,
      "grad_norm": 2.0238748896313825,
      "learning_rate": 4.562342033436798e-06,
      "loss": 0.5244,
      "step": 3779
    },
    {
      "epoch": 0.4634624816086317,
      "grad_norm": 1.920658084001555,
      "learning_rate": 4.562055512839189e-06,
      "loss": 0.4875,
      "step": 3780
    },
    {
      "epoch": 0.4635850907307504,
      "grad_norm": 1.9515802858106417,
      "learning_rate": 4.561768907487446e-06,
      "loss": 0.5317,
      "step": 3781
    },
    {
      "epoch": 0.463707699852869,
      "grad_norm": 2.2029161832068365,
      "learning_rate": 4.561482217393352e-06,
      "loss": 0.5577,
      "step": 3782
    },
    {
      "epoch": 0.4638303089749877,
      "grad_norm": 1.912299388709289,
      "learning_rate": 4.56119544256869e-06,
      "loss": 0.5448,
      "step": 3783
    },
    {
      "epoch": 0.4639529180971064,
      "grad_norm": 1.9225936090789215,
      "learning_rate": 4.560908583025247e-06,
      "loss": 0.5418,
      "step": 3784
    },
    {
      "epoch": 0.4640755272192251,
      "grad_norm": 2.0721057055906686,
      "learning_rate": 4.560621638774811e-06,
      "loss": 0.5769,
      "step": 3785
    },
    {
      "epoch": 0.46419813634134377,
      "grad_norm": 1.9857073234185563,
      "learning_rate": 4.560334609829178e-06,
      "loss": 0.5024,
      "step": 3786
    },
    {
      "epoch": 0.46432074546346247,
      "grad_norm": 2.180562939019421,
      "learning_rate": 4.560047496200145e-06,
      "loss": 0.5908,
      "step": 3787
    },
    {
      "epoch": 0.46444335458558117,
      "grad_norm": 1.9288183536853805,
      "learning_rate": 4.559760297899513e-06,
      "loss": 0.5398,
      "step": 3788
    },
    {
      "epoch": 0.46456596370769987,
      "grad_norm": 2.1987984163920626,
      "learning_rate": 4.559473014939085e-06,
      "loss": 0.6211,
      "step": 3789
    },
    {
      "epoch": 0.4646885728298185,
      "grad_norm": 2.033261287533375,
      "learning_rate": 4.559185647330672e-06,
      "loss": 0.5345,
      "step": 3790
    },
    {
      "epoch": 0.4648111819519372,
      "grad_norm": 2.1674795182870397,
      "learning_rate": 4.558898195086083e-06,
      "loss": 0.5837,
      "step": 3791
    },
    {
      "epoch": 0.4649337910740559,
      "grad_norm": 1.9973886353966694,
      "learning_rate": 4.558610658217133e-06,
      "loss": 0.501,
      "step": 3792
    },
    {
      "epoch": 0.4650564001961746,
      "grad_norm": 2.029448305548684,
      "learning_rate": 4.55832303673564e-06,
      "loss": 0.5476,
      "step": 3793
    },
    {
      "epoch": 0.46517900931829326,
      "grad_norm": 2.168813263715552,
      "learning_rate": 4.558035330653426e-06,
      "loss": 0.5624,
      "step": 3794
    },
    {
      "epoch": 0.46530161844041196,
      "grad_norm": 1.950137196391168,
      "learning_rate": 4.557747539982317e-06,
      "loss": 0.5236,
      "step": 3795
    },
    {
      "epoch": 0.46542422756253066,
      "grad_norm": 1.9817516523822363,
      "learning_rate": 4.5574596647341414e-06,
      "loss": 0.512,
      "step": 3796
    },
    {
      "epoch": 0.46554683668464936,
      "grad_norm": 2.172419150041101,
      "learning_rate": 4.557171704920731e-06,
      "loss": 0.5864,
      "step": 3797
    },
    {
      "epoch": 0.465669445806768,
      "grad_norm": 2.1513429897079006,
      "learning_rate": 4.556883660553921e-06,
      "loss": 0.5164,
      "step": 3798
    },
    {
      "epoch": 0.4657920549288867,
      "grad_norm": 2.1602276003608947,
      "learning_rate": 4.5565955316455515e-06,
      "loss": 0.5836,
      "step": 3799
    },
    {
      "epoch": 0.4659146640510054,
      "grad_norm": 1.91463767779389,
      "learning_rate": 4.556307318207465e-06,
      "loss": 0.5346,
      "step": 3800
    },
    {
      "epoch": 0.4660372731731241,
      "grad_norm": 2.0613320963271997,
      "learning_rate": 4.556019020251508e-06,
      "loss": 0.5506,
      "step": 3801
    },
    {
      "epoch": 0.46615988229524274,
      "grad_norm": 2.0020712211378098,
      "learning_rate": 4.555730637789527e-06,
      "loss": 0.5456,
      "step": 3802
    },
    {
      "epoch": 0.46628249141736144,
      "grad_norm": 2.2560466142901383,
      "learning_rate": 4.5554421708333795e-06,
      "loss": 0.6487,
      "step": 3803
    },
    {
      "epoch": 0.46640510053948014,
      "grad_norm": 1.937665609275951,
      "learning_rate": 4.555153619394919e-06,
      "loss": 0.489,
      "step": 3804
    },
    {
      "epoch": 0.46652770966159884,
      "grad_norm": 2.3471381486486025,
      "learning_rate": 4.554864983486007e-06,
      "loss": 0.586,
      "step": 3805
    },
    {
      "epoch": 0.4666503187837175,
      "grad_norm": 2.324623638501708,
      "learning_rate": 4.554576263118506e-06,
      "loss": 0.569,
      "step": 3806
    },
    {
      "epoch": 0.4667729279058362,
      "grad_norm": 1.8735897329376607,
      "learning_rate": 4.554287458304283e-06,
      "loss": 0.5499,
      "step": 3807
    },
    {
      "epoch": 0.4668955370279549,
      "grad_norm": 2.154323879960266,
      "learning_rate": 4.553998569055209e-06,
      "loss": 0.5638,
      "step": 3808
    },
    {
      "epoch": 0.4670181461500736,
      "grad_norm": 2.3202897983612147,
      "learning_rate": 4.553709595383158e-06,
      "loss": 0.5328,
      "step": 3809
    },
    {
      "epoch": 0.46714075527219223,
      "grad_norm": 2.027990825430048,
      "learning_rate": 4.553420537300005e-06,
      "loss": 0.5108,
      "step": 3810
    },
    {
      "epoch": 0.46726336439431093,
      "grad_norm": 2.0522429153933244,
      "learning_rate": 4.553131394817635e-06,
      "loss": 0.521,
      "step": 3811
    },
    {
      "epoch": 0.46738597351642963,
      "grad_norm": 1.9582635583462067,
      "learning_rate": 4.552842167947928e-06,
      "loss": 0.5559,
      "step": 3812
    },
    {
      "epoch": 0.46750858263854833,
      "grad_norm": 2.292025656842411,
      "learning_rate": 4.552552856702774e-06,
      "loss": 0.5847,
      "step": 3813
    },
    {
      "epoch": 0.467631191760667,
      "grad_norm": 2.3777801918313974,
      "learning_rate": 4.552263461094065e-06,
      "loss": 0.556,
      "step": 3814
    },
    {
      "epoch": 0.4677538008827857,
      "grad_norm": 1.9662705452345302,
      "learning_rate": 4.551973981133694e-06,
      "loss": 0.4776,
      "step": 3815
    },
    {
      "epoch": 0.4678764100049044,
      "grad_norm": 1.9981929040448227,
      "learning_rate": 4.5516844168335585e-06,
      "loss": 0.5337,
      "step": 3816
    },
    {
      "epoch": 0.4679990191270231,
      "grad_norm": 1.976424115147196,
      "learning_rate": 4.551394768205563e-06,
      "loss": 0.5242,
      "step": 3817
    },
    {
      "epoch": 0.4681216282491417,
      "grad_norm": 2.000276249741957,
      "learning_rate": 4.55110503526161e-06,
      "loss": 0.5385,
      "step": 3818
    },
    {
      "epoch": 0.4682442373712604,
      "grad_norm": 1.96780713083259,
      "learning_rate": 4.550815218013608e-06,
      "loss": 0.5929,
      "step": 3819
    },
    {
      "epoch": 0.4683668464933791,
      "grad_norm": 1.9854799791164046,
      "learning_rate": 4.55052531647347e-06,
      "loss": 0.5042,
      "step": 3820
    },
    {
      "epoch": 0.4684894556154978,
      "grad_norm": 1.980723831531192,
      "learning_rate": 4.5502353306531125e-06,
      "loss": 0.5921,
      "step": 3821
    },
    {
      "epoch": 0.46861206473761646,
      "grad_norm": 1.9474672715393977,
      "learning_rate": 4.549945260564452e-06,
      "loss": 0.5644,
      "step": 3822
    },
    {
      "epoch": 0.46873467385973516,
      "grad_norm": 2.150523020037984,
      "learning_rate": 4.549655106219413e-06,
      "loss": 0.5434,
      "step": 3823
    },
    {
      "epoch": 0.46885728298185386,
      "grad_norm": 2.0076729153368795,
      "learning_rate": 4.54936486762992e-06,
      "loss": 0.5687,
      "step": 3824
    },
    {
      "epoch": 0.46897989210397256,
      "grad_norm": 2.1518285672534727,
      "learning_rate": 4.549074544807904e-06,
      "loss": 0.5739,
      "step": 3825
    },
    {
      "epoch": 0.4691025012260912,
      "grad_norm": 2.201476704946163,
      "learning_rate": 4.548784137765295e-06,
      "loss": 0.5127,
      "step": 3826
    },
    {
      "epoch": 0.4692251103482099,
      "grad_norm": 1.9504974827059307,
      "learning_rate": 4.548493646514032e-06,
      "loss": 0.5672,
      "step": 3827
    },
    {
      "epoch": 0.4693477194703286,
      "grad_norm": 2.088441629409665,
      "learning_rate": 4.548203071066053e-06,
      "loss": 0.5925,
      "step": 3828
    },
    {
      "epoch": 0.4694703285924473,
      "grad_norm": 2.1745921824191994,
      "learning_rate": 4.547912411433302e-06,
      "loss": 0.6,
      "step": 3829
    },
    {
      "epoch": 0.46959293771456595,
      "grad_norm": 2.0614203598483156,
      "learning_rate": 4.547621667627725e-06,
      "loss": 0.5392,
      "step": 3830
    },
    {
      "epoch": 0.46971554683668465,
      "grad_norm": 1.9682187713691786,
      "learning_rate": 4.547330839661273e-06,
      "loss": 0.5196,
      "step": 3831
    },
    {
      "epoch": 0.46983815595880335,
      "grad_norm": 1.667259460894566,
      "learning_rate": 4.547039927545899e-06,
      "loss": 0.5075,
      "step": 3832
    },
    {
      "epoch": 0.469960765080922,
      "grad_norm": 2.1733021936323724,
      "learning_rate": 4.546748931293559e-06,
      "loss": 0.5411,
      "step": 3833
    },
    {
      "epoch": 0.4700833742030407,
      "grad_norm": 1.9983149473771937,
      "learning_rate": 4.546457850916215e-06,
      "loss": 0.5595,
      "step": 3834
    },
    {
      "epoch": 0.4702059833251594,
      "grad_norm": 2.333619241910251,
      "learning_rate": 4.54616668642583e-06,
      "loss": 0.5541,
      "step": 3835
    },
    {
      "epoch": 0.4703285924472781,
      "grad_norm": 2.0284900889761412,
      "learning_rate": 4.545875437834373e-06,
      "loss": 0.5373,
      "step": 3836
    },
    {
      "epoch": 0.47045120156939674,
      "grad_norm": 1.9612112158309023,
      "learning_rate": 4.545584105153812e-06,
      "loss": 0.512,
      "step": 3837
    },
    {
      "epoch": 0.47057381069151544,
      "grad_norm": 2.1086139751780055,
      "learning_rate": 4.545292688396124e-06,
      "loss": 0.5149,
      "step": 3838
    },
    {
      "epoch": 0.47069641981363414,
      "grad_norm": 2.0747580268345027,
      "learning_rate": 4.5450011875732845e-06,
      "loss": 0.5712,
      "step": 3839
    },
    {
      "epoch": 0.47081902893575284,
      "grad_norm": 2.357784231328142,
      "learning_rate": 4.544709602697277e-06,
      "loss": 0.6076,
      "step": 3840
    },
    {
      "epoch": 0.4709416380578715,
      "grad_norm": 1.920692198911483,
      "learning_rate": 4.544417933780084e-06,
      "loss": 0.563,
      "step": 3841
    },
    {
      "epoch": 0.4710642471799902,
      "grad_norm": 2.164399696541498,
      "learning_rate": 4.5441261808336955e-06,
      "loss": 0.5674,
      "step": 3842
    },
    {
      "epoch": 0.4711868563021089,
      "grad_norm": 2.2614881504040087,
      "learning_rate": 4.543834343870101e-06,
      "loss": 0.5644,
      "step": 3843
    },
    {
      "epoch": 0.4713094654242276,
      "grad_norm": 2.0830409386015747,
      "learning_rate": 4.543542422901298e-06,
      "loss": 0.5533,
      "step": 3844
    },
    {
      "epoch": 0.4714320745463462,
      "grad_norm": 2.039420292149131,
      "learning_rate": 4.543250417939282e-06,
      "loss": 0.5564,
      "step": 3845
    },
    {
      "epoch": 0.4715546836684649,
      "grad_norm": 2.123087433731057,
      "learning_rate": 4.542958328996057e-06,
      "loss": 0.5998,
      "step": 3846
    },
    {
      "epoch": 0.4716772927905836,
      "grad_norm": 2.0521436786951965,
      "learning_rate": 4.5426661560836295e-06,
      "loss": 0.6025,
      "step": 3847
    },
    {
      "epoch": 0.4717999019127023,
      "grad_norm": 2.0233694606306543,
      "learning_rate": 4.542373899214006e-06,
      "loss": 0.5388,
      "step": 3848
    },
    {
      "epoch": 0.47192251103482097,
      "grad_norm": 2.228888878869861,
      "learning_rate": 4.5420815583991985e-06,
      "loss": 0.5634,
      "step": 3849
    },
    {
      "epoch": 0.47204512015693967,
      "grad_norm": 2.419754080501668,
      "learning_rate": 4.541789133651224e-06,
      "loss": 0.5867,
      "step": 3850
    },
    {
      "epoch": 0.47216772927905837,
      "grad_norm": 2.023179751145906,
      "learning_rate": 4.541496624982101e-06,
      "loss": 0.5278,
      "step": 3851
    },
    {
      "epoch": 0.47229033840117707,
      "grad_norm": 1.9624877979340478,
      "learning_rate": 4.541204032403854e-06,
      "loss": 0.5823,
      "step": 3852
    },
    {
      "epoch": 0.4724129475232957,
      "grad_norm": 2.053248061179508,
      "learning_rate": 4.540911355928507e-06,
      "loss": 0.5494,
      "step": 3853
    },
    {
      "epoch": 0.4725355566454144,
      "grad_norm": 2.150740201489758,
      "learning_rate": 4.540618595568089e-06,
      "loss": 0.6041,
      "step": 3854
    },
    {
      "epoch": 0.4726581657675331,
      "grad_norm": 2.1361126842668594,
      "learning_rate": 4.540325751334635e-06,
      "loss": 0.5934,
      "step": 3855
    },
    {
      "epoch": 0.4727807748896518,
      "grad_norm": 1.9538583758155827,
      "learning_rate": 4.540032823240181e-06,
      "loss": 0.5375,
      "step": 3856
    },
    {
      "epoch": 0.47290338401177046,
      "grad_norm": 2.0060155545303466,
      "learning_rate": 4.539739811296766e-06,
      "loss": 0.5467,
      "step": 3857
    },
    {
      "epoch": 0.47302599313388916,
      "grad_norm": 1.9863969197511635,
      "learning_rate": 4.539446715516434e-06,
      "loss": 0.5275,
      "step": 3858
    },
    {
      "epoch": 0.47314860225600786,
      "grad_norm": 2.27313936760499,
      "learning_rate": 4.539153535911231e-06,
      "loss": 0.6557,
      "step": 3859
    },
    {
      "epoch": 0.47327121137812656,
      "grad_norm": 2.125890670899719,
      "learning_rate": 4.5388602724932075e-06,
      "loss": 0.5098,
      "step": 3860
    },
    {
      "epoch": 0.4733938205002452,
      "grad_norm": 2.0653353030960524,
      "learning_rate": 4.538566925274417e-06,
      "loss": 0.5841,
      "step": 3861
    },
    {
      "epoch": 0.4735164296223639,
      "grad_norm": 2.0536509917208474,
      "learning_rate": 4.538273494266917e-06,
      "loss": 0.5377,
      "step": 3862
    },
    {
      "epoch": 0.4736390387444826,
      "grad_norm": 1.7093796926207387,
      "learning_rate": 4.537979979482768e-06,
      "loss": 0.4831,
      "step": 3863
    },
    {
      "epoch": 0.4737616478666013,
      "grad_norm": 1.9630442047546202,
      "learning_rate": 4.537686380934034e-06,
      "loss": 0.525,
      "step": 3864
    },
    {
      "epoch": 0.47388425698871994,
      "grad_norm": 1.9674026060959686,
      "learning_rate": 4.537392698632783e-06,
      "loss": 0.5412,
      "step": 3865
    },
    {
      "epoch": 0.47400686611083864,
      "grad_norm": 2.0900853130293062,
      "learning_rate": 4.537098932591084e-06,
      "loss": 0.5778,
      "step": 3866
    },
    {
      "epoch": 0.47412947523295734,
      "grad_norm": 2.099189768673084,
      "learning_rate": 4.536805082821014e-06,
      "loss": 0.5084,
      "step": 3867
    },
    {
      "epoch": 0.47425208435507604,
      "grad_norm": 2.119906770828749,
      "learning_rate": 4.536511149334648e-06,
      "loss": 0.5499,
      "step": 3868
    },
    {
      "epoch": 0.4743746934771947,
      "grad_norm": 2.3291012976388994,
      "learning_rate": 4.536217132144068e-06,
      "loss": 0.5844,
      "step": 3869
    },
    {
      "epoch": 0.4744973025993134,
      "grad_norm": 1.951688925679104,
      "learning_rate": 4.53592303126136e-06,
      "loss": 0.5386,
      "step": 3870
    },
    {
      "epoch": 0.4746199117214321,
      "grad_norm": 1.9139315047752696,
      "learning_rate": 4.535628846698611e-06,
      "loss": 0.5278,
      "step": 3871
    },
    {
      "epoch": 0.4747425208435508,
      "grad_norm": 1.9711710074142959,
      "learning_rate": 4.535334578467912e-06,
      "loss": 0.5262,
      "step": 3872
    },
    {
      "epoch": 0.47486512996566943,
      "grad_norm": 2.2159193444515526,
      "learning_rate": 4.535040226581358e-06,
      "loss": 0.5237,
      "step": 3873
    },
    {
      "epoch": 0.47498773908778813,
      "grad_norm": 2.045169560808683,
      "learning_rate": 4.5347457910510496e-06,
      "loss": 0.6027,
      "step": 3874
    },
    {
      "epoch": 0.47511034820990683,
      "grad_norm": 2.0156142041720404,
      "learning_rate": 4.534451271889087e-06,
      "loss": 0.548,
      "step": 3875
    },
    {
      "epoch": 0.47523295733202553,
      "grad_norm": 1.9125718001916088,
      "learning_rate": 4.534156669107575e-06,
      "loss": 0.5871,
      "step": 3876
    },
    {
      "epoch": 0.4753555664541442,
      "grad_norm": 2.027897722220514,
      "learning_rate": 4.533861982718621e-06,
      "loss": 0.5342,
      "step": 3877
    },
    {
      "epoch": 0.4754781755762629,
      "grad_norm": 1.958438545408101,
      "learning_rate": 4.5335672127343405e-06,
      "loss": 0.5659,
      "step": 3878
    },
    {
      "epoch": 0.4756007846983816,
      "grad_norm": 2.06462250779408,
      "learning_rate": 4.533272359166847e-06,
      "loss": 0.5258,
      "step": 3879
    },
    {
      "epoch": 0.4757233938205002,
      "grad_norm": 2.1010246209034498,
      "learning_rate": 4.5329774220282605e-06,
      "loss": 0.5367,
      "step": 3880
    },
    {
      "epoch": 0.4758460029426189,
      "grad_norm": 2.2524300389598406,
      "learning_rate": 4.532682401330703e-06,
      "loss": 0.548,
      "step": 3881
    },
    {
      "epoch": 0.4759686120647376,
      "grad_norm": 2.049730876019931,
      "learning_rate": 4.532387297086298e-06,
      "loss": 0.5206,
      "step": 3882
    },
    {
      "epoch": 0.4760912211868563,
      "grad_norm": 2.2213495404893617,
      "learning_rate": 4.532092109307179e-06,
      "loss": 0.5572,
      "step": 3883
    },
    {
      "epoch": 0.47621383030897496,
      "grad_norm": 2.0721197966156852,
      "learning_rate": 4.531796838005477e-06,
      "loss": 0.5878,
      "step": 3884
    },
    {
      "epoch": 0.47633643943109366,
      "grad_norm": 1.9268038072486073,
      "learning_rate": 4.531501483193327e-06,
      "loss": 0.5558,
      "step": 3885
    },
    {
      "epoch": 0.47645904855321236,
      "grad_norm": 1.842282593060449,
      "learning_rate": 4.5312060448828695e-06,
      "loss": 0.5747,
      "step": 3886
    },
    {
      "epoch": 0.47658165767533106,
      "grad_norm": 1.9861138951994808,
      "learning_rate": 4.5309105230862486e-06,
      "loss": 0.5508,
      "step": 3887
    },
    {
      "epoch": 0.4767042667974497,
      "grad_norm": 1.8024373455788476,
      "learning_rate": 4.530614917815609e-06,
      "loss": 0.5552,
      "step": 3888
    },
    {
      "epoch": 0.4768268759195684,
      "grad_norm": 1.9956972135692719,
      "learning_rate": 4.530319229083101e-06,
      "loss": 0.5884,
      "step": 3889
    },
    {
      "epoch": 0.4769494850416871,
      "grad_norm": 1.839685626513653,
      "learning_rate": 4.530023456900879e-06,
      "loss": 0.6027,
      "step": 3890
    },
    {
      "epoch": 0.4770720941638058,
      "grad_norm": 2.1764477575968155,
      "learning_rate": 4.5297276012810986e-06,
      "loss": 0.5943,
      "step": 3891
    },
    {
      "epoch": 0.47719470328592445,
      "grad_norm": 2.1524562690287037,
      "learning_rate": 4.52943166223592e-06,
      "loss": 0.5318,
      "step": 3892
    },
    {
      "epoch": 0.47731731240804315,
      "grad_norm": 1.9704961332698248,
      "learning_rate": 4.529135639777508e-06,
      "loss": 0.5486,
      "step": 3893
    },
    {
      "epoch": 0.47743992153016185,
      "grad_norm": 2.17245466436812,
      "learning_rate": 4.5288395339180294e-06,
      "loss": 0.608,
      "step": 3894
    },
    {
      "epoch": 0.47756253065228055,
      "grad_norm": 1.9186693195867692,
      "learning_rate": 4.528543344669654e-06,
      "loss": 0.5212,
      "step": 3895
    },
    {
      "epoch": 0.4776851397743992,
      "grad_norm": 2.014467434552764,
      "learning_rate": 4.5282470720445545e-06,
      "loss": 0.5552,
      "step": 3896
    },
    {
      "epoch": 0.4778077488965179,
      "grad_norm": 2.0295298205876833,
      "learning_rate": 4.527950716054911e-06,
      "loss": 0.5822,
      "step": 3897
    },
    {
      "epoch": 0.4779303580186366,
      "grad_norm": 2.121356468267368,
      "learning_rate": 4.527654276712902e-06,
      "loss": 0.6295,
      "step": 3898
    },
    {
      "epoch": 0.4780529671407553,
      "grad_norm": 2.297640017576276,
      "learning_rate": 4.527357754030713e-06,
      "loss": 0.5406,
      "step": 3899
    },
    {
      "epoch": 0.47817557626287394,
      "grad_norm": 1.7682200967901567,
      "learning_rate": 4.5270611480205316e-06,
      "loss": 0.4925,
      "step": 3900
    },
    {
      "epoch": 0.47829818538499264,
      "grad_norm": 1.842201723253016,
      "learning_rate": 4.5267644586945484e-06,
      "loss": 0.5392,
      "step": 3901
    },
    {
      "epoch": 0.47842079450711134,
      "grad_norm": 2.072984906430736,
      "learning_rate": 4.526467686064957e-06,
      "loss": 0.5551,
      "step": 3902
    },
    {
      "epoch": 0.47854340362923004,
      "grad_norm": 1.989311767535918,
      "learning_rate": 4.526170830143956e-06,
      "loss": 0.6055,
      "step": 3903
    },
    {
      "epoch": 0.4786660127513487,
      "grad_norm": 2.081285761364831,
      "learning_rate": 4.525873890943747e-06,
      "loss": 0.5318,
      "step": 3904
    },
    {
      "epoch": 0.4787886218734674,
      "grad_norm": 1.9569480207549008,
      "learning_rate": 4.5255768684765355e-06,
      "loss": 0.5608,
      "step": 3905
    },
    {
      "epoch": 0.4789112309955861,
      "grad_norm": 1.9747176920352254,
      "learning_rate": 4.525279762754527e-06,
      "loss": 0.598,
      "step": 3906
    },
    {
      "epoch": 0.4790338401177048,
      "grad_norm": 2.1117652734977037,
      "learning_rate": 4.524982573789937e-06,
      "loss": 0.6047,
      "step": 3907
    },
    {
      "epoch": 0.4791564492398234,
      "grad_norm": 1.9431616122473885,
      "learning_rate": 4.524685301594976e-06,
      "loss": 0.5125,
      "step": 3908
    },
    {
      "epoch": 0.4792790583619421,
      "grad_norm": 2.2130825917732193,
      "learning_rate": 4.524387946181866e-06,
      "loss": 0.5553,
      "step": 3909
    },
    {
      "epoch": 0.4794016674840608,
      "grad_norm": 2.17372656635695,
      "learning_rate": 4.524090507562828e-06,
      "loss": 0.5595,
      "step": 3910
    },
    {
      "epoch": 0.4795242766061795,
      "grad_norm": 2.1049491012505435,
      "learning_rate": 4.523792985750086e-06,
      "loss": 0.5501,
      "step": 3911
    },
    {
      "epoch": 0.47964688572829817,
      "grad_norm": 2.179883960184174,
      "learning_rate": 4.52349538075587e-06,
      "loss": 0.5508,
      "step": 3912
    },
    {
      "epoch": 0.47976949485041687,
      "grad_norm": 1.9044445747587835,
      "learning_rate": 4.5231976925924104e-06,
      "loss": 0.5411,
      "step": 3913
    },
    {
      "epoch": 0.47989210397253557,
      "grad_norm": 1.931552995262997,
      "learning_rate": 4.5228999212719445e-06,
      "loss": 0.5607,
      "step": 3914
    },
    {
      "epoch": 0.48001471309465427,
      "grad_norm": 1.776231937655516,
      "learning_rate": 4.52260206680671e-06,
      "loss": 0.5428,
      "step": 3915
    },
    {
      "epoch": 0.4801373222167729,
      "grad_norm": 1.818734563257854,
      "learning_rate": 4.522304129208951e-06,
      "loss": 0.4884,
      "step": 3916
    },
    {
      "epoch": 0.4802599313388916,
      "grad_norm": 1.9560109709288467,
      "learning_rate": 4.522006108490912e-06,
      "loss": 0.5944,
      "step": 3917
    },
    {
      "epoch": 0.4803825404610103,
      "grad_norm": 2.0340825256483903,
      "learning_rate": 4.521708004664841e-06,
      "loss": 0.553,
      "step": 3918
    },
    {
      "epoch": 0.480505149583129,
      "grad_norm": 2.036010638383201,
      "learning_rate": 4.521409817742993e-06,
      "loss": 0.5339,
      "step": 3919
    },
    {
      "epoch": 0.48062775870524765,
      "grad_norm": 2.0785976029370787,
      "learning_rate": 4.521111547737622e-06,
      "loss": 0.5989,
      "step": 3920
    },
    {
      "epoch": 0.48075036782736635,
      "grad_norm": 2.015943600002833,
      "learning_rate": 4.520813194660989e-06,
      "loss": 0.5281,
      "step": 3921
    },
    {
      "epoch": 0.48087297694948505,
      "grad_norm": 2.1750576418436416,
      "learning_rate": 4.520514758525356e-06,
      "loss": 0.4962,
      "step": 3922
    },
    {
      "epoch": 0.48099558607160375,
      "grad_norm": 1.9826294346090862,
      "learning_rate": 4.5202162393429896e-06,
      "loss": 0.5234,
      "step": 3923
    },
    {
      "epoch": 0.4811181951937224,
      "grad_norm": 1.8761709146525616,
      "learning_rate": 4.519917637126159e-06,
      "loss": 0.5298,
      "step": 3924
    },
    {
      "epoch": 0.4812408043158411,
      "grad_norm": 2.25452377367347,
      "learning_rate": 4.519618951887137e-06,
      "loss": 0.5885,
      "step": 3925
    },
    {
      "epoch": 0.4813634134379598,
      "grad_norm": 2.0344925363277793,
      "learning_rate": 4.5193201836382005e-06,
      "loss": 0.5734,
      "step": 3926
    },
    {
      "epoch": 0.4814860225600785,
      "grad_norm": 2.1235415653078658,
      "learning_rate": 4.51902133239163e-06,
      "loss": 0.5901,
      "step": 3927
    },
    {
      "epoch": 0.48160863168219714,
      "grad_norm": 1.932428833901565,
      "learning_rate": 4.518722398159709e-06,
      "loss": 0.5428,
      "step": 3928
    },
    {
      "epoch": 0.48173124080431584,
      "grad_norm": 2.349284049691569,
      "learning_rate": 4.5184233809547225e-06,
      "loss": 0.5587,
      "step": 3929
    },
    {
      "epoch": 0.48185384992643454,
      "grad_norm": 1.8939955413559877,
      "learning_rate": 4.518124280788963e-06,
      "loss": 0.5635,
      "step": 3930
    },
    {
      "epoch": 0.4819764590485532,
      "grad_norm": 2.105494488053032,
      "learning_rate": 4.517825097674722e-06,
      "loss": 0.5616,
      "step": 3931
    },
    {
      "epoch": 0.4820990681706719,
      "grad_norm": 2.2767180956472064,
      "learning_rate": 4.517525831624297e-06,
      "loss": 0.5474,
      "step": 3932
    },
    {
      "epoch": 0.4822216772927906,
      "grad_norm": 2.256535310800629,
      "learning_rate": 4.517226482649988e-06,
      "loss": 0.5351,
      "step": 3933
    },
    {
      "epoch": 0.4823442864149093,
      "grad_norm": 2.0360661313931376,
      "learning_rate": 4.5169270507641e-06,
      "loss": 0.5774,
      "step": 3934
    },
    {
      "epoch": 0.48246689553702793,
      "grad_norm": 2.150145085284331,
      "learning_rate": 4.51662753597894e-06,
      "loss": 0.5447,
      "step": 3935
    },
    {
      "epoch": 0.48258950465914663,
      "grad_norm": 2.0304591247400094,
      "learning_rate": 4.516327938306818e-06,
      "loss": 0.5641,
      "step": 3936
    },
    {
      "epoch": 0.48271211378126533,
      "grad_norm": 2.108868659796324,
      "learning_rate": 4.516028257760048e-06,
      "loss": 0.5377,
      "step": 3937
    },
    {
      "epoch": 0.48283472290338403,
      "grad_norm": 2.8292330055654604,
      "learning_rate": 4.515728494350947e-06,
      "loss": 0.6075,
      "step": 3938
    },
    {
      "epoch": 0.4829573320255027,
      "grad_norm": 2.2335833475971154,
      "learning_rate": 4.515428648091837e-06,
      "loss": 0.5198,
      "step": 3939
    },
    {
      "epoch": 0.4830799411476214,
      "grad_norm": 1.908818695063145,
      "learning_rate": 4.515128718995041e-06,
      "loss": 0.4895,
      "step": 3940
    },
    {
      "epoch": 0.48320255026974007,
      "grad_norm": 2.1155778100694427,
      "learning_rate": 4.514828707072887e-06,
      "loss": 0.5576,
      "step": 3941
    },
    {
      "epoch": 0.48332515939185877,
      "grad_norm": 2.1717662728683114,
      "learning_rate": 4.514528612337707e-06,
      "loss": 0.593,
      "step": 3942
    },
    {
      "epoch": 0.4834477685139774,
      "grad_norm": 1.9994867521930135,
      "learning_rate": 4.514228434801834e-06,
      "loss": 0.5302,
      "step": 3943
    },
    {
      "epoch": 0.4835703776360961,
      "grad_norm": 1.9670986719656656,
      "learning_rate": 4.513928174477606e-06,
      "loss": 0.5688,
      "step": 3944
    },
    {
      "epoch": 0.4836929867582148,
      "grad_norm": 2.056331134150932,
      "learning_rate": 4.513627831377365e-06,
      "loss": 0.5435,
      "step": 3945
    },
    {
      "epoch": 0.4838155958803335,
      "grad_norm": 1.7648933896588643,
      "learning_rate": 4.513327405513456e-06,
      "loss": 0.5289,
      "step": 3946
    },
    {
      "epoch": 0.48393820500245216,
      "grad_norm": 2.0804401125892946,
      "learning_rate": 4.513026896898225e-06,
      "loss": 0.4994,
      "step": 3947
    },
    {
      "epoch": 0.48406081412457086,
      "grad_norm": 2.2119993996046725,
      "learning_rate": 4.5127263055440264e-06,
      "loss": 0.6062,
      "step": 3948
    },
    {
      "epoch": 0.48418342324668956,
      "grad_norm": 2.1346036698843407,
      "learning_rate": 4.512425631463212e-06,
      "loss": 0.5569,
      "step": 3949
    },
    {
      "epoch": 0.48430603236880826,
      "grad_norm": 2.0928733887281243,
      "learning_rate": 4.512124874668142e-06,
      "loss": 0.5767,
      "step": 3950
    },
    {
      "epoch": 0.4844286414909269,
      "grad_norm": 2.162908070528017,
      "learning_rate": 4.5118240351711775e-06,
      "loss": 0.5406,
      "step": 3951
    },
    {
      "epoch": 0.4845512506130456,
      "grad_norm": 1.8510114510533229,
      "learning_rate": 4.511523112984683e-06,
      "loss": 0.5471,
      "step": 3952
    },
    {
      "epoch": 0.4846738597351643,
      "grad_norm": 1.915566055959225,
      "learning_rate": 4.5112221081210286e-06,
      "loss": 0.5062,
      "step": 3953
    },
    {
      "epoch": 0.484796468857283,
      "grad_norm": 2.101976613773318,
      "learning_rate": 4.510921020592583e-06,
      "loss": 0.5486,
      "step": 3954
    },
    {
      "epoch": 0.48491907797940165,
      "grad_norm": 1.9659983704803254,
      "learning_rate": 4.510619850411725e-06,
      "loss": 0.5422,
      "step": 3955
    },
    {
      "epoch": 0.48504168710152035,
      "grad_norm": 2.1376645911460908,
      "learning_rate": 4.510318597590831e-06,
      "loss": 0.5494,
      "step": 3956
    },
    {
      "epoch": 0.48516429622363905,
      "grad_norm": 2.1052507047813878,
      "learning_rate": 4.510017262142285e-06,
      "loss": 0.5695,
      "step": 3957
    },
    {
      "epoch": 0.48528690534575775,
      "grad_norm": 1.9931796229458716,
      "learning_rate": 4.50971584407847e-06,
      "loss": 0.5368,
      "step": 3958
    },
    {
      "epoch": 0.4854095144678764,
      "grad_norm": 2.1166598091717237,
      "learning_rate": 4.5094143434117745e-06,
      "loss": 0.5878,
      "step": 3959
    },
    {
      "epoch": 0.4855321235899951,
      "grad_norm": 1.9541993060370637,
      "learning_rate": 4.509112760154594e-06,
      "loss": 0.5289,
      "step": 3960
    },
    {
      "epoch": 0.4856547327121138,
      "grad_norm": 2.1076150365175588,
      "learning_rate": 4.508811094319321e-06,
      "loss": 0.5823,
      "step": 3961
    },
    {
      "epoch": 0.4857773418342325,
      "grad_norm": 2.104091195323855,
      "learning_rate": 4.508509345918357e-06,
      "loss": 0.557,
      "step": 3962
    },
    {
      "epoch": 0.48589995095635113,
      "grad_norm": 2.3324527892771103,
      "learning_rate": 4.5082075149641025e-06,
      "loss": 0.5415,
      "step": 3963
    },
    {
      "epoch": 0.48602256007846983,
      "grad_norm": 2.0467812321367096,
      "learning_rate": 4.507905601468964e-06,
      "loss": 0.552,
      "step": 3964
    },
    {
      "epoch": 0.48614516920058853,
      "grad_norm": 1.8785757599242416,
      "learning_rate": 4.50760360544535e-06,
      "loss": 0.5221,
      "step": 3965
    },
    {
      "epoch": 0.48626777832270723,
      "grad_norm": 1.8887610437793667,
      "learning_rate": 4.507301526905674e-06,
      "loss": 0.4863,
      "step": 3966
    },
    {
      "epoch": 0.4863903874448259,
      "grad_norm": 1.9458112436854436,
      "learning_rate": 4.506999365862351e-06,
      "loss": 0.5395,
      "step": 3967
    },
    {
      "epoch": 0.4865129965669446,
      "grad_norm": 2.1978894832556293,
      "learning_rate": 4.5066971223278025e-06,
      "loss": 0.5528,
      "step": 3968
    },
    {
      "epoch": 0.4866356056890633,
      "grad_norm": 2.0601411792189777,
      "learning_rate": 4.506394796314448e-06,
      "loss": 0.5427,
      "step": 3969
    },
    {
      "epoch": 0.486758214811182,
      "grad_norm": 2.0333948814893192,
      "learning_rate": 4.506092387834717e-06,
      "loss": 0.5788,
      "step": 3970
    },
    {
      "epoch": 0.4868808239333006,
      "grad_norm": 1.972712540555373,
      "learning_rate": 4.5057898969010354e-06,
      "loss": 0.5344,
      "step": 3971
    },
    {
      "epoch": 0.4870034330554193,
      "grad_norm": 2.0352886405848336,
      "learning_rate": 4.505487323525839e-06,
      "loss": 0.56,
      "step": 3972
    },
    {
      "epoch": 0.487126042177538,
      "grad_norm": 1.9651717580047086,
      "learning_rate": 4.505184667721564e-06,
      "loss": 0.5443,
      "step": 3973
    },
    {
      "epoch": 0.4872486512996567,
      "grad_norm": 1.9204370857692132,
      "learning_rate": 4.504881929500648e-06,
      "loss": 0.5604,
      "step": 3974
    },
    {
      "epoch": 0.48737126042177537,
      "grad_norm": 2.0934650251100866,
      "learning_rate": 4.504579108875536e-06,
      "loss": 0.5596,
      "step": 3975
    },
    {
      "epoch": 0.48749386954389407,
      "grad_norm": 2.0712935717049863,
      "learning_rate": 4.504276205858673e-06,
      "loss": 0.5221,
      "step": 3976
    },
    {
      "epoch": 0.48761647866601276,
      "grad_norm": 1.9622715985099313,
      "learning_rate": 4.503973220462511e-06,
      "loss": 0.5563,
      "step": 3977
    },
    {
      "epoch": 0.48773908778813146,
      "grad_norm": 2.1908541784572,
      "learning_rate": 4.503670152699501e-06,
      "loss": 0.5901,
      "step": 3978
    },
    {
      "epoch": 0.4878616969102501,
      "grad_norm": 2.0869642769644057,
      "learning_rate": 4.503367002582101e-06,
      "loss": 0.5584,
      "step": 3979
    },
    {
      "epoch": 0.4879843060323688,
      "grad_norm": 1.9781924110371478,
      "learning_rate": 4.50306377012277e-06,
      "loss": 0.536,
      "step": 3980
    },
    {
      "epoch": 0.4881069151544875,
      "grad_norm": 2.042110736075299,
      "learning_rate": 4.502760455333972e-06,
      "loss": 0.5324,
      "step": 3981
    },
    {
      "epoch": 0.48822952427660615,
      "grad_norm": 2.0480619547321326,
      "learning_rate": 4.502457058228173e-06,
      "loss": 0.5451,
      "step": 3982
    },
    {
      "epoch": 0.48835213339872485,
      "grad_norm": 2.2873058856841166,
      "learning_rate": 4.502153578817845e-06,
      "loss": 0.6706,
      "step": 3983
    },
    {
      "epoch": 0.48847474252084355,
      "grad_norm": 1.8471880189499532,
      "learning_rate": 4.50185001711546e-06,
      "loss": 0.5459,
      "step": 3984
    },
    {
      "epoch": 0.48859735164296225,
      "grad_norm": 1.8564250349970137,
      "learning_rate": 4.501546373133495e-06,
      "loss": 0.5639,
      "step": 3985
    },
    {
      "epoch": 0.4887199607650809,
      "grad_norm": 2.1603619794700033,
      "learning_rate": 4.501242646884431e-06,
      "loss": 0.5632,
      "step": 3986
    },
    {
      "epoch": 0.4888425698871996,
      "grad_norm": 1.952684920978309,
      "learning_rate": 4.5009388383807515e-06,
      "loss": 0.5239,
      "step": 3987
    },
    {
      "epoch": 0.4889651790093183,
      "grad_norm": 2.017262688036285,
      "learning_rate": 4.500634947634943e-06,
      "loss": 0.5343,
      "step": 3988
    },
    {
      "epoch": 0.489087788131437,
      "grad_norm": 2.340242191626488,
      "learning_rate": 4.5003309746594955e-06,
      "loss": 0.5327,
      "step": 3989
    },
    {
      "epoch": 0.48921039725355564,
      "grad_norm": 2.1725019608367906,
      "learning_rate": 4.500026919466906e-06,
      "loss": 0.5599,
      "step": 3990
    },
    {
      "epoch": 0.48933300637567434,
      "grad_norm": 2.053825331203871,
      "learning_rate": 4.499722782069667e-06,
      "loss": 0.5483,
      "step": 3991
    },
    {
      "epoch": 0.48945561549779304,
      "grad_norm": 1.9666938025448748,
      "learning_rate": 4.499418562480282e-06,
      "loss": 0.5234,
      "step": 3992
    },
    {
      "epoch": 0.48957822461991174,
      "grad_norm": 2.1731580378899875,
      "learning_rate": 4.499114260711255e-06,
      "loss": 0.5559,
      "step": 3993
    },
    {
      "epoch": 0.4897008337420304,
      "grad_norm": 1.8152442955259345,
      "learning_rate": 4.498809876775092e-06,
      "loss": 0.5271,
      "step": 3994
    },
    {
      "epoch": 0.4898234428641491,
      "grad_norm": 2.205978439110947,
      "learning_rate": 4.498505410684305e-06,
      "loss": 0.5799,
      "step": 3995
    },
    {
      "epoch": 0.4899460519862678,
      "grad_norm": 2.1121699969327326,
      "learning_rate": 4.498200862451407e-06,
      "loss": 0.5175,
      "step": 3996
    },
    {
      "epoch": 0.4900686611083865,
      "grad_norm": 2.087694653631838,
      "learning_rate": 4.497896232088916e-06,
      "loss": 0.5232,
      "step": 3997
    },
    {
      "epoch": 0.4901912702305051,
      "grad_norm": 2.1823333114412216,
      "learning_rate": 4.497591519609354e-06,
      "loss": 0.6106,
      "step": 3998
    },
    {
      "epoch": 0.4903138793526238,
      "grad_norm": 2.048380971072087,
      "learning_rate": 4.497286725025243e-06,
      "loss": 0.5763,
      "step": 3999
    },
    {
      "epoch": 0.4904364884747425,
      "grad_norm": 2.103352466297775,
      "learning_rate": 4.496981848349112e-06,
      "loss": 0.5262,
      "step": 4000
    },
    {
      "epoch": 0.4905590975968612,
      "grad_norm": 2.2536598297740915,
      "learning_rate": 4.496676889593492e-06,
      "loss": 0.5832,
      "step": 4001
    },
    {
      "epoch": 0.49068170671897987,
      "grad_norm": 2.135688427144851,
      "learning_rate": 4.496371848770916e-06,
      "loss": 0.589,
      "step": 4002
    },
    {
      "epoch": 0.49080431584109857,
      "grad_norm": 1.9101559594585809,
      "learning_rate": 4.496066725893924e-06,
      "loss": 0.5369,
      "step": 4003
    },
    {
      "epoch": 0.49092692496321727,
      "grad_norm": 1.8067633358317048,
      "learning_rate": 4.495761520975055e-06,
      "loss": 0.5249,
      "step": 4004
    },
    {
      "epoch": 0.49104953408533597,
      "grad_norm": 2.0557896684186647,
      "learning_rate": 4.495456234026855e-06,
      "loss": 0.5103,
      "step": 4005
    },
    {
      "epoch": 0.4911721432074546,
      "grad_norm": 2.128642706564979,
      "learning_rate": 4.49515086506187e-06,
      "loss": 0.5227,
      "step": 4006
    },
    {
      "epoch": 0.4912947523295733,
      "grad_norm": 2.06420494999731,
      "learning_rate": 4.494845414092653e-06,
      "loss": 0.5615,
      "step": 4007
    },
    {
      "epoch": 0.491417361451692,
      "grad_norm": 1.8895257110911572,
      "learning_rate": 4.494539881131758e-06,
      "loss": 0.6027,
      "step": 4008
    },
    {
      "epoch": 0.4915399705738107,
      "grad_norm": 2.156063549451779,
      "learning_rate": 4.494234266191742e-06,
      "loss": 0.5393,
      "step": 4009
    },
    {
      "epoch": 0.49166257969592936,
      "grad_norm": 1.9631084486840995,
      "learning_rate": 4.493928569285168e-06,
      "loss": 0.5596,
      "step": 4010
    },
    {
      "epoch": 0.49178518881804806,
      "grad_norm": 1.7666251689592456,
      "learning_rate": 4.493622790424599e-06,
      "loss": 0.5098,
      "step": 4011
    },
    {
      "epoch": 0.49190779794016676,
      "grad_norm": 2.192212110695847,
      "learning_rate": 4.4933169296226045e-06,
      "loss": 0.5237,
      "step": 4012
    },
    {
      "epoch": 0.49203040706228546,
      "grad_norm": 2.1008548428150724,
      "learning_rate": 4.493010986891756e-06,
      "loss": 0.5403,
      "step": 4013
    },
    {
      "epoch": 0.4921530161844041,
      "grad_norm": 1.8995213247907012,
      "learning_rate": 4.492704962244626e-06,
      "loss": 0.5334,
      "step": 4014
    },
    {
      "epoch": 0.4922756253065228,
      "grad_norm": 2.097555662402709,
      "learning_rate": 4.4923988556937955e-06,
      "loss": 0.5555,
      "step": 4015
    },
    {
      "epoch": 0.4923982344286415,
      "grad_norm": 1.8645766245258284,
      "learning_rate": 4.492092667251844e-06,
      "loss": 0.5129,
      "step": 4016
    },
    {
      "epoch": 0.4925208435507602,
      "grad_norm": 1.9518113044191892,
      "learning_rate": 4.491786396931358e-06,
      "loss": 0.5746,
      "step": 4017
    },
    {
      "epoch": 0.49264345267287885,
      "grad_norm": 2.042092065593957,
      "learning_rate": 4.491480044744924e-06,
      "loss": 0.5354,
      "step": 4018
    },
    {
      "epoch": 0.49276606179499755,
      "grad_norm": 1.960800550415919,
      "learning_rate": 4.4911736107051355e-06,
      "loss": 0.5545,
      "step": 4019
    },
    {
      "epoch": 0.49288867091711625,
      "grad_norm": 2.0441464314919395,
      "learning_rate": 4.490867094824586e-06,
      "loss": 0.5983,
      "step": 4020
    },
    {
      "epoch": 0.49301128003923494,
      "grad_norm": 1.944068342081475,
      "learning_rate": 4.490560497115874e-06,
      "loss": 0.5859,
      "step": 4021
    },
    {
      "epoch": 0.4931338891613536,
      "grad_norm": 1.943091528255842,
      "learning_rate": 4.490253817591601e-06,
      "loss": 0.5401,
      "step": 4022
    },
    {
      "epoch": 0.4932564982834723,
      "grad_norm": 2.234690587811866,
      "learning_rate": 4.489947056264375e-06,
      "loss": 0.5558,
      "step": 4023
    },
    {
      "epoch": 0.493379107405591,
      "grad_norm": 1.8829871411054941,
      "learning_rate": 4.4896402131467995e-06,
      "loss": 0.5655,
      "step": 4024
    },
    {
      "epoch": 0.4935017165277097,
      "grad_norm": 1.7549477607283248,
      "learning_rate": 4.48933328825149e-06,
      "loss": 0.4907,
      "step": 4025
    },
    {
      "epoch": 0.49362432564982833,
      "grad_norm": 2.010520304125008,
      "learning_rate": 4.48902628159106e-06,
      "loss": 0.5319,
      "step": 4026
    },
    {
      "epoch": 0.49374693477194703,
      "grad_norm": 2.333998944843092,
      "learning_rate": 4.48871919317813e-06,
      "loss": 0.58,
      "step": 4027
    },
    {
      "epoch": 0.49386954389406573,
      "grad_norm": 2.019102818108377,
      "learning_rate": 4.488412023025319e-06,
      "loss": 0.5472,
      "step": 4028
    },
    {
      "epoch": 0.4939921530161844,
      "grad_norm": 2.058229923685194,
      "learning_rate": 4.488104771145253e-06,
      "loss": 0.5812,
      "step": 4029
    },
    {
      "epoch": 0.4941147621383031,
      "grad_norm": 1.92838243536217,
      "learning_rate": 4.4877974375505625e-06,
      "loss": 0.4829,
      "step": 4030
    },
    {
      "epoch": 0.4942373712604218,
      "grad_norm": 1.767701077450617,
      "learning_rate": 4.487490022253879e-06,
      "loss": 0.5385,
      "step": 4031
    },
    {
      "epoch": 0.4943599803825405,
      "grad_norm": 2.0857980111239587,
      "learning_rate": 4.487182525267835e-06,
      "loss": 0.5138,
      "step": 4032
    },
    {
      "epoch": 0.4944825895046591,
      "grad_norm": 2.0976962111730684,
      "learning_rate": 4.486874946605072e-06,
      "loss": 0.554,
      "step": 4033
    },
    {
      "epoch": 0.4946051986267778,
      "grad_norm": 2.0644199099292897,
      "learning_rate": 4.486567286278231e-06,
      "loss": 0.5084,
      "step": 4034
    },
    {
      "epoch": 0.4947278077488965,
      "grad_norm": 2.0547339453102267,
      "learning_rate": 4.4862595442999584e-06,
      "loss": 0.5399,
      "step": 4035
    },
    {
      "epoch": 0.4948504168710152,
      "grad_norm": 1.9730488708917964,
      "learning_rate": 4.485951720682902e-06,
      "loss": 0.5103,
      "step": 4036
    },
    {
      "epoch": 0.49497302599313386,
      "grad_norm": 2.0170352258098925,
      "learning_rate": 4.485643815439713e-06,
      "loss": 0.5236,
      "step": 4037
    },
    {
      "epoch": 0.49509563511525256,
      "grad_norm": 2.1207451872857264,
      "learning_rate": 4.485335828583049e-06,
      "loss": 0.6096,
      "step": 4038
    },
    {
      "epoch": 0.49521824423737126,
      "grad_norm": 1.9451385668941505,
      "learning_rate": 4.485027760125568e-06,
      "loss": 0.5434,
      "step": 4039
    },
    {
      "epoch": 0.49534085335948996,
      "grad_norm": 2.0938077057814963,
      "learning_rate": 4.4847196100799305e-06,
      "loss": 0.574,
      "step": 4040
    },
    {
      "epoch": 0.4954634624816086,
      "grad_norm": 1.9257720170589079,
      "learning_rate": 4.484411378458804e-06,
      "loss": 0.5313,
      "step": 4041
    },
    {
      "epoch": 0.4955860716037273,
      "grad_norm": 2.1162662844066733,
      "learning_rate": 4.484103065274857e-06,
      "loss": 0.6231,
      "step": 4042
    },
    {
      "epoch": 0.495708680725846,
      "grad_norm": 1.7825632016174386,
      "learning_rate": 4.483794670540761e-06,
      "loss": 0.5242,
      "step": 4043
    },
    {
      "epoch": 0.4958312898479647,
      "grad_norm": 2.1134188190187664,
      "learning_rate": 4.483486194269192e-06,
      "loss": 0.5491,
      "step": 4044
    },
    {
      "epoch": 0.49595389897008335,
      "grad_norm": 2.180975104488335,
      "learning_rate": 4.4831776364728285e-06,
      "loss": 0.5914,
      "step": 4045
    },
    {
      "epoch": 0.49607650809220205,
      "grad_norm": 2.278051403812388,
      "learning_rate": 4.482868997164354e-06,
      "loss": 0.5885,
      "step": 4046
    },
    {
      "epoch": 0.49619911721432075,
      "grad_norm": 2.1570592112397087,
      "learning_rate": 4.482560276356453e-06,
      "loss": 0.5439,
      "step": 4047
    },
    {
      "epoch": 0.49632172633643945,
      "grad_norm": 2.182580182240973,
      "learning_rate": 4.482251474061815e-06,
      "loss": 0.5598,
      "step": 4048
    },
    {
      "epoch": 0.4964443354585581,
      "grad_norm": 2.289682831885453,
      "learning_rate": 4.481942590293133e-06,
      "loss": 0.5827,
      "step": 4049
    },
    {
      "epoch": 0.4965669445806768,
      "grad_norm": 1.8628025739086875,
      "learning_rate": 4.4816336250631e-06,
      "loss": 0.5935,
      "step": 4050
    },
    {
      "epoch": 0.4966895537027955,
      "grad_norm": 2.1413786118724576,
      "learning_rate": 4.481324578384417e-06,
      "loss": 0.546,
      "step": 4051
    },
    {
      "epoch": 0.4968121628249142,
      "grad_norm": 1.9285826518033136,
      "learning_rate": 4.481015450269787e-06,
      "loss": 0.5205,
      "step": 4052
    },
    {
      "epoch": 0.49693477194703284,
      "grad_norm": 1.9900310616308947,
      "learning_rate": 4.480706240731914e-06,
      "loss": 0.5419,
      "step": 4053
    },
    {
      "epoch": 0.49705738106915154,
      "grad_norm": 1.837184255480675,
      "learning_rate": 4.480396949783509e-06,
      "loss": 0.5796,
      "step": 4054
    },
    {
      "epoch": 0.49717999019127024,
      "grad_norm": 2.3687249921464137,
      "learning_rate": 4.480087577437283e-06,
      "loss": 0.6185,
      "step": 4055
    },
    {
      "epoch": 0.49730259931338894,
      "grad_norm": 1.9732589844845676,
      "learning_rate": 4.479778123705952e-06,
      "loss": 0.5554,
      "step": 4056
    },
    {
      "epoch": 0.4974252084355076,
      "grad_norm": 2.182232113953761,
      "learning_rate": 4.4794685886022345e-06,
      "loss": 0.5921,
      "step": 4057
    },
    {
      "epoch": 0.4975478175576263,
      "grad_norm": 1.6249462919201512,
      "learning_rate": 4.479158972138855e-06,
      "loss": 0.4825,
      "step": 4058
    },
    {
      "epoch": 0.497670426679745,
      "grad_norm": 2.1818873674974815,
      "learning_rate": 4.478849274328537e-06,
      "loss": 0.5402,
      "step": 4059
    },
    {
      "epoch": 0.4977930358018637,
      "grad_norm": 2.066325951598183,
      "learning_rate": 4.478539495184011e-06,
      "loss": 0.5413,
      "step": 4060
    },
    {
      "epoch": 0.4979156449239823,
      "grad_norm": 1.8748535544702873,
      "learning_rate": 4.478229634718009e-06,
      "loss": 0.5998,
      "step": 4061
    },
    {
      "epoch": 0.498038254046101,
      "grad_norm": 1.9842431750621048,
      "learning_rate": 4.477919692943267e-06,
      "loss": 0.5663,
      "step": 4062
    },
    {
      "epoch": 0.4981608631682197,
      "grad_norm": 2.163894946896309,
      "learning_rate": 4.477609669872524e-06,
      "loss": 0.5658,
      "step": 4063
    },
    {
      "epoch": 0.4982834722903384,
      "grad_norm": 2.2698649266952957,
      "learning_rate": 4.477299565518522e-06,
      "loss": 0.5865,
      "step": 4064
    },
    {
      "epoch": 0.49840608141245707,
      "grad_norm": 1.8284778069159973,
      "learning_rate": 4.476989379894007e-06,
      "loss": 0.5661,
      "step": 4065
    },
    {
      "epoch": 0.49852869053457577,
      "grad_norm": 2.038964945339632,
      "learning_rate": 4.476679113011729e-06,
      "loss": 0.5372,
      "step": 4066
    },
    {
      "epoch": 0.49865129965669447,
      "grad_norm": 1.787472257088891,
      "learning_rate": 4.47636876488444e-06,
      "loss": 0.466,
      "step": 4067
    },
    {
      "epoch": 0.49877390877881317,
      "grad_norm": 2.0497605619722434,
      "learning_rate": 4.476058335524896e-06,
      "loss": 0.526,
      "step": 4068
    },
    {
      "epoch": 0.4988965179009318,
      "grad_norm": 2.014613562857575,
      "learning_rate": 4.4757478249458555e-06,
      "loss": 0.5591,
      "step": 4069
    },
    {
      "epoch": 0.4990191270230505,
      "grad_norm": 2.0016841574820017,
      "learning_rate": 4.475437233160082e-06,
      "loss": 0.5625,
      "step": 4070
    },
    {
      "epoch": 0.4991417361451692,
      "grad_norm": 1.9576452088084888,
      "learning_rate": 4.4751265601803404e-06,
      "loss": 0.5364,
      "step": 4071
    },
    {
      "epoch": 0.4992643452672879,
      "grad_norm": 1.9446487077504542,
      "learning_rate": 4.474815806019401e-06,
      "loss": 0.5306,
      "step": 4072
    },
    {
      "epoch": 0.49938695438940656,
      "grad_norm": 2.1469649899454075,
      "learning_rate": 4.474504970690036e-06,
      "loss": 0.5555,
      "step": 4073
    },
    {
      "epoch": 0.49950956351152526,
      "grad_norm": 2.076152885572381,
      "learning_rate": 4.474194054205022e-06,
      "loss": 0.5346,
      "step": 4074
    },
    {
      "epoch": 0.49963217263364396,
      "grad_norm": 1.9548557140808935,
      "learning_rate": 4.473883056577136e-06,
      "loss": 0.5464,
      "step": 4075
    },
    {
      "epoch": 0.49975478175576266,
      "grad_norm": 1.9402215484183376,
      "learning_rate": 4.473571977819162e-06,
      "loss": 0.6039,
      "step": 4076
    },
    {
      "epoch": 0.4998773908778813,
      "grad_norm": 1.799220946696325,
      "learning_rate": 4.473260817943887e-06,
      "loss": 0.5476,
      "step": 4077
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0514602709200567,
      "learning_rate": 4.472949576964097e-06,
      "loss": 0.5703,
      "step": 4078
    },
    {
      "epoch": 0.5001226091221187,
      "grad_norm": 2.0038605816956983,
      "learning_rate": 4.472638254892588e-06,
      "loss": 0.5366,
      "step": 4079
    },
    {
      "epoch": 0.5002452182442374,
      "grad_norm": 2.119440347550823,
      "learning_rate": 4.472326851742154e-06,
      "loss": 0.5278,
      "step": 4080
    },
    {
      "epoch": 0.5003678273663561,
      "grad_norm": 2.198415847658368,
      "learning_rate": 4.472015367525595e-06,
      "loss": 0.5608,
      "step": 4081
    },
    {
      "epoch": 0.5004904364884747,
      "grad_norm": 1.9339579754269,
      "learning_rate": 4.471703802255712e-06,
      "loss": 0.5285,
      "step": 4082
    },
    {
      "epoch": 0.5006130456105934,
      "grad_norm": 2.1551408077391123,
      "learning_rate": 4.471392155945314e-06,
      "loss": 0.5537,
      "step": 4083
    },
    {
      "epoch": 0.5007356547327121,
      "grad_norm": 2.105671047157173,
      "learning_rate": 4.4710804286072074e-06,
      "loss": 0.5625,
      "step": 4084
    },
    {
      "epoch": 0.5008582638548308,
      "grad_norm": 1.8127355824730336,
      "learning_rate": 4.470768620254205e-06,
      "loss": 0.5221,
      "step": 4085
    },
    {
      "epoch": 0.5009808729769495,
      "grad_norm": 1.9407869751635949,
      "learning_rate": 4.470456730899124e-06,
      "loss": 0.558,
      "step": 4086
    },
    {
      "epoch": 0.5011034820990682,
      "grad_norm": 1.9455507757551171,
      "learning_rate": 4.470144760554783e-06,
      "loss": 0.595,
      "step": 4087
    },
    {
      "epoch": 0.5012260912211869,
      "grad_norm": 2.2303091358407383,
      "learning_rate": 4.469832709234005e-06,
      "loss": 0.5626,
      "step": 4088
    },
    {
      "epoch": 0.5013487003433056,
      "grad_norm": 2.3331234254573348,
      "learning_rate": 4.469520576949614e-06,
      "loss": 0.5485,
      "step": 4089
    },
    {
      "epoch": 0.5014713094654242,
      "grad_norm": 2.0668218061108607,
      "learning_rate": 4.469208363714441e-06,
      "loss": 0.521,
      "step": 4090
    },
    {
      "epoch": 0.5015939185875429,
      "grad_norm": 2.0514161145257463,
      "learning_rate": 4.468896069541319e-06,
      "loss": 0.5855,
      "step": 4091
    },
    {
      "epoch": 0.5017165277096616,
      "grad_norm": 1.9693194538034704,
      "learning_rate": 4.4685836944430815e-06,
      "loss": 0.524,
      "step": 4092
    },
    {
      "epoch": 0.5018391368317803,
      "grad_norm": 2.0708198436109804,
      "learning_rate": 4.46827123843257e-06,
      "loss": 0.602,
      "step": 4093
    },
    {
      "epoch": 0.501961745953899,
      "grad_norm": 2.142329209776613,
      "learning_rate": 4.4679587015226255e-06,
      "loss": 0.5083,
      "step": 4094
    },
    {
      "epoch": 0.5020843550760177,
      "grad_norm": 2.383823149910697,
      "learning_rate": 4.467646083726095e-06,
      "loss": 0.5572,
      "step": 4095
    },
    {
      "epoch": 0.5022069641981364,
      "grad_norm": 1.9753195584096526,
      "learning_rate": 4.467333385055826e-06,
      "loss": 0.5579,
      "step": 4096
    },
    {
      "epoch": 0.5023295733202551,
      "grad_norm": 2.1556234482834102,
      "learning_rate": 4.467020605524673e-06,
      "loss": 0.523,
      "step": 4097
    },
    {
      "epoch": 0.5024521824423737,
      "grad_norm": 2.12797872644322,
      "learning_rate": 4.466707745145491e-06,
      "loss": 0.5585,
      "step": 4098
    },
    {
      "epoch": 0.5025747915644924,
      "grad_norm": 1.8731558081835105,
      "learning_rate": 4.466394803931137e-06,
      "loss": 0.5175,
      "step": 4099
    },
    {
      "epoch": 0.5026974006866111,
      "grad_norm": 2.015504214073226,
      "learning_rate": 4.466081781894478e-06,
      "loss": 0.5721,
      "step": 4100
    },
    {
      "epoch": 0.5028200098087298,
      "grad_norm": 2.118831434618762,
      "learning_rate": 4.465768679048376e-06,
      "loss": 0.5086,
      "step": 4101
    },
    {
      "epoch": 0.5029426189308485,
      "grad_norm": 2.154405966726664,
      "learning_rate": 4.465455495405701e-06,
      "loss": 0.5911,
      "step": 4102
    },
    {
      "epoch": 0.5030652280529672,
      "grad_norm": 2.137606450630234,
      "learning_rate": 4.465142230979325e-06,
      "loss": 0.5324,
      "step": 4103
    },
    {
      "epoch": 0.5031878371750859,
      "grad_norm": 1.8575123537703864,
      "learning_rate": 4.4648288857821245e-06,
      "loss": 0.5481,
      "step": 4104
    },
    {
      "epoch": 0.5033104462972046,
      "grad_norm": 2.4271151881543886,
      "learning_rate": 4.464515459826978e-06,
      "loss": 0.5337,
      "step": 4105
    },
    {
      "epoch": 0.5034330554193231,
      "grad_norm": 2.0149306284339508,
      "learning_rate": 4.464201953126769e-06,
      "loss": 0.5459,
      "step": 4106
    },
    {
      "epoch": 0.5035556645414418,
      "grad_norm": 1.98417123252824,
      "learning_rate": 4.463888365694382e-06,
      "loss": 0.5524,
      "step": 4107
    },
    {
      "epoch": 0.5036782736635605,
      "grad_norm": 2.1107544502097992,
      "learning_rate": 4.463574697542707e-06,
      "loss": 0.5152,
      "step": 4108
    },
    {
      "epoch": 0.5038008827856792,
      "grad_norm": 2.221182298703518,
      "learning_rate": 4.463260948684635e-06,
      "loss": 0.5507,
      "step": 4109
    },
    {
      "epoch": 0.503923491907798,
      "grad_norm": 2.011189706225364,
      "learning_rate": 4.4629471191330625e-06,
      "loss": 0.5681,
      "step": 4110
    },
    {
      "epoch": 0.5040461010299166,
      "grad_norm": 1.9554658479440612,
      "learning_rate": 4.462633208900889e-06,
      "loss": 0.5936,
      "step": 4111
    },
    {
      "epoch": 0.5041687101520353,
      "grad_norm": 1.8898573134873908,
      "learning_rate": 4.462319218001015e-06,
      "loss": 0.5088,
      "step": 4112
    },
    {
      "epoch": 0.504291319274154,
      "grad_norm": 4.222723271050583,
      "learning_rate": 4.462005146446348e-06,
      "loss": 0.4938,
      "step": 4113
    },
    {
      "epoch": 0.5044139283962726,
      "grad_norm": 1.8187288375349977,
      "learning_rate": 4.461690994249795e-06,
      "loss": 0.5276,
      "step": 4114
    },
    {
      "epoch": 0.5045365375183913,
      "grad_norm": 1.8820444664428895,
      "learning_rate": 4.4613767614242695e-06,
      "loss": 0.5562,
      "step": 4115
    },
    {
      "epoch": 0.50465914664051,
      "grad_norm": 2.0807341313949297,
      "learning_rate": 4.461062447982687e-06,
      "loss": 0.5408,
      "step": 4116
    },
    {
      "epoch": 0.5047817557626287,
      "grad_norm": 2.1829610208499877,
      "learning_rate": 4.460748053937966e-06,
      "loss": 0.5936,
      "step": 4117
    },
    {
      "epoch": 0.5049043648847474,
      "grad_norm": 2.043913250651783,
      "learning_rate": 4.46043357930303e-06,
      "loss": 0.5409,
      "step": 4118
    },
    {
      "epoch": 0.5050269740068661,
      "grad_norm": 2.032432828115443,
      "learning_rate": 4.460119024090801e-06,
      "loss": 0.5906,
      "step": 4119
    },
    {
      "epoch": 0.5051495831289848,
      "grad_norm": 2.1246555720475766,
      "learning_rate": 4.459804388314211e-06,
      "loss": 0.5601,
      "step": 4120
    },
    {
      "epoch": 0.5052721922511035,
      "grad_norm": 1.9623569209645146,
      "learning_rate": 4.459489671986192e-06,
      "loss": 0.5419,
      "step": 4121
    },
    {
      "epoch": 0.5053948013732221,
      "grad_norm": 2.133317466169491,
      "learning_rate": 4.459174875119677e-06,
      "loss": 0.5602,
      "step": 4122
    },
    {
      "epoch": 0.5055174104953408,
      "grad_norm": 2.0646323655924856,
      "learning_rate": 4.4588599977276075e-06,
      "loss": 0.5443,
      "step": 4123
    },
    {
      "epoch": 0.5056400196174595,
      "grad_norm": 2.0136595156768093,
      "learning_rate": 4.458545039822923e-06,
      "loss": 0.5523,
      "step": 4124
    },
    {
      "epoch": 0.5057626287395782,
      "grad_norm": 2.1681511869066914,
      "learning_rate": 4.4582300014185705e-06,
      "loss": 0.5435,
      "step": 4125
    },
    {
      "epoch": 0.5058852378616969,
      "grad_norm": 1.8272629256837105,
      "learning_rate": 4.457914882527499e-06,
      "loss": 0.5052,
      "step": 4126
    },
    {
      "epoch": 0.5060078469838156,
      "grad_norm": 2.162460468186388,
      "learning_rate": 4.457599683162659e-06,
      "loss": 0.5462,
      "step": 4127
    },
    {
      "epoch": 0.5061304561059343,
      "grad_norm": 2.002811443432001,
      "learning_rate": 4.457284403337006e-06,
      "loss": 0.5658,
      "step": 4128
    },
    {
      "epoch": 0.5062530652280529,
      "grad_norm": 1.9623444995915875,
      "learning_rate": 4.456969043063499e-06,
      "loss": 0.5092,
      "step": 4129
    },
    {
      "epoch": 0.5063756743501716,
      "grad_norm": 2.1084017899586938,
      "learning_rate": 4.4566536023551e-06,
      "loss": 0.5708,
      "step": 4130
    },
    {
      "epoch": 0.5064982834722903,
      "grad_norm": 1.9849298749053683,
      "learning_rate": 4.456338081224773e-06,
      "loss": 0.5595,
      "step": 4131
    },
    {
      "epoch": 0.506620892594409,
      "grad_norm": 2.1311928392391866,
      "learning_rate": 4.456022479685489e-06,
      "loss": 0.5553,
      "step": 4132
    },
    {
      "epoch": 0.5067435017165277,
      "grad_norm": 1.9844710609629328,
      "learning_rate": 4.4557067977502175e-06,
      "loss": 0.5451,
      "step": 4133
    },
    {
      "epoch": 0.5068661108386464,
      "grad_norm": 2.1923608366130374,
      "learning_rate": 4.455391035431935e-06,
      "loss": 0.5154,
      "step": 4134
    },
    {
      "epoch": 0.5069887199607651,
      "grad_norm": 1.9715336182904353,
      "learning_rate": 4.455075192743618e-06,
      "loss": 0.5576,
      "step": 4135
    },
    {
      "epoch": 0.5071113290828838,
      "grad_norm": 1.940300681836532,
      "learning_rate": 4.45475926969825e-06,
      "loss": 0.5892,
      "step": 4136
    },
    {
      "epoch": 0.5072339382050024,
      "grad_norm": 2.009631410075078,
      "learning_rate": 4.454443266308816e-06,
      "loss": 0.5689,
      "step": 4137
    },
    {
      "epoch": 0.5073565473271211,
      "grad_norm": 2.119899703889189,
      "learning_rate": 4.4541271825883025e-06,
      "loss": 0.5364,
      "step": 4138
    },
    {
      "epoch": 0.5074791564492398,
      "grad_norm": 2.107559509061997,
      "learning_rate": 4.453811018549703e-06,
      "loss": 0.4832,
      "step": 4139
    },
    {
      "epoch": 0.5076017655713585,
      "grad_norm": 2.029140226264356,
      "learning_rate": 4.453494774206012e-06,
      "loss": 0.5174,
      "step": 4140
    },
    {
      "epoch": 0.5077243746934772,
      "grad_norm": 2.2523459530924606,
      "learning_rate": 4.453178449570226e-06,
      "loss": 0.59,
      "step": 4141
    },
    {
      "epoch": 0.5078469838155959,
      "grad_norm": 2.0260418443406527,
      "learning_rate": 4.4528620446553495e-06,
      "loss": 0.5096,
      "step": 4142
    },
    {
      "epoch": 0.5079695929377146,
      "grad_norm": 1.8793355806498777,
      "learning_rate": 4.452545559474384e-06,
      "loss": 0.5235,
      "step": 4143
    },
    {
      "epoch": 0.5080922020598333,
      "grad_norm": 2.1894883463508825,
      "learning_rate": 4.452228994040341e-06,
      "loss": 0.5559,
      "step": 4144
    },
    {
      "epoch": 0.5082148111819519,
      "grad_norm": 1.9369977657704531,
      "learning_rate": 4.45191234836623e-06,
      "loss": 0.5629,
      "step": 4145
    },
    {
      "epoch": 0.5083374203040706,
      "grad_norm": 1.9914995898349157,
      "learning_rate": 4.451595622465065e-06,
      "loss": 0.4985,
      "step": 4146
    },
    {
      "epoch": 0.5084600294261893,
      "grad_norm": 2.0431522196419367,
      "learning_rate": 4.451278816349865e-06,
      "loss": 0.5961,
      "step": 4147
    },
    {
      "epoch": 0.508582638548308,
      "grad_norm": 2.075352441407481,
      "learning_rate": 4.450961930033652e-06,
      "loss": 0.5615,
      "step": 4148
    },
    {
      "epoch": 0.5087052476704267,
      "grad_norm": 1.9465383575222355,
      "learning_rate": 4.450644963529449e-06,
      "loss": 0.583,
      "step": 4149
    },
    {
      "epoch": 0.5088278567925454,
      "grad_norm": 2.112047361275278,
      "learning_rate": 4.450327916850284e-06,
      "loss": 0.5375,
      "step": 4150
    },
    {
      "epoch": 0.5089504659146641,
      "grad_norm": 1.9626225779833648,
      "learning_rate": 4.4500107900091904e-06,
      "loss": 0.5336,
      "step": 4151
    },
    {
      "epoch": 0.5090730750367828,
      "grad_norm": 1.9013206478706968,
      "learning_rate": 4.4496935830192e-06,
      "loss": 0.5489,
      "step": 4152
    },
    {
      "epoch": 0.5091956841589014,
      "grad_norm": 1.8997586273361413,
      "learning_rate": 4.4493762958933515e-06,
      "loss": 0.5471,
      "step": 4153
    },
    {
      "epoch": 0.5093182932810201,
      "grad_norm": 1.9918658139006247,
      "learning_rate": 4.449058928644687e-06,
      "loss": 0.5102,
      "step": 4154
    },
    {
      "epoch": 0.5094409024031388,
      "grad_norm": 1.9325205056818264,
      "learning_rate": 4.448741481286249e-06,
      "loss": 0.5997,
      "step": 4155
    },
    {
      "epoch": 0.5095635115252575,
      "grad_norm": 1.8404455321845683,
      "learning_rate": 4.448423953831088e-06,
      "loss": 0.5344,
      "step": 4156
    },
    {
      "epoch": 0.5096861206473762,
      "grad_norm": 1.9734703734141363,
      "learning_rate": 4.4481063462922515e-06,
      "loss": 0.4829,
      "step": 4157
    },
    {
      "epoch": 0.5098087297694949,
      "grad_norm": 2.15898648865682,
      "learning_rate": 4.447788658682796e-06,
      "loss": 0.5921,
      "step": 4158
    },
    {
      "epoch": 0.5099313388916136,
      "grad_norm": 2.183909029309359,
      "learning_rate": 4.447470891015778e-06,
      "loss": 0.5451,
      "step": 4159
    },
    {
      "epoch": 0.5100539480137323,
      "grad_norm": 2.073746728804204,
      "learning_rate": 4.447153043304259e-06,
      "loss": 0.5789,
      "step": 4160
    },
    {
      "epoch": 0.5101765571358509,
      "grad_norm": 1.7478375128066166,
      "learning_rate": 4.446835115561302e-06,
      "loss": 0.5316,
      "step": 4161
    },
    {
      "epoch": 0.5102991662579696,
      "grad_norm": 1.9402058167287461,
      "learning_rate": 4.446517107799976e-06,
      "loss": 0.5372,
      "step": 4162
    },
    {
      "epoch": 0.5104217753800883,
      "grad_norm": 1.870799935678133,
      "learning_rate": 4.44619902003335e-06,
      "loss": 0.5215,
      "step": 4163
    },
    {
      "epoch": 0.510544384502207,
      "grad_norm": 2.023054682810163,
      "learning_rate": 4.4458808522745e-06,
      "loss": 0.6305,
      "step": 4164
    },
    {
      "epoch": 0.5106669936243257,
      "grad_norm": 1.9328297536631773,
      "learning_rate": 4.445562604536501e-06,
      "loss": 0.5249,
      "step": 4165
    },
    {
      "epoch": 0.5107896027464444,
      "grad_norm": 2.057662062993529,
      "learning_rate": 4.445244276832435e-06,
      "loss": 0.4908,
      "step": 4166
    },
    {
      "epoch": 0.5109122118685631,
      "grad_norm": 1.9439409758796722,
      "learning_rate": 4.444925869175385e-06,
      "loss": 0.5192,
      "step": 4167
    },
    {
      "epoch": 0.5110348209906818,
      "grad_norm": 2.0472834657989742,
      "learning_rate": 4.444607381578439e-06,
      "loss": 0.561,
      "step": 4168
    },
    {
      "epoch": 0.5111574301128003,
      "grad_norm": 2.088054168286765,
      "learning_rate": 4.444288814054687e-06,
      "loss": 0.5891,
      "step": 4169
    },
    {
      "epoch": 0.511280039234919,
      "grad_norm": 2.1468192067355973,
      "learning_rate": 4.443970166617223e-06,
      "loss": 0.5389,
      "step": 4170
    },
    {
      "epoch": 0.5114026483570377,
      "grad_norm": 2.0128483209887467,
      "learning_rate": 4.443651439279143e-06,
      "loss": 0.5527,
      "step": 4171
    },
    {
      "epoch": 0.5115252574791564,
      "grad_norm": 2.162370472518087,
      "learning_rate": 4.443332632053548e-06,
      "loss": 0.5868,
      "step": 4172
    },
    {
      "epoch": 0.5116478666012751,
      "grad_norm": 2.165964701596547,
      "learning_rate": 4.443013744953542e-06,
      "loss": 0.5547,
      "step": 4173
    },
    {
      "epoch": 0.5117704757233938,
      "grad_norm": 1.8535376742421557,
      "learning_rate": 4.44269477799223e-06,
      "loss": 0.5323,
      "step": 4174
    },
    {
      "epoch": 0.5118930848455125,
      "grad_norm": 2.073767583037074,
      "learning_rate": 4.442375731182725e-06,
      "loss": 0.5642,
      "step": 4175
    },
    {
      "epoch": 0.5120156939676312,
      "grad_norm": 1.857067902720629,
      "learning_rate": 4.442056604538137e-06,
      "loss": 0.5395,
      "step": 4176
    },
    {
      "epoch": 0.5121383030897498,
      "grad_norm": 1.957606036736362,
      "learning_rate": 4.441737398071585e-06,
      "loss": 0.5028,
      "step": 4177
    },
    {
      "epoch": 0.5122609122118685,
      "grad_norm": 2.1036814274596343,
      "learning_rate": 4.441418111796189e-06,
      "loss": 0.6109,
      "step": 4178
    },
    {
      "epoch": 0.5123835213339872,
      "grad_norm": 1.9653246034529182,
      "learning_rate": 4.441098745725071e-06,
      "loss": 0.5567,
      "step": 4179
    },
    {
      "epoch": 0.5125061304561059,
      "grad_norm": 1.9621997695917295,
      "learning_rate": 4.440779299871358e-06,
      "loss": 0.5176,
      "step": 4180
    },
    {
      "epoch": 0.5126287395782246,
      "grad_norm": 2.1315403810414244,
      "learning_rate": 4.440459774248181e-06,
      "loss": 0.5861,
      "step": 4181
    },
    {
      "epoch": 0.5127513487003433,
      "grad_norm": 1.997024631745692,
      "learning_rate": 4.4401401688686705e-06,
      "loss": 0.5619,
      "step": 4182
    },
    {
      "epoch": 0.512873957822462,
      "grad_norm": 1.987344129330873,
      "learning_rate": 4.439820483745965e-06,
      "loss": 0.5833,
      "step": 4183
    },
    {
      "epoch": 0.5129965669445806,
      "grad_norm": 1.9428840696657828,
      "learning_rate": 4.439500718893204e-06,
      "loss": 0.511,
      "step": 4184
    },
    {
      "epoch": 0.5131191760666993,
      "grad_norm": 2.055072272013106,
      "learning_rate": 4.439180874323529e-06,
      "loss": 0.5377,
      "step": 4185
    },
    {
      "epoch": 0.513241785188818,
      "grad_norm": 1.8407766979977698,
      "learning_rate": 4.438860950050087e-06,
      "loss": 0.5632,
      "step": 4186
    },
    {
      "epoch": 0.5133643943109367,
      "grad_norm": 2.091861877116408,
      "learning_rate": 4.438540946086027e-06,
      "loss": 0.5508,
      "step": 4187
    },
    {
      "epoch": 0.5134870034330554,
      "grad_norm": 2.0782036429012045,
      "learning_rate": 4.438220862444503e-06,
      "loss": 0.5764,
      "step": 4188
    },
    {
      "epoch": 0.5136096125551741,
      "grad_norm": 1.932111231540851,
      "learning_rate": 4.437900699138671e-06,
      "loss": 0.5406,
      "step": 4189
    },
    {
      "epoch": 0.5137322216772928,
      "grad_norm": 1.9852202549027882,
      "learning_rate": 4.4375804561816885e-06,
      "loss": 0.5988,
      "step": 4190
    },
    {
      "epoch": 0.5138548307994115,
      "grad_norm": 2.126995026444883,
      "learning_rate": 4.437260133586719e-06,
      "loss": 0.5597,
      "step": 4191
    },
    {
      "epoch": 0.5139774399215301,
      "grad_norm": 1.9931633835443046,
      "learning_rate": 4.436939731366928e-06,
      "loss": 0.5102,
      "step": 4192
    },
    {
      "epoch": 0.5141000490436488,
      "grad_norm": 2.155129333619603,
      "learning_rate": 4.4366192495354865e-06,
      "loss": 0.5847,
      "step": 4193
    },
    {
      "epoch": 0.5142226581657675,
      "grad_norm": 1.750563891717857,
      "learning_rate": 4.436298688105564e-06,
      "loss": 0.5186,
      "step": 4194
    },
    {
      "epoch": 0.5143452672878862,
      "grad_norm": 1.8287503255653244,
      "learning_rate": 4.435978047090338e-06,
      "loss": 0.5601,
      "step": 4195
    },
    {
      "epoch": 0.5144678764100049,
      "grad_norm": 2.0433627259284814,
      "learning_rate": 4.435657326502986e-06,
      "loss": 0.5464,
      "step": 4196
    },
    {
      "epoch": 0.5145904855321236,
      "grad_norm": 1.9480370369091276,
      "learning_rate": 4.435336526356692e-06,
      "loss": 0.5688,
      "step": 4197
    },
    {
      "epoch": 0.5147130946542423,
      "grad_norm": 1.9903218562488076,
      "learning_rate": 4.43501564666464e-06,
      "loss": 0.5531,
      "step": 4198
    },
    {
      "epoch": 0.514835703776361,
      "grad_norm": 1.9587397872534482,
      "learning_rate": 4.43469468744002e-06,
      "loss": 0.5083,
      "step": 4199
    },
    {
      "epoch": 0.5149583128984796,
      "grad_norm": 2.1781114389931835,
      "learning_rate": 4.434373648696022e-06,
      "loss": 0.5505,
      "step": 4200
    },
    {
      "epoch": 0.5150809220205983,
      "grad_norm": 1.7137172964572722,
      "learning_rate": 4.434052530445843e-06,
      "loss": 0.478,
      "step": 4201
    },
    {
      "epoch": 0.515203531142717,
      "grad_norm": 1.934230012742388,
      "learning_rate": 4.433731332702682e-06,
      "loss": 0.5496,
      "step": 4202
    },
    {
      "epoch": 0.5153261402648357,
      "grad_norm": 1.8993779256402794,
      "learning_rate": 4.433410055479739e-06,
      "loss": 0.5236,
      "step": 4203
    },
    {
      "epoch": 0.5154487493869544,
      "grad_norm": 2.1718793539146946,
      "learning_rate": 4.43308869879022e-06,
      "loss": 0.5172,
      "step": 4204
    },
    {
      "epoch": 0.5155713585090731,
      "grad_norm": 2.0168749428792023,
      "learning_rate": 4.432767262647332e-06,
      "loss": 0.5586,
      "step": 4205
    },
    {
      "epoch": 0.5156939676311918,
      "grad_norm": 2.1746224532754748,
      "learning_rate": 4.43244574706429e-06,
      "loss": 0.5535,
      "step": 4206
    },
    {
      "epoch": 0.5158165767533105,
      "grad_norm": 1.9450062716355572,
      "learning_rate": 4.432124152054306e-06,
      "loss": 0.5795,
      "step": 4207
    },
    {
      "epoch": 0.5159391858754291,
      "grad_norm": 2.1941987557289053,
      "learning_rate": 4.431802477630598e-06,
      "loss": 0.5274,
      "step": 4208
    },
    {
      "epoch": 0.5160617949975478,
      "grad_norm": 2.016110068094391,
      "learning_rate": 4.431480723806388e-06,
      "loss": 0.5339,
      "step": 4209
    },
    {
      "epoch": 0.5161844041196665,
      "grad_norm": 1.9946261133082468,
      "learning_rate": 4.431158890594902e-06,
      "loss": 0.5406,
      "step": 4210
    },
    {
      "epoch": 0.5163070132417852,
      "grad_norm": 1.8239194077074714,
      "learning_rate": 4.430836978009367e-06,
      "loss": 0.5022,
      "step": 4211
    },
    {
      "epoch": 0.5164296223639039,
      "grad_norm": 1.9174144465871734,
      "learning_rate": 4.430514986063013e-06,
      "loss": 0.5515,
      "step": 4212
    },
    {
      "epoch": 0.5165522314860226,
      "grad_norm": 1.9369164990019487,
      "learning_rate": 4.4301929147690744e-06,
      "loss": 0.5246,
      "step": 4213
    },
    {
      "epoch": 0.5166748406081413,
      "grad_norm": 2.0756685332299525,
      "learning_rate": 4.429870764140792e-06,
      "loss": 0.561,
      "step": 4214
    },
    {
      "epoch": 0.51679744973026,
      "grad_norm": 2.0494947003789177,
      "learning_rate": 4.429548534191404e-06,
      "loss": 0.5456,
      "step": 4215
    },
    {
      "epoch": 0.5169200588523786,
      "grad_norm": 2.0539765792091207,
      "learning_rate": 4.429226224934155e-06,
      "loss": 0.5519,
      "step": 4216
    },
    {
      "epoch": 0.5170426679744973,
      "grad_norm": 1.8855455422349479,
      "learning_rate": 4.428903836382291e-06,
      "loss": 0.5916,
      "step": 4217
    },
    {
      "epoch": 0.517165277096616,
      "grad_norm": 1.991367418382875,
      "learning_rate": 4.428581368549067e-06,
      "loss": 0.53,
      "step": 4218
    },
    {
      "epoch": 0.5172878862187347,
      "grad_norm": 2.2233019559215106,
      "learning_rate": 4.428258821447733e-06,
      "loss": 0.548,
      "step": 4219
    },
    {
      "epoch": 0.5174104953408534,
      "grad_norm": 1.856343074095767,
      "learning_rate": 4.427936195091549e-06,
      "loss": 0.5445,
      "step": 4220
    },
    {
      "epoch": 0.5175331044629721,
      "grad_norm": 2.072751850254073,
      "learning_rate": 4.427613489493773e-06,
      "loss": 0.5642,
      "step": 4221
    },
    {
      "epoch": 0.5176557135850908,
      "grad_norm": 2.0001524417112226,
      "learning_rate": 4.4272907046676704e-06,
      "loss": 0.5415,
      "step": 4222
    },
    {
      "epoch": 0.5177783227072095,
      "grad_norm": 2.0522322953437353,
      "learning_rate": 4.426967840626508e-06,
      "loss": 0.5526,
      "step": 4223
    },
    {
      "epoch": 0.5179009318293281,
      "grad_norm": 2.1094550413584128,
      "learning_rate": 4.426644897383556e-06,
      "loss": 0.636,
      "step": 4224
    },
    {
      "epoch": 0.5180235409514468,
      "grad_norm": 2.079374170984911,
      "learning_rate": 4.426321874952088e-06,
      "loss": 0.5878,
      "step": 4225
    },
    {
      "epoch": 0.5181461500735655,
      "grad_norm": 1.9713947565101857,
      "learning_rate": 4.42599877334538e-06,
      "loss": 0.5481,
      "step": 4226
    },
    {
      "epoch": 0.5182687591956842,
      "grad_norm": 2.1406755061838365,
      "learning_rate": 4.425675592576713e-06,
      "loss": 0.5358,
      "step": 4227
    },
    {
      "epoch": 0.5183913683178029,
      "grad_norm": 2.0761459845383503,
      "learning_rate": 4.42535233265937e-06,
      "loss": 0.5342,
      "step": 4228
    },
    {
      "epoch": 0.5185139774399216,
      "grad_norm": 2.600663964418773,
      "learning_rate": 4.425028993606638e-06,
      "loss": 0.5731,
      "step": 4229
    },
    {
      "epoch": 0.5186365865620403,
      "grad_norm": 1.8155319867036754,
      "learning_rate": 4.424705575431806e-06,
      "loss": 0.5633,
      "step": 4230
    },
    {
      "epoch": 0.5187591956841588,
      "grad_norm": 1.9715248698442918,
      "learning_rate": 4.424382078148167e-06,
      "loss": 0.5444,
      "step": 4231
    },
    {
      "epoch": 0.5188818048062775,
      "grad_norm": 1.8838788879336699,
      "learning_rate": 4.4240585017690185e-06,
      "loss": 0.5525,
      "step": 4232
    },
    {
      "epoch": 0.5190044139283962,
      "grad_norm": 2.0382201670892477,
      "learning_rate": 4.423734846307659e-06,
      "loss": 0.529,
      "step": 4233
    },
    {
      "epoch": 0.519127023050515,
      "grad_norm": 1.8518667962095965,
      "learning_rate": 4.423411111777392e-06,
      "loss": 0.5083,
      "step": 4234
    },
    {
      "epoch": 0.5192496321726336,
      "grad_norm": 2.319880444007248,
      "learning_rate": 4.423087298191522e-06,
      "loss": 0.5996,
      "step": 4235
    },
    {
      "epoch": 0.5193722412947523,
      "grad_norm": 2.1013217907891937,
      "learning_rate": 4.42276340556336e-06,
      "loss": 0.5715,
      "step": 4236
    },
    {
      "epoch": 0.519494850416871,
      "grad_norm": 2.124629052594527,
      "learning_rate": 4.422439433906218e-06,
      "loss": 0.5872,
      "step": 4237
    },
    {
      "epoch": 0.5196174595389897,
      "grad_norm": 2.095852982928993,
      "learning_rate": 4.422115383233413e-06,
      "loss": 0.6057,
      "step": 4238
    },
    {
      "epoch": 0.5197400686611083,
      "grad_norm": 1.9809761611551229,
      "learning_rate": 4.421791253558262e-06,
      "loss": 0.5072,
      "step": 4239
    },
    {
      "epoch": 0.519862677783227,
      "grad_norm": 2.004918188820746,
      "learning_rate": 4.4214670448940876e-06,
      "loss": 0.5486,
      "step": 4240
    },
    {
      "epoch": 0.5199852869053457,
      "grad_norm": 2.122206469988478,
      "learning_rate": 4.421142757254217e-06,
      "loss": 0.5239,
      "step": 4241
    },
    {
      "epoch": 0.5201078960274644,
      "grad_norm": 2.127268495052063,
      "learning_rate": 4.4208183906519775e-06,
      "loss": 0.5254,
      "step": 4242
    },
    {
      "epoch": 0.5202305051495831,
      "grad_norm": 1.9653907392887082,
      "learning_rate": 4.420493945100702e-06,
      "loss": 0.5472,
      "step": 4243
    },
    {
      "epoch": 0.5203531142717018,
      "grad_norm": 1.9272605518407495,
      "learning_rate": 4.420169420613725e-06,
      "loss": 0.571,
      "step": 4244
    },
    {
      "epoch": 0.5204757233938205,
      "grad_norm": 2.0465016174851938,
      "learning_rate": 4.419844817204385e-06,
      "loss": 0.5746,
      "step": 4245
    },
    {
      "epoch": 0.5205983325159392,
      "grad_norm": 2.029866889733978,
      "learning_rate": 4.4195201348860254e-06,
      "loss": 0.5202,
      "step": 4246
    },
    {
      "epoch": 0.5207209416380578,
      "grad_norm": 1.7748571722872621,
      "learning_rate": 4.4191953736719885e-06,
      "loss": 0.5276,
      "step": 4247
    },
    {
      "epoch": 0.5208435507601765,
      "grad_norm": 1.899920220115998,
      "learning_rate": 4.418870533575626e-06,
      "loss": 0.5688,
      "step": 4248
    },
    {
      "epoch": 0.5209661598822952,
      "grad_norm": 2.2934548619568242,
      "learning_rate": 4.418545614610285e-06,
      "loss": 0.5011,
      "step": 4249
    },
    {
      "epoch": 0.5210887690044139,
      "grad_norm": 2.066403185949998,
      "learning_rate": 4.418220616789325e-06,
      "loss": 0.5521,
      "step": 4250
    },
    {
      "epoch": 0.5212113781265326,
      "grad_norm": 1.984990812529676,
      "learning_rate": 4.4178955401261e-06,
      "loss": 0.5931,
      "step": 4251
    },
    {
      "epoch": 0.5213339872486513,
      "grad_norm": 2.1531455827955637,
      "learning_rate": 4.417570384633974e-06,
      "loss": 0.5301,
      "step": 4252
    },
    {
      "epoch": 0.52145659637077,
      "grad_norm": 1.9664106047167575,
      "learning_rate": 4.4172451503263105e-06,
      "loss": 0.5463,
      "step": 4253
    },
    {
      "epoch": 0.5215792054928887,
      "grad_norm": 2.095821823019675,
      "learning_rate": 4.416919837216477e-06,
      "loss": 0.5865,
      "step": 4254
    },
    {
      "epoch": 0.5217018146150073,
      "grad_norm": 1.8309003588704016,
      "learning_rate": 4.4165944453178435e-06,
      "loss": 0.4921,
      "step": 4255
    },
    {
      "epoch": 0.521824423737126,
      "grad_norm": 1.9648334936391418,
      "learning_rate": 4.4162689746437856e-06,
      "loss": 0.5447,
      "step": 4256
    },
    {
      "epoch": 0.5219470328592447,
      "grad_norm": 2.2370411833216917,
      "learning_rate": 4.415943425207681e-06,
      "loss": 0.539,
      "step": 4257
    },
    {
      "epoch": 0.5220696419813634,
      "grad_norm": 2.109524543426175,
      "learning_rate": 4.4156177970229095e-06,
      "loss": 0.5645,
      "step": 4258
    },
    {
      "epoch": 0.5221922511034821,
      "grad_norm": 2.0235581244346936,
      "learning_rate": 4.415292090102856e-06,
      "loss": 0.5713,
      "step": 4259
    },
    {
      "epoch": 0.5223148602256008,
      "grad_norm": 2.0859570359031867,
      "learning_rate": 4.414966304460906e-06,
      "loss": 0.5303,
      "step": 4260
    },
    {
      "epoch": 0.5224374693477195,
      "grad_norm": 2.246253447877728,
      "learning_rate": 4.414640440110451e-06,
      "loss": 0.5611,
      "step": 4261
    },
    {
      "epoch": 0.5225600784698382,
      "grad_norm": 2.0985462423616226,
      "learning_rate": 4.414314497064883e-06,
      "loss": 0.5027,
      "step": 4262
    },
    {
      "epoch": 0.5226826875919568,
      "grad_norm": 2.2347114994408,
      "learning_rate": 4.413988475337601e-06,
      "loss": 0.5142,
      "step": 4263
    },
    {
      "epoch": 0.5228052967140755,
      "grad_norm": 1.8327147181451273,
      "learning_rate": 4.413662374942005e-06,
      "loss": 0.5686,
      "step": 4264
    },
    {
      "epoch": 0.5229279058361942,
      "grad_norm": 1.9017723434144642,
      "learning_rate": 4.413336195891497e-06,
      "loss": 0.544,
      "step": 4265
    },
    {
      "epoch": 0.5230505149583129,
      "grad_norm": 2.0241769503543163,
      "learning_rate": 4.413009938199484e-06,
      "loss": 0.5578,
      "step": 4266
    },
    {
      "epoch": 0.5231731240804316,
      "grad_norm": 1.8694709313857047,
      "learning_rate": 4.4126836018793754e-06,
      "loss": 0.5699,
      "step": 4267
    },
    {
      "epoch": 0.5232957332025503,
      "grad_norm": 2.007862677844381,
      "learning_rate": 4.4123571869445855e-06,
      "loss": 0.5696,
      "step": 4268
    },
    {
      "epoch": 0.523418342324669,
      "grad_norm": 1.9986493577125732,
      "learning_rate": 4.412030693408529e-06,
      "loss": 0.5318,
      "step": 4269
    },
    {
      "epoch": 0.5235409514467877,
      "grad_norm": 2.1239488858811004,
      "learning_rate": 4.4117041212846264e-06,
      "loss": 0.5568,
      "step": 4270
    },
    {
      "epoch": 0.5236635605689063,
      "grad_norm": 2.023975250134774,
      "learning_rate": 4.4113774705863e-06,
      "loss": 0.5577,
      "step": 4271
    },
    {
      "epoch": 0.523786169691025,
      "grad_norm": 2.02482586513486,
      "learning_rate": 4.411050741326975e-06,
      "loss": 0.5377,
      "step": 4272
    },
    {
      "epoch": 0.5239087788131437,
      "grad_norm": 1.9600948477667797,
      "learning_rate": 4.410723933520081e-06,
      "loss": 0.5102,
      "step": 4273
    },
    {
      "epoch": 0.5240313879352624,
      "grad_norm": 2.015425522251571,
      "learning_rate": 4.410397047179053e-06,
      "loss": 0.5491,
      "step": 4274
    },
    {
      "epoch": 0.5241539970573811,
      "grad_norm": 2.1092283797195677,
      "learning_rate": 4.410070082317322e-06,
      "loss": 0.5328,
      "step": 4275
    },
    {
      "epoch": 0.5242766061794998,
      "grad_norm": 2.2150913684297193,
      "learning_rate": 4.4097430389483296e-06,
      "loss": 0.6177,
      "step": 4276
    },
    {
      "epoch": 0.5243992153016185,
      "grad_norm": 1.8576409297838694,
      "learning_rate": 4.409415917085518e-06,
      "loss": 0.5366,
      "step": 4277
    },
    {
      "epoch": 0.5245218244237371,
      "grad_norm": 2.1692097185293173,
      "learning_rate": 4.409088716742331e-06,
      "loss": 0.6275,
      "step": 4278
    },
    {
      "epoch": 0.5246444335458558,
      "grad_norm": 2.142811060741203,
      "learning_rate": 4.408761437932219e-06,
      "loss": 0.5953,
      "step": 4279
    },
    {
      "epoch": 0.5247670426679745,
      "grad_norm": 1.7865673153825092,
      "learning_rate": 4.408434080668632e-06,
      "loss": 0.4461,
      "step": 4280
    },
    {
      "epoch": 0.5248896517900932,
      "grad_norm": 1.8296986470386847,
      "learning_rate": 4.408106644965026e-06,
      "loss": 0.5,
      "step": 4281
    },
    {
      "epoch": 0.5250122609122119,
      "grad_norm": 1.9517961154590011,
      "learning_rate": 4.407779130834858e-06,
      "loss": 0.5677,
      "step": 4282
    },
    {
      "epoch": 0.5251348700343306,
      "grad_norm": 2.1738568325392826,
      "learning_rate": 4.407451538291591e-06,
      "loss": 0.5912,
      "step": 4283
    },
    {
      "epoch": 0.5252574791564493,
      "grad_norm": 1.8729010875010736,
      "learning_rate": 4.4071238673486896e-06,
      "loss": 0.4952,
      "step": 4284
    },
    {
      "epoch": 0.525380088278568,
      "grad_norm": 2.089910431638482,
      "learning_rate": 4.406796118019619e-06,
      "loss": 0.5118,
      "step": 4285
    },
    {
      "epoch": 0.5255026974006866,
      "grad_norm": 2.251272686868599,
      "learning_rate": 4.406468290317853e-06,
      "loss": 0.5353,
      "step": 4286
    },
    {
      "epoch": 0.5256253065228053,
      "grad_norm": 2.1102151672535463,
      "learning_rate": 4.406140384256866e-06,
      "loss": 0.5459,
      "step": 4287
    },
    {
      "epoch": 0.525747915644924,
      "grad_norm": 1.8690339855023852,
      "learning_rate": 4.405812399850135e-06,
      "loss": 0.5036,
      "step": 4288
    },
    {
      "epoch": 0.5258705247670427,
      "grad_norm": 1.8983072230484535,
      "learning_rate": 4.405484337111139e-06,
      "loss": 0.5175,
      "step": 4289
    },
    {
      "epoch": 0.5259931338891614,
      "grad_norm": 1.966762346015577,
      "learning_rate": 4.405156196053364e-06,
      "loss": 0.5152,
      "step": 4290
    },
    {
      "epoch": 0.5261157430112801,
      "grad_norm": 1.9661992277930271,
      "learning_rate": 4.404827976690297e-06,
      "loss": 0.556,
      "step": 4291
    },
    {
      "epoch": 0.5262383521333988,
      "grad_norm": 2.1052002429671206,
      "learning_rate": 4.4044996790354264e-06,
      "loss": 0.5656,
      "step": 4292
    },
    {
      "epoch": 0.5263609612555175,
      "grad_norm": 1.94806259261113,
      "learning_rate": 4.404171303102249e-06,
      "loss": 0.5468,
      "step": 4293
    },
    {
      "epoch": 0.526483570377636,
      "grad_norm": 2.079144671176654,
      "learning_rate": 4.40384284890426e-06,
      "loss": 0.5684,
      "step": 4294
    },
    {
      "epoch": 0.5266061794997547,
      "grad_norm": 2.121050206089468,
      "learning_rate": 4.403514316454958e-06,
      "loss": 0.5747,
      "step": 4295
    },
    {
      "epoch": 0.5267287886218734,
      "grad_norm": 2.179298486517336,
      "learning_rate": 4.403185705767849e-06,
      "loss": 0.5171,
      "step": 4296
    },
    {
      "epoch": 0.5268513977439921,
      "grad_norm": 2.144187580661802,
      "learning_rate": 4.4028570168564364e-06,
      "loss": 0.5499,
      "step": 4297
    },
    {
      "epoch": 0.5269740068661108,
      "grad_norm": 1.9591019644479781,
      "learning_rate": 4.402528249734233e-06,
      "loss": 0.5539,
      "step": 4298
    },
    {
      "epoch": 0.5270966159882295,
      "grad_norm": 2.0740748394231905,
      "learning_rate": 4.4021994044147505e-06,
      "loss": 0.5494,
      "step": 4299
    },
    {
      "epoch": 0.5272192251103482,
      "grad_norm": 2.0305294984890243,
      "learning_rate": 4.401870480911505e-06,
      "loss": 0.5602,
      "step": 4300
    },
    {
      "epoch": 0.5273418342324669,
      "grad_norm": 2.113027035979249,
      "learning_rate": 4.401541479238015e-06,
      "loss": 0.5125,
      "step": 4301
    },
    {
      "epoch": 0.5274644433545855,
      "grad_norm": 1.6490777832051264,
      "learning_rate": 4.401212399407804e-06,
      "loss": 0.5266,
      "step": 4302
    },
    {
      "epoch": 0.5275870524767042,
      "grad_norm": 1.9447059662196575,
      "learning_rate": 4.400883241434398e-06,
      "loss": 0.555,
      "step": 4303
    },
    {
      "epoch": 0.5277096615988229,
      "grad_norm": 1.885312870079919,
      "learning_rate": 4.400554005331324e-06,
      "loss": 0.5493,
      "step": 4304
    },
    {
      "epoch": 0.5278322707209416,
      "grad_norm": 1.9194608339372177,
      "learning_rate": 4.400224691112118e-06,
      "loss": 0.5561,
      "step": 4305
    },
    {
      "epoch": 0.5279548798430603,
      "grad_norm": 1.8148262411774616,
      "learning_rate": 4.399895298790312e-06,
      "loss": 0.5549,
      "step": 4306
    },
    {
      "epoch": 0.528077488965179,
      "grad_norm": 2.133815739214289,
      "learning_rate": 4.399565828379445e-06,
      "loss": 0.5423,
      "step": 4307
    },
    {
      "epoch": 0.5282000980872977,
      "grad_norm": 2.0667813842862723,
      "learning_rate": 4.399236279893061e-06,
      "loss": 0.5956,
      "step": 4308
    },
    {
      "epoch": 0.5283227072094164,
      "grad_norm": 2.02948026962453,
      "learning_rate": 4.398906653344702e-06,
      "loss": 0.5614,
      "step": 4309
    },
    {
      "epoch": 0.528445316331535,
      "grad_norm": 2.2441328939996708,
      "learning_rate": 4.3985769487479194e-06,
      "loss": 0.6508,
      "step": 4310
    },
    {
      "epoch": 0.5285679254536537,
      "grad_norm": 1.8880288318065992,
      "learning_rate": 4.398247166116262e-06,
      "loss": 0.5133,
      "step": 4311
    },
    {
      "epoch": 0.5286905345757724,
      "grad_norm": 1.9488445029634818,
      "learning_rate": 4.397917305463287e-06,
      "loss": 0.5007,
      "step": 4312
    },
    {
      "epoch": 0.5288131436978911,
      "grad_norm": 1.9001932806017974,
      "learning_rate": 4.397587366802548e-06,
      "loss": 0.5356,
      "step": 4313
    },
    {
      "epoch": 0.5289357528200098,
      "grad_norm": 1.8649135211441537,
      "learning_rate": 4.3972573501476115e-06,
      "loss": 0.5249,
      "step": 4314
    },
    {
      "epoch": 0.5290583619421285,
      "grad_norm": 2.0433335365506076,
      "learning_rate": 4.396927255512037e-06,
      "loss": 0.5603,
      "step": 4315
    },
    {
      "epoch": 0.5291809710642472,
      "grad_norm": 1.9722954927154202,
      "learning_rate": 4.3965970829093955e-06,
      "loss": 0.5502,
      "step": 4316
    },
    {
      "epoch": 0.5293035801863659,
      "grad_norm": 1.971103471555845,
      "learning_rate": 4.396266832353256e-06,
      "loss": 0.5384,
      "step": 4317
    },
    {
      "epoch": 0.5294261893084845,
      "grad_norm": 2.0562980423915787,
      "learning_rate": 4.395936503857192e-06,
      "loss": 0.5769,
      "step": 4318
    },
    {
      "epoch": 0.5295487984306032,
      "grad_norm": 2.097764635235397,
      "learning_rate": 4.3956060974347815e-06,
      "loss": 0.5377,
      "step": 4319
    },
    {
      "epoch": 0.5296714075527219,
      "grad_norm": 2.2587180051593596,
      "learning_rate": 4.395275613099605e-06,
      "loss": 0.5598,
      "step": 4320
    },
    {
      "epoch": 0.5297940166748406,
      "grad_norm": 2.0651374407414225,
      "learning_rate": 4.394945050865244e-06,
      "loss": 0.5089,
      "step": 4321
    },
    {
      "epoch": 0.5299166257969593,
      "grad_norm": 2.0462596046020427,
      "learning_rate": 4.394614410745288e-06,
      "loss": 0.5572,
      "step": 4322
    },
    {
      "epoch": 0.530039234919078,
      "grad_norm": 2.252081910014456,
      "learning_rate": 4.394283692753325e-06,
      "loss": 0.6035,
      "step": 4323
    },
    {
      "epoch": 0.5301618440411967,
      "grad_norm": 1.9880870202381018,
      "learning_rate": 4.393952896902949e-06,
      "loss": 0.5374,
      "step": 4324
    },
    {
      "epoch": 0.5302844531633154,
      "grad_norm": 1.83756687949796,
      "learning_rate": 4.393622023207756e-06,
      "loss": 0.5511,
      "step": 4325
    },
    {
      "epoch": 0.530407062285434,
      "grad_norm": 2.1056559201027665,
      "learning_rate": 4.393291071681345e-06,
      "loss": 0.6063,
      "step": 4326
    },
    {
      "epoch": 0.5305296714075527,
      "grad_norm": 1.9156674950726844,
      "learning_rate": 4.39296004233732e-06,
      "loss": 0.4403,
      "step": 4327
    },
    {
      "epoch": 0.5306522805296714,
      "grad_norm": 1.9746899231559274,
      "learning_rate": 4.3926289351892845e-06,
      "loss": 0.4972,
      "step": 4328
    },
    {
      "epoch": 0.5307748896517901,
      "grad_norm": 2.305935863129565,
      "learning_rate": 4.3922977502508505e-06,
      "loss": 0.5272,
      "step": 4329
    },
    {
      "epoch": 0.5308974987739088,
      "grad_norm": 2.063931467321759,
      "learning_rate": 4.391966487535629e-06,
      "loss": 0.5826,
      "step": 4330
    },
    {
      "epoch": 0.5310201078960275,
      "grad_norm": 2.0316180648768576,
      "learning_rate": 4.391635147057234e-06,
      "loss": 0.5414,
      "step": 4331
    },
    {
      "epoch": 0.5311427170181462,
      "grad_norm": 1.8640374171641665,
      "learning_rate": 4.391303728829287e-06,
      "loss": 0.5425,
      "step": 4332
    },
    {
      "epoch": 0.5312653261402648,
      "grad_norm": 2.2690867436054782,
      "learning_rate": 4.390972232865408e-06,
      "loss": 0.4617,
      "step": 4333
    },
    {
      "epoch": 0.5313879352623835,
      "grad_norm": 2.264294996860987,
      "learning_rate": 4.390640659179223e-06,
      "loss": 0.5692,
      "step": 4334
    },
    {
      "epoch": 0.5315105443845022,
      "grad_norm": 2.1048312393782647,
      "learning_rate": 4.390309007784359e-06,
      "loss": 0.5756,
      "step": 4335
    },
    {
      "epoch": 0.5316331535066209,
      "grad_norm": 1.8587804745694867,
      "learning_rate": 4.389977278694449e-06,
      "loss": 0.5107,
      "step": 4336
    },
    {
      "epoch": 0.5317557626287396,
      "grad_norm": 1.9098174064113271,
      "learning_rate": 4.389645471923126e-06,
      "loss": 0.5605,
      "step": 4337
    },
    {
      "epoch": 0.5318783717508583,
      "grad_norm": 1.9657073474158588,
      "learning_rate": 4.389313587484028e-06,
      "loss": 0.5596,
      "step": 4338
    },
    {
      "epoch": 0.532000980872977,
      "grad_norm": 2.0371951801264436,
      "learning_rate": 4.388981625390799e-06,
      "loss": 0.5628,
      "step": 4339
    },
    {
      "epoch": 0.5321235899950957,
      "grad_norm": 1.9150421975022554,
      "learning_rate": 4.38864958565708e-06,
      "loss": 0.4784,
      "step": 4340
    },
    {
      "epoch": 0.5322461991172143,
      "grad_norm": 2.6918333279357367,
      "learning_rate": 4.388317468296519e-06,
      "loss": 0.5622,
      "step": 4341
    },
    {
      "epoch": 0.532368808239333,
      "grad_norm": 1.9157982460601592,
      "learning_rate": 4.387985273322768e-06,
      "loss": 0.5216,
      "step": 4342
    },
    {
      "epoch": 0.5324914173614517,
      "grad_norm": 2.258310931345475,
      "learning_rate": 4.387653000749479e-06,
      "loss": 0.498,
      "step": 4343
    },
    {
      "epoch": 0.5326140264835704,
      "grad_norm": 1.9253676593852158,
      "learning_rate": 4.38732065059031e-06,
      "loss": 0.5543,
      "step": 4344
    },
    {
      "epoch": 0.5327366356056891,
      "grad_norm": 2.05543059135229,
      "learning_rate": 4.386988222858921e-06,
      "loss": 0.5481,
      "step": 4345
    },
    {
      "epoch": 0.5328592447278078,
      "grad_norm": 2.2662782910540646,
      "learning_rate": 4.386655717568976e-06,
      "loss": 0.5677,
      "step": 4346
    },
    {
      "epoch": 0.5329818538499265,
      "grad_norm": 2.030802043759628,
      "learning_rate": 4.3863231347341405e-06,
      "loss": 0.5141,
      "step": 4347
    },
    {
      "epoch": 0.5331044629720452,
      "grad_norm": 1.9860600809572142,
      "learning_rate": 4.385990474368085e-06,
      "loss": 0.4913,
      "step": 4348
    },
    {
      "epoch": 0.5332270720941638,
      "grad_norm": 1.9581514354889333,
      "learning_rate": 4.385657736484481e-06,
      "loss": 0.5829,
      "step": 4349
    },
    {
      "epoch": 0.5333496812162825,
      "grad_norm": 1.956290712993182,
      "learning_rate": 4.3853249210970076e-06,
      "loss": 0.5055,
      "step": 4350
    },
    {
      "epoch": 0.5334722903384012,
      "grad_norm": 2.026985396222822,
      "learning_rate": 4.384992028219341e-06,
      "loss": 0.5111,
      "step": 4351
    },
    {
      "epoch": 0.5335948994605199,
      "grad_norm": 2.4581326661360636,
      "learning_rate": 4.384659057865165e-06,
      "loss": 0.5787,
      "step": 4352
    },
    {
      "epoch": 0.5337175085826386,
      "grad_norm": 1.9953980716772222,
      "learning_rate": 4.384326010048165e-06,
      "loss": 0.5601,
      "step": 4353
    },
    {
      "epoch": 0.5338401177047573,
      "grad_norm": 1.9042947444231348,
      "learning_rate": 4.38399288478203e-06,
      "loss": 0.5641,
      "step": 4354
    },
    {
      "epoch": 0.533962726826876,
      "grad_norm": 2.0411558631864923,
      "learning_rate": 4.383659682080453e-06,
      "loss": 0.5436,
      "step": 4355
    },
    {
      "epoch": 0.5340853359489947,
      "grad_norm": 1.9930046258046732,
      "learning_rate": 4.383326401957127e-06,
      "loss": 0.5886,
      "step": 4356
    },
    {
      "epoch": 0.5342079450711132,
      "grad_norm": 1.8814481617046894,
      "learning_rate": 4.382993044425753e-06,
      "loss": 0.5433,
      "step": 4357
    },
    {
      "epoch": 0.5343305541932319,
      "grad_norm": 1.936734967708941,
      "learning_rate": 4.3826596095000306e-06,
      "loss": 0.5083,
      "step": 4358
    },
    {
      "epoch": 0.5344531633153506,
      "grad_norm": 2.1174551568287385,
      "learning_rate": 4.382326097193665e-06,
      "loss": 0.5505,
      "step": 4359
    },
    {
      "epoch": 0.5345757724374693,
      "grad_norm": 1.7912743522377965,
      "learning_rate": 4.381992507520366e-06,
      "loss": 0.5284,
      "step": 4360
    },
    {
      "epoch": 0.534698381559588,
      "grad_norm": 2.125716097830045,
      "learning_rate": 4.381658840493842e-06,
      "loss": 0.5715,
      "step": 4361
    },
    {
      "epoch": 0.5348209906817067,
      "grad_norm": 2.0442490409373653,
      "learning_rate": 4.381325096127808e-06,
      "loss": 0.57,
      "step": 4362
    },
    {
      "epoch": 0.5349435998038254,
      "grad_norm": 1.8055618205950081,
      "learning_rate": 4.380991274435983e-06,
      "loss": 0.5298,
      "step": 4363
    },
    {
      "epoch": 0.5350662089259441,
      "grad_norm": 1.889601990774823,
      "learning_rate": 4.380657375432085e-06,
      "loss": 0.5409,
      "step": 4364
    },
    {
      "epoch": 0.5351888180480627,
      "grad_norm": 2.2849939166653006,
      "learning_rate": 4.380323399129841e-06,
      "loss": 0.5558,
      "step": 4365
    },
    {
      "epoch": 0.5353114271701814,
      "grad_norm": 1.9619093339022216,
      "learning_rate": 4.379989345542975e-06,
      "loss": 0.6009,
      "step": 4366
    },
    {
      "epoch": 0.5354340362923001,
      "grad_norm": 2.013952760774462,
      "learning_rate": 4.37965521468522e-06,
      "loss": 0.599,
      "step": 4367
    },
    {
      "epoch": 0.5355566454144188,
      "grad_norm": 2.1188525539706786,
      "learning_rate": 4.379321006570307e-06,
      "loss": 0.5749,
      "step": 4368
    },
    {
      "epoch": 0.5356792545365375,
      "grad_norm": 2.0219019299007286,
      "learning_rate": 4.378986721211974e-06,
      "loss": 0.5255,
      "step": 4369
    },
    {
      "epoch": 0.5358018636586562,
      "grad_norm": 1.7941127705193785,
      "learning_rate": 4.37865235862396e-06,
      "loss": 0.5514,
      "step": 4370
    },
    {
      "epoch": 0.5359244727807749,
      "grad_norm": 2.1512930566332034,
      "learning_rate": 4.378317918820009e-06,
      "loss": 0.5617,
      "step": 4371
    },
    {
      "epoch": 0.5360470819028936,
      "grad_norm": 1.7738817371220363,
      "learning_rate": 4.377983401813865e-06,
      "loss": 0.5075,
      "step": 4372
    },
    {
      "epoch": 0.5361696910250122,
      "grad_norm": 2.1288039906203235,
      "learning_rate": 4.37764880761928e-06,
      "loss": 0.6216,
      "step": 4373
    },
    {
      "epoch": 0.5362923001471309,
      "grad_norm": 2.001566658303287,
      "learning_rate": 4.3773141362500035e-06,
      "loss": 0.5556,
      "step": 4374
    },
    {
      "epoch": 0.5364149092692496,
      "grad_norm": 1.9325908313405546,
      "learning_rate": 4.376979387719794e-06,
      "loss": 0.5687,
      "step": 4375
    },
    {
      "epoch": 0.5365375183913683,
      "grad_norm": 1.947081212691706,
      "learning_rate": 4.376644562042408e-06,
      "loss": 0.5449,
      "step": 4376
    },
    {
      "epoch": 0.536660127513487,
      "grad_norm": 2.0494293535668215,
      "learning_rate": 4.376309659231607e-06,
      "loss": 0.5158,
      "step": 4377
    },
    {
      "epoch": 0.5367827366356057,
      "grad_norm": 1.9114237857880436,
      "learning_rate": 4.375974679301158e-06,
      "loss": 0.5548,
      "step": 4378
    },
    {
      "epoch": 0.5369053457577244,
      "grad_norm": 2.1218269404103194,
      "learning_rate": 4.375639622264829e-06,
      "loss": 0.552,
      "step": 4379
    },
    {
      "epoch": 0.537027954879843,
      "grad_norm": 1.972299307040516,
      "learning_rate": 4.3753044881363906e-06,
      "loss": 0.5182,
      "step": 4380
    },
    {
      "epoch": 0.5371505640019617,
      "grad_norm": 1.7527880829523177,
      "learning_rate": 4.374969276929618e-06,
      "loss": 0.51,
      "step": 4381
    },
    {
      "epoch": 0.5372731731240804,
      "grad_norm": 1.839471173057509,
      "learning_rate": 4.374633988658289e-06,
      "loss": 0.5532,
      "step": 4382
    },
    {
      "epoch": 0.5373957822461991,
      "grad_norm": 2.0315225449563323,
      "learning_rate": 4.3742986233361835e-06,
      "loss": 0.5461,
      "step": 4383
    },
    {
      "epoch": 0.5375183913683178,
      "grad_norm": 1.9292819129692613,
      "learning_rate": 4.373963180977086e-06,
      "loss": 0.5049,
      "step": 4384
    },
    {
      "epoch": 0.5376410004904365,
      "grad_norm": 2.3729164990458176,
      "learning_rate": 4.373627661594786e-06,
      "loss": 0.565,
      "step": 4385
    },
    {
      "epoch": 0.5377636096125552,
      "grad_norm": 1.9497003468096568,
      "learning_rate": 4.373292065203071e-06,
      "loss": 0.5249,
      "step": 4386
    },
    {
      "epoch": 0.5378862187346739,
      "grad_norm": 2.0004206935046773,
      "learning_rate": 4.372956391815736e-06,
      "loss": 0.4922,
      "step": 4387
    },
    {
      "epoch": 0.5380088278567925,
      "grad_norm": 2.152027960743336,
      "learning_rate": 4.372620641446577e-06,
      "loss": 0.545,
      "step": 4388
    },
    {
      "epoch": 0.5381314369789112,
      "grad_norm": 2.1705719849477396,
      "learning_rate": 4.372284814109394e-06,
      "loss": 0.5078,
      "step": 4389
    },
    {
      "epoch": 0.5382540461010299,
      "grad_norm": 1.8997509586062906,
      "learning_rate": 4.3719489098179915e-06,
      "loss": 0.524,
      "step": 4390
    },
    {
      "epoch": 0.5383766552231486,
      "grad_norm": 2.0309797971219288,
      "learning_rate": 4.371612928586175e-06,
      "loss": 0.551,
      "step": 4391
    },
    {
      "epoch": 0.5384992643452673,
      "grad_norm": 1.9108158751475521,
      "learning_rate": 4.3712768704277535e-06,
      "loss": 0.5545,
      "step": 4392
    },
    {
      "epoch": 0.538621873467386,
      "grad_norm": 1.9970313761506522,
      "learning_rate": 4.370940735356539e-06,
      "loss": 0.5591,
      "step": 4393
    },
    {
      "epoch": 0.5387444825895047,
      "grad_norm": 2.0264714840134372,
      "learning_rate": 4.370604523386348e-06,
      "loss": 0.5576,
      "step": 4394
    },
    {
      "epoch": 0.5388670917116234,
      "grad_norm": 2.1989826779503083,
      "learning_rate": 4.370268234531001e-06,
      "loss": 0.5412,
      "step": 4395
    },
    {
      "epoch": 0.538989700833742,
      "grad_norm": 2.0164923127077183,
      "learning_rate": 4.3699318688043165e-06,
      "loss": 0.5767,
      "step": 4396
    },
    {
      "epoch": 0.5391123099558607,
      "grad_norm": 2.14437317837632,
      "learning_rate": 4.369595426220123e-06,
      "loss": 0.6026,
      "step": 4397
    },
    {
      "epoch": 0.5392349190779794,
      "grad_norm": 1.982485989128362,
      "learning_rate": 4.369258906792247e-06,
      "loss": 0.4836,
      "step": 4398
    },
    {
      "epoch": 0.5393575282000981,
      "grad_norm": 1.9305921658590968,
      "learning_rate": 4.3689223105345215e-06,
      "loss": 0.5553,
      "step": 4399
    },
    {
      "epoch": 0.5394801373222168,
      "grad_norm": 2.1632008197180568,
      "learning_rate": 4.36858563746078e-06,
      "loss": 0.5934,
      "step": 4400
    },
    {
      "epoch": 0.5396027464443355,
      "grad_norm": 2.095677520678825,
      "learning_rate": 4.368248887584861e-06,
      "loss": 0.5277,
      "step": 4401
    },
    {
      "epoch": 0.5397253555664542,
      "grad_norm": 2.2580744169021814,
      "learning_rate": 4.367912060920605e-06,
      "loss": 0.5604,
      "step": 4402
    },
    {
      "epoch": 0.5398479646885729,
      "grad_norm": 1.985682984171437,
      "learning_rate": 4.367575157481856e-06,
      "loss": 0.5212,
      "step": 4403
    },
    {
      "epoch": 0.5399705738106915,
      "grad_norm": 2.081559903900119,
      "learning_rate": 4.367238177282462e-06,
      "loss": 0.5225,
      "step": 4404
    },
    {
      "epoch": 0.5400931829328102,
      "grad_norm": 1.7321298249845156,
      "learning_rate": 4.366901120336273e-06,
      "loss": 0.5016,
      "step": 4405
    },
    {
      "epoch": 0.5402157920549289,
      "grad_norm": 1.9504034313831038,
      "learning_rate": 4.366563986657143e-06,
      "loss": 0.5398,
      "step": 4406
    },
    {
      "epoch": 0.5403384011770476,
      "grad_norm": 2.1192493723920736,
      "learning_rate": 4.366226776258929e-06,
      "loss": 0.5607,
      "step": 4407
    },
    {
      "epoch": 0.5404610102991663,
      "grad_norm": 2.304010581845175,
      "learning_rate": 4.36588948915549e-06,
      "loss": 0.5272,
      "step": 4408
    },
    {
      "epoch": 0.540583619421285,
      "grad_norm": 2.281809128619722,
      "learning_rate": 4.36555212536069e-06,
      "loss": 0.5819,
      "step": 4409
    },
    {
      "epoch": 0.5407062285434037,
      "grad_norm": 1.9103857956013828,
      "learning_rate": 4.365214684888396e-06,
      "loss": 0.5291,
      "step": 4410
    },
    {
      "epoch": 0.5408288376655224,
      "grad_norm": 2.8170418758419107,
      "learning_rate": 4.364877167752475e-06,
      "loss": 0.5546,
      "step": 4411
    },
    {
      "epoch": 0.540951446787641,
      "grad_norm": 1.942074741085428,
      "learning_rate": 4.3645395739668015e-06,
      "loss": 0.5025,
      "step": 4412
    },
    {
      "epoch": 0.5410740559097597,
      "grad_norm": 2.006494914323955,
      "learning_rate": 4.36420190354525e-06,
      "loss": 0.5782,
      "step": 4413
    },
    {
      "epoch": 0.5411966650318784,
      "grad_norm": 1.7362233538400575,
      "learning_rate": 4.3638641565017e-06,
      "loss": 0.5299,
      "step": 4414
    },
    {
      "epoch": 0.541319274153997,
      "grad_norm": 2.2922505955006263,
      "learning_rate": 4.363526332850034e-06,
      "loss": 0.5637,
      "step": 4415
    },
    {
      "epoch": 0.5414418832761158,
      "grad_norm": 2.086845098633022,
      "learning_rate": 4.3631884326041365e-06,
      "loss": 0.5092,
      "step": 4416
    },
    {
      "epoch": 0.5415644923982345,
      "grad_norm": 2.078134213379341,
      "learning_rate": 4.362850455777896e-06,
      "loss": 0.5279,
      "step": 4417
    },
    {
      "epoch": 0.5416871015203532,
      "grad_norm": 1.9545407721504342,
      "learning_rate": 4.3625124023852035e-06,
      "loss": 0.5813,
      "step": 4418
    },
    {
      "epoch": 0.5418097106424719,
      "grad_norm": 2.11011720449424,
      "learning_rate": 4.362174272439954e-06,
      "loss": 0.6021,
      "step": 4419
    },
    {
      "epoch": 0.5419323197645904,
      "grad_norm": 1.9997653921459018,
      "learning_rate": 4.361836065956045e-06,
      "loss": 0.5014,
      "step": 4420
    },
    {
      "epoch": 0.5420549288867091,
      "grad_norm": 1.899224438128691,
      "learning_rate": 4.361497782947378e-06,
      "loss": 0.4825,
      "step": 4421
    },
    {
      "epoch": 0.5421775380088278,
      "grad_norm": 1.8983437753197465,
      "learning_rate": 4.361159423427856e-06,
      "loss": 0.5452,
      "step": 4422
    },
    {
      "epoch": 0.5423001471309465,
      "grad_norm": 1.983781906586022,
      "learning_rate": 4.360820987411388e-06,
      "loss": 0.5847,
      "step": 4423
    },
    {
      "epoch": 0.5424227562530652,
      "grad_norm": 1.940435752075419,
      "learning_rate": 4.360482474911882e-06,
      "loss": 0.5498,
      "step": 4424
    },
    {
      "epoch": 0.5425453653751839,
      "grad_norm": 1.9821584355549884,
      "learning_rate": 4.360143885943253e-06,
      "loss": 0.4976,
      "step": 4425
    },
    {
      "epoch": 0.5426679744973026,
      "grad_norm": 2.116538897219879,
      "learning_rate": 4.359805220519417e-06,
      "loss": 0.5527,
      "step": 4426
    },
    {
      "epoch": 0.5427905836194212,
      "grad_norm": 1.9638949613502816,
      "learning_rate": 4.359466478654294e-06,
      "loss": 0.5127,
      "step": 4427
    },
    {
      "epoch": 0.5429131927415399,
      "grad_norm": 1.8305228441422168,
      "learning_rate": 4.359127660361806e-06,
      "loss": 0.5125,
      "step": 4428
    },
    {
      "epoch": 0.5430358018636586,
      "grad_norm": 2.2706397684206987,
      "learning_rate": 4.358788765655882e-06,
      "loss": 0.5872,
      "step": 4429
    },
    {
      "epoch": 0.5431584109857773,
      "grad_norm": 1.8448796579874116,
      "learning_rate": 4.3584497945504465e-06,
      "loss": 0.5591,
      "step": 4430
    },
    {
      "epoch": 0.543281020107896,
      "grad_norm": 1.9785897454578,
      "learning_rate": 4.358110747059435e-06,
      "loss": 0.518,
      "step": 4431
    },
    {
      "epoch": 0.5434036292300147,
      "grad_norm": 1.8205781314041334,
      "learning_rate": 4.357771623196784e-06,
      "loss": 0.5352,
      "step": 4432
    },
    {
      "epoch": 0.5435262383521334,
      "grad_norm": 2.205848108274737,
      "learning_rate": 4.357432422976428e-06,
      "loss": 0.6132,
      "step": 4433
    },
    {
      "epoch": 0.5436488474742521,
      "grad_norm": 1.7678836444573451,
      "learning_rate": 4.3570931464123124e-06,
      "loss": 0.5371,
      "step": 4434
    },
    {
      "epoch": 0.5437714565963707,
      "grad_norm": 1.9594748233446904,
      "learning_rate": 4.356753793518381e-06,
      "loss": 0.4946,
      "step": 4435
    },
    {
      "epoch": 0.5438940657184894,
      "grad_norm": 1.9472397805338872,
      "learning_rate": 4.356414364308581e-06,
      "loss": 0.5566,
      "step": 4436
    },
    {
      "epoch": 0.5440166748406081,
      "grad_norm": 1.939377256800202,
      "learning_rate": 4.356074858796864e-06,
      "loss": 0.5954,
      "step": 4437
    },
    {
      "epoch": 0.5441392839627268,
      "grad_norm": 1.9121855334797753,
      "learning_rate": 4.355735276997184e-06,
      "loss": 0.5829,
      "step": 4438
    },
    {
      "epoch": 0.5442618930848455,
      "grad_norm": 1.6945604219176384,
      "learning_rate": 4.3553956189234995e-06,
      "loss": 0.5513,
      "step": 4439
    },
    {
      "epoch": 0.5443845022069642,
      "grad_norm": 1.8447256596548411,
      "learning_rate": 4.35505588458977e-06,
      "loss": 0.5221,
      "step": 4440
    },
    {
      "epoch": 0.5445071113290829,
      "grad_norm": 2.0264631413087626,
      "learning_rate": 4.35471607400996e-06,
      "loss": 0.5215,
      "step": 4441
    },
    {
      "epoch": 0.5446297204512016,
      "grad_norm": 1.939553460121857,
      "learning_rate": 4.354376187198035e-06,
      "loss": 0.5102,
      "step": 4442
    },
    {
      "epoch": 0.5447523295733202,
      "grad_norm": 2.0683681509131,
      "learning_rate": 4.354036224167967e-06,
      "loss": 0.5453,
      "step": 4443
    },
    {
      "epoch": 0.5448749386954389,
      "grad_norm": 1.8913477572854973,
      "learning_rate": 4.353696184933727e-06,
      "loss": 0.5549,
      "step": 4444
    },
    {
      "epoch": 0.5449975478175576,
      "grad_norm": 1.8466396587479603,
      "learning_rate": 4.3533560695092924e-06,
      "loss": 0.4952,
      "step": 4445
    },
    {
      "epoch": 0.5451201569396763,
      "grad_norm": 1.9352317941857364,
      "learning_rate": 4.3530158779086414e-06,
      "loss": 0.5608,
      "step": 4446
    },
    {
      "epoch": 0.545242766061795,
      "grad_norm": 2.1948990209426347,
      "learning_rate": 4.352675610145758e-06,
      "loss": 0.5998,
      "step": 4447
    },
    {
      "epoch": 0.5453653751839137,
      "grad_norm": 2.081094136781531,
      "learning_rate": 4.352335266234628e-06,
      "loss": 0.5572,
      "step": 4448
    },
    {
      "epoch": 0.5454879843060324,
      "grad_norm": 1.909639173627345,
      "learning_rate": 4.351994846189238e-06,
      "loss": 0.5574,
      "step": 4449
    },
    {
      "epoch": 0.5456105934281511,
      "grad_norm": 2.101431609516175,
      "learning_rate": 4.351654350023581e-06,
      "loss": 0.5187,
      "step": 4450
    },
    {
      "epoch": 0.5457332025502697,
      "grad_norm": 2.235111619716066,
      "learning_rate": 4.351313777751653e-06,
      "loss": 0.5875,
      "step": 4451
    },
    {
      "epoch": 0.5458558116723884,
      "grad_norm": 2.084324457719132,
      "learning_rate": 4.350973129387451e-06,
      "loss": 0.5914,
      "step": 4452
    },
    {
      "epoch": 0.5459784207945071,
      "grad_norm": 1.8632053786123128,
      "learning_rate": 4.350632404944977e-06,
      "loss": 0.5086,
      "step": 4453
    },
    {
      "epoch": 0.5461010299166258,
      "grad_norm": 2.2846590751925455,
      "learning_rate": 4.350291604438234e-06,
      "loss": 0.5363,
      "step": 4454
    },
    {
      "epoch": 0.5462236390387445,
      "grad_norm": 2.0390970537915076,
      "learning_rate": 4.349950727881231e-06,
      "loss": 0.5213,
      "step": 4455
    },
    {
      "epoch": 0.5463462481608632,
      "grad_norm": 2.2987727860398257,
      "learning_rate": 4.349609775287977e-06,
      "loss": 0.6576,
      "step": 4456
    },
    {
      "epoch": 0.5464688572829819,
      "grad_norm": 1.9899993811181016,
      "learning_rate": 4.349268746672487e-06,
      "loss": 0.5816,
      "step": 4457
    },
    {
      "epoch": 0.5465914664051006,
      "grad_norm": 1.8946801730411424,
      "learning_rate": 4.348927642048778e-06,
      "loss": 0.5921,
      "step": 4458
    },
    {
      "epoch": 0.5467140755272192,
      "grad_norm": 1.8053605820842367,
      "learning_rate": 4.34858646143087e-06,
      "loss": 0.505,
      "step": 4459
    },
    {
      "epoch": 0.5468366846493379,
      "grad_norm": 1.9789242243361689,
      "learning_rate": 4.348245204832785e-06,
      "loss": 0.5026,
      "step": 4460
    },
    {
      "epoch": 0.5469592937714566,
      "grad_norm": 2.0692642415109903,
      "learning_rate": 4.347903872268551e-06,
      "loss": 0.5638,
      "step": 4461
    },
    {
      "epoch": 0.5470819028935753,
      "grad_norm": 1.8686088439852118,
      "learning_rate": 4.347562463752195e-06,
      "loss": 0.577,
      "step": 4462
    },
    {
      "epoch": 0.547204512015694,
      "grad_norm": 2.11776167937705,
      "learning_rate": 4.347220979297752e-06,
      "loss": 0.4956,
      "step": 4463
    },
    {
      "epoch": 0.5473271211378127,
      "grad_norm": 2.162618611853444,
      "learning_rate": 4.346879418919256e-06,
      "loss": 0.4904,
      "step": 4464
    },
    {
      "epoch": 0.5474497302599314,
      "grad_norm": 1.9474423755569312,
      "learning_rate": 4.346537782630746e-06,
      "loss": 0.4982,
      "step": 4465
    },
    {
      "epoch": 0.5475723393820501,
      "grad_norm": 2.103472596585202,
      "learning_rate": 4.346196070446266e-06,
      "loss": 0.5451,
      "step": 4466
    },
    {
      "epoch": 0.5476949485041687,
      "grad_norm": 2.067623982011634,
      "learning_rate": 4.345854282379857e-06,
      "loss": 0.5828,
      "step": 4467
    },
    {
      "epoch": 0.5478175576262874,
      "grad_norm": 1.9164721911239269,
      "learning_rate": 4.34551241844557e-06,
      "loss": 0.5466,
      "step": 4468
    },
    {
      "epoch": 0.5479401667484061,
      "grad_norm": 1.8723399276266772,
      "learning_rate": 4.345170478657455e-06,
      "loss": 0.5379,
      "step": 4469
    },
    {
      "epoch": 0.5480627758705248,
      "grad_norm": 1.9159097511837597,
      "learning_rate": 4.3448284630295664e-06,
      "loss": 0.5117,
      "step": 4470
    },
    {
      "epoch": 0.5481853849926435,
      "grad_norm": 1.9282432395969373,
      "learning_rate": 4.344486371575963e-06,
      "loss": 0.5326,
      "step": 4471
    },
    {
      "epoch": 0.5483079941147622,
      "grad_norm": 2.1548916897828794,
      "learning_rate": 4.344144204310704e-06,
      "loss": 0.5844,
      "step": 4472
    },
    {
      "epoch": 0.5484306032368809,
      "grad_norm": 1.9877566006436465,
      "learning_rate": 4.343801961247853e-06,
      "loss": 0.5107,
      "step": 4473
    },
    {
      "epoch": 0.5485532123589996,
      "grad_norm": 2.035518920790258,
      "learning_rate": 4.3434596424014775e-06,
      "loss": 0.5605,
      "step": 4474
    },
    {
      "epoch": 0.5486758214811182,
      "grad_norm": 2.1064021857190802,
      "learning_rate": 4.343117247785646e-06,
      "loss": 0.6131,
      "step": 4475
    },
    {
      "epoch": 0.5487984306032369,
      "grad_norm": 2.161136525604812,
      "learning_rate": 4.342774777414435e-06,
      "loss": 0.5629,
      "step": 4476
    },
    {
      "epoch": 0.5489210397253556,
      "grad_norm": 1.8918113116025368,
      "learning_rate": 4.342432231301916e-06,
      "loss": 0.5563,
      "step": 4477
    },
    {
      "epoch": 0.5490436488474743,
      "grad_norm": 1.98580477618611,
      "learning_rate": 4.342089609462172e-06,
      "loss": 0.5723,
      "step": 4478
    },
    {
      "epoch": 0.549166257969593,
      "grad_norm": 2.253874143142777,
      "learning_rate": 4.341746911909283e-06,
      "loss": 0.5843,
      "step": 4479
    },
    {
      "epoch": 0.5492888670917117,
      "grad_norm": 2.2702793645124255,
      "learning_rate": 4.341404138657336e-06,
      "loss": 0.5595,
      "step": 4480
    },
    {
      "epoch": 0.5494114762138304,
      "grad_norm": 1.9223676889304293,
      "learning_rate": 4.341061289720419e-06,
      "loss": 0.5632,
      "step": 4481
    },
    {
      "epoch": 0.5495340853359489,
      "grad_norm": 1.9541551512475128,
      "learning_rate": 4.340718365112623e-06,
      "loss": 0.5728,
      "step": 4482
    },
    {
      "epoch": 0.5496566944580676,
      "grad_norm": 2.0204742205846173,
      "learning_rate": 4.340375364848043e-06,
      "loss": 0.57,
      "step": 4483
    },
    {
      "epoch": 0.5497793035801863,
      "grad_norm": 2.1680747435533747,
      "learning_rate": 4.3400322889407785e-06,
      "loss": 0.5826,
      "step": 4484
    },
    {
      "epoch": 0.549901912702305,
      "grad_norm": 2.082709430699185,
      "learning_rate": 4.339689137404929e-06,
      "loss": 0.577,
      "step": 4485
    },
    {
      "epoch": 0.5500245218244237,
      "grad_norm": 1.944508747634276,
      "learning_rate": 4.339345910254599e-06,
      "loss": 0.5106,
      "step": 4486
    },
    {
      "epoch": 0.5501471309465424,
      "grad_norm": 1.8848770021620191,
      "learning_rate": 4.339002607503896e-06,
      "loss": 0.5422,
      "step": 4487
    },
    {
      "epoch": 0.5502697400686611,
      "grad_norm": 1.9006513425625577,
      "learning_rate": 4.33865922916693e-06,
      "loss": 0.6413,
      "step": 4488
    },
    {
      "epoch": 0.5503923491907798,
      "grad_norm": 1.6968604518296908,
      "learning_rate": 4.3383157752578146e-06,
      "loss": 0.5398,
      "step": 4489
    },
    {
      "epoch": 0.5505149583128984,
      "grad_norm": 1.925144948494366,
      "learning_rate": 4.337972245790666e-06,
      "loss": 0.5737,
      "step": 4490
    },
    {
      "epoch": 0.5506375674350171,
      "grad_norm": 1.9206447527042938,
      "learning_rate": 4.337628640779604e-06,
      "loss": 0.5405,
      "step": 4491
    },
    {
      "epoch": 0.5507601765571358,
      "grad_norm": 1.7006208882685914,
      "learning_rate": 4.337284960238752e-06,
      "loss": 0.4614,
      "step": 4492
    },
    {
      "epoch": 0.5508827856792545,
      "grad_norm": 1.7275326142058653,
      "learning_rate": 4.336941204182236e-06,
      "loss": 0.5328,
      "step": 4493
    },
    {
      "epoch": 0.5510053948013732,
      "grad_norm": 1.8380900991559268,
      "learning_rate": 4.336597372624184e-06,
      "loss": 0.5051,
      "step": 4494
    },
    {
      "epoch": 0.5511280039234919,
      "grad_norm": 2.0592474813471235,
      "learning_rate": 4.336253465578729e-06,
      "loss": 0.5333,
      "step": 4495
    },
    {
      "epoch": 0.5512506130456106,
      "grad_norm": 1.9623678104755262,
      "learning_rate": 4.3359094830600056e-06,
      "loss": 0.4992,
      "step": 4496
    },
    {
      "epoch": 0.5513732221677293,
      "grad_norm": 2.4016353382398394,
      "learning_rate": 4.335565425082151e-06,
      "loss": 0.5753,
      "step": 4497
    },
    {
      "epoch": 0.5514958312898479,
      "grad_norm": 2.065055848450175,
      "learning_rate": 4.335221291659309e-06,
      "loss": 0.5057,
      "step": 4498
    },
    {
      "epoch": 0.5516184404119666,
      "grad_norm": 2.2578492756052144,
      "learning_rate": 4.334877082805623e-06,
      "loss": 0.6293,
      "step": 4499
    },
    {
      "epoch": 0.5517410495340853,
      "grad_norm": 2.1708907976082767,
      "learning_rate": 4.3345327985352405e-06,
      "loss": 0.501,
      "step": 4500
    },
    {
      "epoch": 0.551863658656204,
      "grad_norm": 2.0548104278164323,
      "learning_rate": 4.3341884388623114e-06,
      "loss": 0.5698,
      "step": 4501
    },
    {
      "epoch": 0.5519862677783227,
      "grad_norm": 1.9777687282179865,
      "learning_rate": 4.333844003800991e-06,
      "loss": 0.552,
      "step": 4502
    },
    {
      "epoch": 0.5521088769004414,
      "grad_norm": 1.8708869651207902,
      "learning_rate": 4.333499493365435e-06,
      "loss": 0.5444,
      "step": 4503
    },
    {
      "epoch": 0.5522314860225601,
      "grad_norm": 1.9454254783217675,
      "learning_rate": 4.3331549075698044e-06,
      "loss": 0.5661,
      "step": 4504
    },
    {
      "epoch": 0.5523540951446788,
      "grad_norm": 2.027838074908458,
      "learning_rate": 4.332810246428261e-06,
      "loss": 0.5952,
      "step": 4505
    },
    {
      "epoch": 0.5524767042667974,
      "grad_norm": 1.814939794321082,
      "learning_rate": 4.332465509954973e-06,
      "loss": 0.5108,
      "step": 4506
    },
    {
      "epoch": 0.5525993133889161,
      "grad_norm": 2.0445261582382934,
      "learning_rate": 4.3321206981641075e-06,
      "loss": 0.6228,
      "step": 4507
    },
    {
      "epoch": 0.5527219225110348,
      "grad_norm": 1.9638155324668767,
      "learning_rate": 4.331775811069837e-06,
      "loss": 0.508,
      "step": 4508
    },
    {
      "epoch": 0.5528445316331535,
      "grad_norm": 2.3188968413925597,
      "learning_rate": 4.331430848686339e-06,
      "loss": 0.539,
      "step": 4509
    },
    {
      "epoch": 0.5529671407552722,
      "grad_norm": 1.9711364669479305,
      "learning_rate": 4.3310858110277905e-06,
      "loss": 0.5249,
      "step": 4510
    },
    {
      "epoch": 0.5530897498773909,
      "grad_norm": 2.1587506806239003,
      "learning_rate": 4.3307406981083736e-06,
      "loss": 0.5562,
      "step": 4511
    },
    {
      "epoch": 0.5532123589995096,
      "grad_norm": 2.0124185008779016,
      "learning_rate": 4.330395509942273e-06,
      "loss": 0.5097,
      "step": 4512
    },
    {
      "epoch": 0.5533349681216283,
      "grad_norm": 1.9619277376270783,
      "learning_rate": 4.330050246543676e-06,
      "loss": 0.5806,
      "step": 4513
    },
    {
      "epoch": 0.5534575772437469,
      "grad_norm": 2.247032111042961,
      "learning_rate": 4.329704907926774e-06,
      "loss": 0.6214,
      "step": 4514
    },
    {
      "epoch": 0.5535801863658656,
      "grad_norm": 2.0811710904243372,
      "learning_rate": 4.329359494105762e-06,
      "loss": 0.5235,
      "step": 4515
    },
    {
      "epoch": 0.5537027954879843,
      "grad_norm": 1.9045866532755458,
      "learning_rate": 4.329014005094836e-06,
      "loss": 0.4708,
      "step": 4516
    },
    {
      "epoch": 0.553825404610103,
      "grad_norm": 1.819646015343249,
      "learning_rate": 4.328668440908195e-06,
      "loss": 0.5661,
      "step": 4517
    },
    {
      "epoch": 0.5539480137322217,
      "grad_norm": 2.1487073083146755,
      "learning_rate": 4.328322801560045e-06,
      "loss": 0.5937,
      "step": 4518
    },
    {
      "epoch": 0.5540706228543404,
      "grad_norm": 1.7317582132637166,
      "learning_rate": 4.327977087064591e-06,
      "loss": 0.5206,
      "step": 4519
    },
    {
      "epoch": 0.5541932319764591,
      "grad_norm": 2.1659255519081686,
      "learning_rate": 4.327631297436042e-06,
      "loss": 0.5911,
      "step": 4520
    },
    {
      "epoch": 0.5543158410985778,
      "grad_norm": 1.9008861737455178,
      "learning_rate": 4.327285432688611e-06,
      "loss": 0.5349,
      "step": 4521
    },
    {
      "epoch": 0.5544384502206964,
      "grad_norm": 1.8751364687793743,
      "learning_rate": 4.326939492836514e-06,
      "loss": 0.5341,
      "step": 4522
    },
    {
      "epoch": 0.5545610593428151,
      "grad_norm": 1.744402935895259,
      "learning_rate": 4.32659347789397e-06,
      "loss": 0.5242,
      "step": 4523
    },
    {
      "epoch": 0.5546836684649338,
      "grad_norm": 1.9088767438733074,
      "learning_rate": 4.3262473878751996e-06,
      "loss": 0.5549,
      "step": 4524
    },
    {
      "epoch": 0.5548062775870525,
      "grad_norm": 1.9327686870184333,
      "learning_rate": 4.325901222794429e-06,
      "loss": 0.5719,
      "step": 4525
    },
    {
      "epoch": 0.5549288867091712,
      "grad_norm": 2.002807818529121,
      "learning_rate": 4.325554982665885e-06,
      "loss": 0.5237,
      "step": 4526
    },
    {
      "epoch": 0.5550514958312899,
      "grad_norm": 2.071976268136967,
      "learning_rate": 4.3252086675038e-06,
      "loss": 0.5808,
      "step": 4527
    },
    {
      "epoch": 0.5551741049534086,
      "grad_norm": 2.2584390330588326,
      "learning_rate": 4.324862277322407e-06,
      "loss": 0.5452,
      "step": 4528
    },
    {
      "epoch": 0.5552967140755272,
      "grad_norm": 2.083251206034892,
      "learning_rate": 4.324515812135944e-06,
      "loss": 0.5248,
      "step": 4529
    },
    {
      "epoch": 0.5554193231976459,
      "grad_norm": 1.8946864164470225,
      "learning_rate": 4.324169271958651e-06,
      "loss": 0.5727,
      "step": 4530
    },
    {
      "epoch": 0.5555419323197646,
      "grad_norm": 1.8187617077832643,
      "learning_rate": 4.323822656804771e-06,
      "loss": 0.507,
      "step": 4531
    },
    {
      "epoch": 0.5556645414418833,
      "grad_norm": 2.118817510854704,
      "learning_rate": 4.323475966688552e-06,
      "loss": 0.4752,
      "step": 4532
    },
    {
      "epoch": 0.555787150564002,
      "grad_norm": 2.2708484523940498,
      "learning_rate": 4.323129201624242e-06,
      "loss": 0.5543,
      "step": 4533
    },
    {
      "epoch": 0.5559097596861207,
      "grad_norm": 2.213071730812889,
      "learning_rate": 4.322782361626094e-06,
      "loss": 0.528,
      "step": 4534
    },
    {
      "epoch": 0.5560323688082394,
      "grad_norm": 1.8712524987221704,
      "learning_rate": 4.322435446708364e-06,
      "loss": 0.5495,
      "step": 4535
    },
    {
      "epoch": 0.5561549779303581,
      "grad_norm": 1.8630269572221425,
      "learning_rate": 4.3220884568853105e-06,
      "loss": 0.5094,
      "step": 4536
    },
    {
      "epoch": 0.5562775870524767,
      "grad_norm": 1.9759754138160295,
      "learning_rate": 4.321741392171196e-06,
      "loss": 0.5605,
      "step": 4537
    },
    {
      "epoch": 0.5564001961745954,
      "grad_norm": 2.1620789045735966,
      "learning_rate": 4.321394252580285e-06,
      "loss": 0.5505,
      "step": 4538
    },
    {
      "epoch": 0.556522805296714,
      "grad_norm": 1.9754490921162524,
      "learning_rate": 4.321047038126847e-06,
      "loss": 0.5507,
      "step": 4539
    },
    {
      "epoch": 0.5566454144188328,
      "grad_norm": 2.212909387507824,
      "learning_rate": 4.320699748825151e-06,
      "loss": 0.5579,
      "step": 4540
    },
    {
      "epoch": 0.5567680235409515,
      "grad_norm": 2.2830014043374436,
      "learning_rate": 4.3203523846894715e-06,
      "loss": 0.5787,
      "step": 4541
    },
    {
      "epoch": 0.5568906326630702,
      "grad_norm": 1.8981732659823762,
      "learning_rate": 4.320004945734087e-06,
      "loss": 0.5654,
      "step": 4542
    },
    {
      "epoch": 0.5570132417851889,
      "grad_norm": 1.863542261078723,
      "learning_rate": 4.319657431973276e-06,
      "loss": 0.5135,
      "step": 4543
    },
    {
      "epoch": 0.5571358509073076,
      "grad_norm": 2.036744923351856,
      "learning_rate": 4.319309843421324e-06,
      "loss": 0.5531,
      "step": 4544
    },
    {
      "epoch": 0.5572584600294261,
      "grad_norm": 2.005792507650771,
      "learning_rate": 4.318962180092516e-06,
      "loss": 0.5394,
      "step": 4545
    },
    {
      "epoch": 0.5573810691515448,
      "grad_norm": 2.0312900131462848,
      "learning_rate": 4.318614442001144e-06,
      "loss": 0.6129,
      "step": 4546
    },
    {
      "epoch": 0.5575036782736635,
      "grad_norm": 2.0082499996801007,
      "learning_rate": 4.318266629161498e-06,
      "loss": 0.5424,
      "step": 4547
    },
    {
      "epoch": 0.5576262873957822,
      "grad_norm": 2.0146846932954086,
      "learning_rate": 4.317918741587873e-06,
      "loss": 0.5056,
      "step": 4548
    },
    {
      "epoch": 0.5577488965179009,
      "grad_norm": 1.9994710115137102,
      "learning_rate": 4.31757077929457e-06,
      "loss": 0.514,
      "step": 4549
    },
    {
      "epoch": 0.5578715056400196,
      "grad_norm": 2.036572380153905,
      "learning_rate": 4.317222742295891e-06,
      "loss": 0.5347,
      "step": 4550
    },
    {
      "epoch": 0.5579941147621383,
      "grad_norm": 1.9415867994271843,
      "learning_rate": 4.316874630606139e-06,
      "loss": 0.514,
      "step": 4551
    },
    {
      "epoch": 0.558116723884257,
      "grad_norm": 1.9242832176223532,
      "learning_rate": 4.316526444239625e-06,
      "loss": 0.546,
      "step": 4552
    },
    {
      "epoch": 0.5582393330063756,
      "grad_norm": 1.8060147848088606,
      "learning_rate": 4.316178183210657e-06,
      "loss": 0.5206,
      "step": 4553
    },
    {
      "epoch": 0.5583619421284943,
      "grad_norm": 2.0293001619154367,
      "learning_rate": 4.31582984753355e-06,
      "loss": 0.4969,
      "step": 4554
    },
    {
      "epoch": 0.558484551250613,
      "grad_norm": 2.06652417203627,
      "learning_rate": 4.315481437222623e-06,
      "loss": 0.5604,
      "step": 4555
    },
    {
      "epoch": 0.5586071603727317,
      "grad_norm": 1.9095759115972653,
      "learning_rate": 4.315132952292194e-06,
      "loss": 0.5606,
      "step": 4556
    },
    {
      "epoch": 0.5587297694948504,
      "grad_norm": 1.9842376878158703,
      "learning_rate": 4.314784392756587e-06,
      "loss": 0.5573,
      "step": 4557
    },
    {
      "epoch": 0.5588523786169691,
      "grad_norm": 1.7520783713678298,
      "learning_rate": 4.314435758630128e-06,
      "loss": 0.5142,
      "step": 4558
    },
    {
      "epoch": 0.5589749877390878,
      "grad_norm": 1.961376417588159,
      "learning_rate": 4.314087049927148e-06,
      "loss": 0.6017,
      "step": 4559
    },
    {
      "epoch": 0.5590975968612065,
      "grad_norm": 2.0999676443133724,
      "learning_rate": 4.313738266661979e-06,
      "loss": 0.5505,
      "step": 4560
    },
    {
      "epoch": 0.5592202059833251,
      "grad_norm": 2.117240773964895,
      "learning_rate": 4.3133894088489555e-06,
      "loss": 0.5785,
      "step": 4561
    },
    {
      "epoch": 0.5593428151054438,
      "grad_norm": 1.908802562118846,
      "learning_rate": 4.3130404765024175e-06,
      "loss": 0.558,
      "step": 4562
    },
    {
      "epoch": 0.5594654242275625,
      "grad_norm": 2.1263224590039833,
      "learning_rate": 4.312691469636706e-06,
      "loss": 0.5465,
      "step": 4563
    },
    {
      "epoch": 0.5595880333496812,
      "grad_norm": 1.899399168892305,
      "learning_rate": 4.312342388266167e-06,
      "loss": 0.5783,
      "step": 4564
    },
    {
      "epoch": 0.5597106424717999,
      "grad_norm": 1.9800616545950374,
      "learning_rate": 4.311993232405146e-06,
      "loss": 0.5957,
      "step": 4565
    },
    {
      "epoch": 0.5598332515939186,
      "grad_norm": 1.9463992799439294,
      "learning_rate": 4.311644002067995e-06,
      "loss": 0.5362,
      "step": 4566
    },
    {
      "epoch": 0.5599558607160373,
      "grad_norm": 2.037798005870736,
      "learning_rate": 4.311294697269069e-06,
      "loss": 0.5417,
      "step": 4567
    },
    {
      "epoch": 0.560078469838156,
      "grad_norm": 1.9189401054560755,
      "learning_rate": 4.310945318022724e-06,
      "loss": 0.5446,
      "step": 4568
    },
    {
      "epoch": 0.5602010789602746,
      "grad_norm": 1.9394676853155464,
      "learning_rate": 4.310595864343321e-06,
      "loss": 0.5277,
      "step": 4569
    },
    {
      "epoch": 0.5603236880823933,
      "grad_norm": 2.299264234188704,
      "learning_rate": 4.310246336245222e-06,
      "loss": 0.6259,
      "step": 4570
    },
    {
      "epoch": 0.560446297204512,
      "grad_norm": 1.9172540293688454,
      "learning_rate": 4.309896733742794e-06,
      "loss": 0.5257,
      "step": 4571
    },
    {
      "epoch": 0.5605689063266307,
      "grad_norm": 2.0109217092401503,
      "learning_rate": 4.309547056850406e-06,
      "loss": 0.5148,
      "step": 4572
    },
    {
      "epoch": 0.5606915154487494,
      "grad_norm": 2.045377738459692,
      "learning_rate": 4.30919730558243e-06,
      "loss": 0.5193,
      "step": 4573
    },
    {
      "epoch": 0.5608141245708681,
      "grad_norm": 1.8768591919656052,
      "learning_rate": 4.308847479953242e-06,
      "loss": 0.502,
      "step": 4574
    },
    {
      "epoch": 0.5609367336929868,
      "grad_norm": 1.9160248044094834,
      "learning_rate": 4.308497579977221e-06,
      "loss": 0.5234,
      "step": 4575
    },
    {
      "epoch": 0.5610593428151054,
      "grad_norm": 1.8916817439631382,
      "learning_rate": 4.308147605668747e-06,
      "loss": 0.6108,
      "step": 4576
    },
    {
      "epoch": 0.5611819519372241,
      "grad_norm": 1.8368053199224699,
      "learning_rate": 4.307797557042206e-06,
      "loss": 0.5372,
      "step": 4577
    },
    {
      "epoch": 0.5613045610593428,
      "grad_norm": 1.9313206893368806,
      "learning_rate": 4.307447434111984e-06,
      "loss": 0.5459,
      "step": 4578
    },
    {
      "epoch": 0.5614271701814615,
      "grad_norm": 1.8056859048531897,
      "learning_rate": 4.307097236892473e-06,
      "loss": 0.5399,
      "step": 4579
    },
    {
      "epoch": 0.5615497793035802,
      "grad_norm": 1.8481460098603835,
      "learning_rate": 4.306746965398066e-06,
      "loss": 0.5568,
      "step": 4580
    },
    {
      "epoch": 0.5616723884256989,
      "grad_norm": 1.937860560463963,
      "learning_rate": 4.306396619643162e-06,
      "loss": 0.5429,
      "step": 4581
    },
    {
      "epoch": 0.5617949975478176,
      "grad_norm": 2.140234621418726,
      "learning_rate": 4.306046199642157e-06,
      "loss": 0.5636,
      "step": 4582
    },
    {
      "epoch": 0.5619176066699363,
      "grad_norm": 1.944215914859375,
      "learning_rate": 4.305695705409456e-06,
      "loss": 0.5423,
      "step": 4583
    },
    {
      "epoch": 0.5620402157920549,
      "grad_norm": 2.105391299901767,
      "learning_rate": 4.305345136959466e-06,
      "loss": 0.5839,
      "step": 4584
    },
    {
      "epoch": 0.5621628249141736,
      "grad_norm": 2.0702070515367277,
      "learning_rate": 4.304994494306594e-06,
      "loss": 0.5153,
      "step": 4585
    },
    {
      "epoch": 0.5622854340362923,
      "grad_norm": 1.9760319853869526,
      "learning_rate": 4.3046437774652525e-06,
      "loss": 0.497,
      "step": 4586
    },
    {
      "epoch": 0.562408043158411,
      "grad_norm": 2.114003758293673,
      "learning_rate": 4.304292986449857e-06,
      "loss": 0.5914,
      "step": 4587
    },
    {
      "epoch": 0.5625306522805297,
      "grad_norm": 1.8579988619427337,
      "learning_rate": 4.303942121274826e-06,
      "loss": 0.4882,
      "step": 4588
    },
    {
      "epoch": 0.5626532614026484,
      "grad_norm": 1.990335509090655,
      "learning_rate": 4.30359118195458e-06,
      "loss": 0.5622,
      "step": 4589
    },
    {
      "epoch": 0.5627758705247671,
      "grad_norm": 2.1911195328800623,
      "learning_rate": 4.303240168503543e-06,
      "loss": 0.583,
      "step": 4590
    },
    {
      "epoch": 0.5628984796468858,
      "grad_norm": 1.918176940363186,
      "learning_rate": 4.302889080936143e-06,
      "loss": 0.5308,
      "step": 4591
    },
    {
      "epoch": 0.5630210887690044,
      "grad_norm": 2.0419551007241803,
      "learning_rate": 4.302537919266809e-06,
      "loss": 0.5756,
      "step": 4592
    },
    {
      "epoch": 0.5631436978911231,
      "grad_norm": 1.8035571658452085,
      "learning_rate": 4.302186683509977e-06,
      "loss": 0.5948,
      "step": 4593
    },
    {
      "epoch": 0.5632663070132418,
      "grad_norm": 2.0093410181706117,
      "learning_rate": 4.301835373680081e-06,
      "loss": 0.5709,
      "step": 4594
    },
    {
      "epoch": 0.5633889161353605,
      "grad_norm": 2.072393887053177,
      "learning_rate": 4.301483989791561e-06,
      "loss": 0.5168,
      "step": 4595
    },
    {
      "epoch": 0.5635115252574792,
      "grad_norm": 1.7597966842935513,
      "learning_rate": 4.301132531858861e-06,
      "loss": 0.5488,
      "step": 4596
    },
    {
      "epoch": 0.5636341343795979,
      "grad_norm": 2.1788965688703064,
      "learning_rate": 4.300780999896424e-06,
      "loss": 0.6069,
      "step": 4597
    },
    {
      "epoch": 0.5637567435017166,
      "grad_norm": 1.874653861500083,
      "learning_rate": 4.3004293939187e-06,
      "loss": 0.5308,
      "step": 4598
    },
    {
      "epoch": 0.5638793526238353,
      "grad_norm": 1.92726754257326,
      "learning_rate": 4.300077713940141e-06,
      "loss": 0.5601,
      "step": 4599
    },
    {
      "epoch": 0.5640019617459539,
      "grad_norm": 2.1628523307377296,
      "learning_rate": 4.2997259599752e-06,
      "loss": 0.5083,
      "step": 4600
    },
    {
      "epoch": 0.5641245708680726,
      "grad_norm": 2.016234896902367,
      "learning_rate": 4.299374132038337e-06,
      "loss": 0.6173,
      "step": 4601
    },
    {
      "epoch": 0.5642471799901912,
      "grad_norm": 1.9491300400286111,
      "learning_rate": 4.299022230144012e-06,
      "loss": 0.5563,
      "step": 4602
    },
    {
      "epoch": 0.56436978911231,
      "grad_norm": 2.081320375725382,
      "learning_rate": 4.298670254306688e-06,
      "loss": 0.5471,
      "step": 4603
    },
    {
      "epoch": 0.5644923982344286,
      "grad_norm": 1.9137219648842703,
      "learning_rate": 4.298318204540832e-06,
      "loss": 0.6025,
      "step": 4604
    },
    {
      "epoch": 0.5646150073565473,
      "grad_norm": 2.1083508013238066,
      "learning_rate": 4.297966080860914e-06,
      "loss": 0.5538,
      "step": 4605
    },
    {
      "epoch": 0.564737616478666,
      "grad_norm": 1.9851959688409353,
      "learning_rate": 4.297613883281407e-06,
      "loss": 0.5845,
      "step": 4606
    },
    {
      "epoch": 0.5648602256007847,
      "grad_norm": 2.0272342443299824,
      "learning_rate": 4.297261611816787e-06,
      "loss": 0.569,
      "step": 4607
    },
    {
      "epoch": 0.5649828347229033,
      "grad_norm": 2.009108545604349,
      "learning_rate": 4.296909266481534e-06,
      "loss": 0.5187,
      "step": 4608
    },
    {
      "epoch": 0.565105443845022,
      "grad_norm": 1.9711524088551649,
      "learning_rate": 4.296556847290128e-06,
      "loss": 0.5111,
      "step": 4609
    },
    {
      "epoch": 0.5652280529671407,
      "grad_norm": 1.9164105675747152,
      "learning_rate": 4.296204354257055e-06,
      "loss": 0.5011,
      "step": 4610
    },
    {
      "epoch": 0.5653506620892594,
      "grad_norm": 1.9536603923284221,
      "learning_rate": 4.295851787396803e-06,
      "loss": 0.5468,
      "step": 4611
    },
    {
      "epoch": 0.5654732712113781,
      "grad_norm": 2.1041131368481953,
      "learning_rate": 4.295499146723864e-06,
      "loss": 0.5725,
      "step": 4612
    },
    {
      "epoch": 0.5655958803334968,
      "grad_norm": 2.100245836398757,
      "learning_rate": 4.2951464322527305e-06,
      "loss": 0.4952,
      "step": 4613
    },
    {
      "epoch": 0.5657184894556155,
      "grad_norm": 1.9662381594246396,
      "learning_rate": 4.294793643997901e-06,
      "loss": 0.6052,
      "step": 4614
    },
    {
      "epoch": 0.5658410985777342,
      "grad_norm": 2.042284789328476,
      "learning_rate": 4.294440781973876e-06,
      "loss": 0.5826,
      "step": 4615
    },
    {
      "epoch": 0.5659637076998528,
      "grad_norm": 2.350862847417886,
      "learning_rate": 4.294087846195157e-06,
      "loss": 0.5545,
      "step": 4616
    },
    {
      "epoch": 0.5660863168219715,
      "grad_norm": 1.9602625678449666,
      "learning_rate": 4.2937348366762514e-06,
      "loss": 0.6127,
      "step": 4617
    },
    {
      "epoch": 0.5662089259440902,
      "grad_norm": 2.0405974547323784,
      "learning_rate": 4.293381753431669e-06,
      "loss": 0.5641,
      "step": 4618
    },
    {
      "epoch": 0.5663315350662089,
      "grad_norm": 2.1328929707864623,
      "learning_rate": 4.293028596475921e-06,
      "loss": 0.5541,
      "step": 4619
    },
    {
      "epoch": 0.5664541441883276,
      "grad_norm": 2.131689650620925,
      "learning_rate": 4.2926753658235234e-06,
      "loss": 0.5244,
      "step": 4620
    },
    {
      "epoch": 0.5665767533104463,
      "grad_norm": 2.0353425390111237,
      "learning_rate": 4.2923220614889945e-06,
      "loss": 0.5788,
      "step": 4621
    },
    {
      "epoch": 0.566699362432565,
      "grad_norm": 1.9971173474639774,
      "learning_rate": 4.2919686834868565e-06,
      "loss": 0.5399,
      "step": 4622
    },
    {
      "epoch": 0.5668219715546837,
      "grad_norm": 2.209304737518138,
      "learning_rate": 4.2916152318316335e-06,
      "loss": 0.6079,
      "step": 4623
    },
    {
      "epoch": 0.5669445806768023,
      "grad_norm": 2.0602623450930775,
      "learning_rate": 4.291261706537851e-06,
      "loss": 0.5063,
      "step": 4624
    },
    {
      "epoch": 0.567067189798921,
      "grad_norm": 2.1452870355659117,
      "learning_rate": 4.290908107620043e-06,
      "loss": 0.5354,
      "step": 4625
    },
    {
      "epoch": 0.5671897989210397,
      "grad_norm": 2.046272474085277,
      "learning_rate": 4.290554435092741e-06,
      "loss": 0.5553,
      "step": 4626
    },
    {
      "epoch": 0.5673124080431584,
      "grad_norm": 2.3143930307462615,
      "learning_rate": 4.29020068897048e-06,
      "loss": 0.5475,
      "step": 4627
    },
    {
      "epoch": 0.5674350171652771,
      "grad_norm": 2.2823148672169946,
      "learning_rate": 4.289846869267803e-06,
      "loss": 0.5809,
      "step": 4628
    },
    {
      "epoch": 0.5675576262873958,
      "grad_norm": 2.000251433393156,
      "learning_rate": 4.289492975999251e-06,
      "loss": 0.5547,
      "step": 4629
    },
    {
      "epoch": 0.5676802354095145,
      "grad_norm": 1.9341011104577384,
      "learning_rate": 4.289139009179369e-06,
      "loss": 0.5339,
      "step": 4630
    },
    {
      "epoch": 0.5678028445316331,
      "grad_norm": 1.8964136419117383,
      "learning_rate": 4.288784968822707e-06,
      "loss": 0.5423,
      "step": 4631
    },
    {
      "epoch": 0.5679254536537518,
      "grad_norm": 1.7740401899043625,
      "learning_rate": 4.2884308549438156e-06,
      "loss": 0.5595,
      "step": 4632
    },
    {
      "epoch": 0.5680480627758705,
      "grad_norm": 2.0992878859093076,
      "learning_rate": 4.288076667557249e-06,
      "loss": 0.5455,
      "step": 4633
    },
    {
      "epoch": 0.5681706718979892,
      "grad_norm": 2.1738175253496124,
      "learning_rate": 4.287722406677567e-06,
      "loss": 0.5735,
      "step": 4634
    },
    {
      "epoch": 0.5682932810201079,
      "grad_norm": 2.167759771378541,
      "learning_rate": 4.287368072319329e-06,
      "loss": 0.5158,
      "step": 4635
    },
    {
      "epoch": 0.5684158901422266,
      "grad_norm": 1.9954333107063413,
      "learning_rate": 4.287013664497099e-06,
      "loss": 0.5605,
      "step": 4636
    },
    {
      "epoch": 0.5685384992643453,
      "grad_norm": 2.0720973610888653,
      "learning_rate": 4.2866591832254436e-06,
      "loss": 0.5253,
      "step": 4637
    },
    {
      "epoch": 0.568661108386464,
      "grad_norm": 1.9237167337319905,
      "learning_rate": 4.286304628518932e-06,
      "loss": 0.5524,
      "step": 4638
    },
    {
      "epoch": 0.5687837175085826,
      "grad_norm": 1.8110288210483736,
      "learning_rate": 4.285950000392138e-06,
      "loss": 0.571,
      "step": 4639
    },
    {
      "epoch": 0.5689063266307013,
      "grad_norm": 2.007126727078442,
      "learning_rate": 4.285595298859638e-06,
      "loss": 0.5179,
      "step": 4640
    },
    {
      "epoch": 0.56902893575282,
      "grad_norm": 1.9963235191787518,
      "learning_rate": 4.28524052393601e-06,
      "loss": 0.4918,
      "step": 4641
    },
    {
      "epoch": 0.5691515448749387,
      "grad_norm": 2.0799782326290264,
      "learning_rate": 4.2848856756358355e-06,
      "loss": 0.5068,
      "step": 4642
    },
    {
      "epoch": 0.5692741539970574,
      "grad_norm": 1.8939467322145134,
      "learning_rate": 4.2845307539737e-06,
      "loss": 0.5603,
      "step": 4643
    },
    {
      "epoch": 0.5693967631191761,
      "grad_norm": 1.9263072900687026,
      "learning_rate": 4.284175758964191e-06,
      "loss": 0.4864,
      "step": 4644
    },
    {
      "epoch": 0.5695193722412948,
      "grad_norm": 1.858016615004884,
      "learning_rate": 4.283820690621901e-06,
      "loss": 0.5076,
      "step": 4645
    },
    {
      "epoch": 0.5696419813634135,
      "grad_norm": 2.135974180965467,
      "learning_rate": 4.283465548961421e-06,
      "loss": 0.5307,
      "step": 4646
    },
    {
      "epoch": 0.5697645904855321,
      "grad_norm": 1.9137087393171286,
      "learning_rate": 4.28311033399735e-06,
      "loss": 0.5433,
      "step": 4647
    },
    {
      "epoch": 0.5698871996076508,
      "grad_norm": 2.26259042836833,
      "learning_rate": 4.282755045744287e-06,
      "loss": 0.5401,
      "step": 4648
    },
    {
      "epoch": 0.5700098087297695,
      "grad_norm": 1.9678219288673517,
      "learning_rate": 4.282399684216836e-06,
      "loss": 0.5072,
      "step": 4649
    },
    {
      "epoch": 0.5701324178518882,
      "grad_norm": 1.8056849974873515,
      "learning_rate": 4.282044249429603e-06,
      "loss": 0.4818,
      "step": 4650
    },
    {
      "epoch": 0.5702550269740069,
      "grad_norm": 1.9635769948465118,
      "learning_rate": 4.281688741397196e-06,
      "loss": 0.5117,
      "step": 4651
    },
    {
      "epoch": 0.5703776360961256,
      "grad_norm": 1.8761279898418892,
      "learning_rate": 4.281333160134228e-06,
      "loss": 0.5412,
      "step": 4652
    },
    {
      "epoch": 0.5705002452182443,
      "grad_norm": 2.1259121664501093,
      "learning_rate": 4.280977505655312e-06,
      "loss": 0.5443,
      "step": 4653
    },
    {
      "epoch": 0.570622854340363,
      "grad_norm": 2.0284384128670796,
      "learning_rate": 4.2806217779750694e-06,
      "loss": 0.5294,
      "step": 4654
    },
    {
      "epoch": 0.5707454634624816,
      "grad_norm": 1.8557798835196966,
      "learning_rate": 4.280265977108119e-06,
      "loss": 0.4986,
      "step": 4655
    },
    {
      "epoch": 0.5708680725846003,
      "grad_norm": 2.111202038071709,
      "learning_rate": 4.279910103069084e-06,
      "loss": 0.5462,
      "step": 4656
    },
    {
      "epoch": 0.570990681706719,
      "grad_norm": 2.1269305001508587,
      "learning_rate": 4.279554155872594e-06,
      "loss": 0.5655,
      "step": 4657
    },
    {
      "epoch": 0.5711132908288377,
      "grad_norm": 1.9965048371081882,
      "learning_rate": 4.279198135533277e-06,
      "loss": 0.533,
      "step": 4658
    },
    {
      "epoch": 0.5712358999509564,
      "grad_norm": 2.1458788279511856,
      "learning_rate": 4.278842042065767e-06,
      "loss": 0.5696,
      "step": 4659
    },
    {
      "epoch": 0.5713585090730751,
      "grad_norm": 2.13469847878231,
      "learning_rate": 4.2784858754847005e-06,
      "loss": 0.5361,
      "step": 4660
    },
    {
      "epoch": 0.5714811181951938,
      "grad_norm": 1.821979322101495,
      "learning_rate": 4.278129635804715e-06,
      "loss": 0.5269,
      "step": 4661
    },
    {
      "epoch": 0.5716037273173125,
      "grad_norm": 1.882472816089253,
      "learning_rate": 4.277773323040454e-06,
      "loss": 0.5406,
      "step": 4662
    },
    {
      "epoch": 0.571726336439431,
      "grad_norm": 1.953823589308147,
      "learning_rate": 4.277416937206562e-06,
      "loss": 0.5335,
      "step": 4663
    },
    {
      "epoch": 0.5718489455615497,
      "grad_norm": 2.4827422404135056,
      "learning_rate": 4.277060478317687e-06,
      "loss": 0.5135,
      "step": 4664
    },
    {
      "epoch": 0.5719715546836684,
      "grad_norm": 1.9964483669188566,
      "learning_rate": 4.27670394638848e-06,
      "loss": 0.5315,
      "step": 4665
    },
    {
      "epoch": 0.5720941638057871,
      "grad_norm": 2.1819891850959308,
      "learning_rate": 4.276347341433597e-06,
      "loss": 0.609,
      "step": 4666
    },
    {
      "epoch": 0.5722167729279058,
      "grad_norm": 2.257196547327417,
      "learning_rate": 4.2759906634676925e-06,
      "loss": 0.5698,
      "step": 4667
    },
    {
      "epoch": 0.5723393820500245,
      "grad_norm": 1.8506580226932285,
      "learning_rate": 4.275633912505427e-06,
      "loss": 0.5636,
      "step": 4668
    },
    {
      "epoch": 0.5724619911721432,
      "grad_norm": 2.285743135311373,
      "learning_rate": 4.2752770885614645e-06,
      "loss": 0.5494,
      "step": 4669
    },
    {
      "epoch": 0.572584600294262,
      "grad_norm": 1.8783604024518668,
      "learning_rate": 4.274920191650471e-06,
      "loss": 0.5547,
      "step": 4670
    },
    {
      "epoch": 0.5727072094163805,
      "grad_norm": 1.6870747310376302,
      "learning_rate": 4.2745632217871155e-06,
      "loss": 0.495,
      "step": 4671
    },
    {
      "epoch": 0.5728298185384992,
      "grad_norm": 2.1467751419152683,
      "learning_rate": 4.27420617898607e-06,
      "loss": 0.5693,
      "step": 4672
    },
    {
      "epoch": 0.5729524276606179,
      "grad_norm": 1.913654725823874,
      "learning_rate": 4.27384906326201e-06,
      "loss": 0.5469,
      "step": 4673
    },
    {
      "epoch": 0.5730750367827366,
      "grad_norm": 1.8559325164562324,
      "learning_rate": 4.273491874629612e-06,
      "loss": 0.5268,
      "step": 4674
    },
    {
      "epoch": 0.5731976459048553,
      "grad_norm": 1.912196070734483,
      "learning_rate": 4.273134613103559e-06,
      "loss": 0.5341,
      "step": 4675
    },
    {
      "epoch": 0.573320255026974,
      "grad_norm": 2.0341416393624416,
      "learning_rate": 4.2727772786985345e-06,
      "loss": 0.5662,
      "step": 4676
    },
    {
      "epoch": 0.5734428641490927,
      "grad_norm": 2.1944985041481826,
      "learning_rate": 4.272419871429224e-06,
      "loss": 0.4906,
      "step": 4677
    },
    {
      "epoch": 0.5735654732712113,
      "grad_norm": 1.9726991586082103,
      "learning_rate": 4.2720623913103205e-06,
      "loss": 0.595,
      "step": 4678
    },
    {
      "epoch": 0.57368808239333,
      "grad_norm": 2.003894272075084,
      "learning_rate": 4.271704838356515e-06,
      "loss": 0.5798,
      "step": 4679
    },
    {
      "epoch": 0.5738106915154487,
      "grad_norm": 1.8848930612818116,
      "learning_rate": 4.2713472125825035e-06,
      "loss": 0.5039,
      "step": 4680
    },
    {
      "epoch": 0.5739333006375674,
      "grad_norm": 1.9510887140922484,
      "learning_rate": 4.270989514002987e-06,
      "loss": 0.4845,
      "step": 4681
    },
    {
      "epoch": 0.5740559097596861,
      "grad_norm": 1.9784106840971782,
      "learning_rate": 4.2706317426326646e-06,
      "loss": 0.5461,
      "step": 4682
    },
    {
      "epoch": 0.5741785188818048,
      "grad_norm": 1.9659943232030166,
      "learning_rate": 4.270273898486245e-06,
      "loss": 0.5639,
      "step": 4683
    },
    {
      "epoch": 0.5743011280039235,
      "grad_norm": 1.8741875183477914,
      "learning_rate": 4.269915981578432e-06,
      "loss": 0.5189,
      "step": 4684
    },
    {
      "epoch": 0.5744237371260422,
      "grad_norm": 2.052479263544828,
      "learning_rate": 4.26955799192394e-06,
      "loss": 0.5275,
      "step": 4685
    },
    {
      "epoch": 0.5745463462481608,
      "grad_norm": 2.0823946507914863,
      "learning_rate": 4.269199929537481e-06,
      "loss": 0.5871,
      "step": 4686
    },
    {
      "epoch": 0.5746689553702795,
      "grad_norm": 2.097320919856761,
      "learning_rate": 4.2688417944337735e-06,
      "loss": 0.479,
      "step": 4687
    },
    {
      "epoch": 0.5747915644923982,
      "grad_norm": 2.0175489039665018,
      "learning_rate": 4.2684835866275365e-06,
      "loss": 0.4989,
      "step": 4688
    },
    {
      "epoch": 0.5749141736145169,
      "grad_norm": 1.9938136433074667,
      "learning_rate": 4.268125306133492e-06,
      "loss": 0.5815,
      "step": 4689
    },
    {
      "epoch": 0.5750367827366356,
      "grad_norm": 1.9646366391531356,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.5595,
      "step": 4690
    },
    {
      "epoch": 0.5751593918587543,
      "grad_norm": 1.9398750761515613,
      "learning_rate": 4.267408527140894e-06,
      "loss": 0.5552,
      "step": 4691
    },
    {
      "epoch": 0.575282000980873,
      "grad_norm": 1.8489961583474244,
      "learning_rate": 4.267050028671799e-06,
      "loss": 0.4929,
      "step": 4692
    },
    {
      "epoch": 0.5754046101029917,
      "grad_norm": 2.3276853994797873,
      "learning_rate": 4.266691457573821e-06,
      "loss": 0.5616,
      "step": 4693
    },
    {
      "epoch": 0.5755272192251103,
      "grad_norm": 1.9162863808204298,
      "learning_rate": 4.2663328138616956e-06,
      "loss": 0.5132,
      "step": 4694
    },
    {
      "epoch": 0.575649828347229,
      "grad_norm": 2.0807239047136354,
      "learning_rate": 4.265974097550164e-06,
      "loss": 0.55,
      "step": 4695
    },
    {
      "epoch": 0.5757724374693477,
      "grad_norm": 2.0745782074733774,
      "learning_rate": 4.265615308653971e-06,
      "loss": 0.5192,
      "step": 4696
    },
    {
      "epoch": 0.5758950465914664,
      "grad_norm": 2.2128226575057215,
      "learning_rate": 4.265256447187863e-06,
      "loss": 0.6064,
      "step": 4697
    },
    {
      "epoch": 0.5760176557135851,
      "grad_norm": 2.001638146258939,
      "learning_rate": 4.26489751316659e-06,
      "loss": 0.5699,
      "step": 4698
    },
    {
      "epoch": 0.5761402648357038,
      "grad_norm": 1.8557743898099859,
      "learning_rate": 4.264538506604905e-06,
      "loss": 0.4884,
      "step": 4699
    },
    {
      "epoch": 0.5762628739578225,
      "grad_norm": 1.9053600734227782,
      "learning_rate": 4.264179427517563e-06,
      "loss": 0.5251,
      "step": 4700
    },
    {
      "epoch": 0.5763854830799412,
      "grad_norm": 2.061196093003059,
      "learning_rate": 4.2638202759193235e-06,
      "loss": 0.5613,
      "step": 4701
    },
    {
      "epoch": 0.5765080922020598,
      "grad_norm": 1.9867299997643328,
      "learning_rate": 4.263461051824948e-06,
      "loss": 0.5554,
      "step": 4702
    },
    {
      "epoch": 0.5766307013241785,
      "grad_norm": 1.9182297757682356,
      "learning_rate": 4.263101755249201e-06,
      "loss": 0.5233,
      "step": 4703
    },
    {
      "epoch": 0.5767533104462972,
      "grad_norm": 2.0499220112751715,
      "learning_rate": 4.2627423862068515e-06,
      "loss": 0.5599,
      "step": 4704
    },
    {
      "epoch": 0.5768759195684159,
      "grad_norm": 2.2457389116188433,
      "learning_rate": 4.262382944712668e-06,
      "loss": 0.5807,
      "step": 4705
    },
    {
      "epoch": 0.5769985286905346,
      "grad_norm": 1.9566196621411454,
      "learning_rate": 4.262023430781426e-06,
      "loss": 0.5299,
      "step": 4706
    },
    {
      "epoch": 0.5771211378126533,
      "grad_norm": 2.091564713770154,
      "learning_rate": 4.2616638444279015e-06,
      "loss": 0.5805,
      "step": 4707
    },
    {
      "epoch": 0.577243746934772,
      "grad_norm": 1.915691065313721,
      "learning_rate": 4.2613041856668746e-06,
      "loss": 0.5509,
      "step": 4708
    },
    {
      "epoch": 0.5773663560568907,
      "grad_norm": 2.0498652955739614,
      "learning_rate": 4.2609444545131265e-06,
      "loss": 0.5289,
      "step": 4709
    },
    {
      "epoch": 0.5774889651790093,
      "grad_norm": 2.0584128949957328,
      "learning_rate": 4.2605846509814456e-06,
      "loss": 0.5212,
      "step": 4710
    },
    {
      "epoch": 0.577611574301128,
      "grad_norm": 1.7132410285140418,
      "learning_rate": 4.260224775086617e-06,
      "loss": 0.5053,
      "step": 4711
    },
    {
      "epoch": 0.5777341834232467,
      "grad_norm": 2.0138565158433646,
      "learning_rate": 4.2598648268434345e-06,
      "loss": 0.5696,
      "step": 4712
    },
    {
      "epoch": 0.5778567925453654,
      "grad_norm": 2.0389487023610946,
      "learning_rate": 4.259504806266692e-06,
      "loss": 0.541,
      "step": 4713
    },
    {
      "epoch": 0.5779794016674841,
      "grad_norm": 1.9980601951851633,
      "learning_rate": 4.259144713371187e-06,
      "loss": 0.5508,
      "step": 4714
    },
    {
      "epoch": 0.5781020107896028,
      "grad_norm": 1.9040985655591045,
      "learning_rate": 4.25878454817172e-06,
      "loss": 0.4984,
      "step": 4715
    },
    {
      "epoch": 0.5782246199117215,
      "grad_norm": 1.8997106260177938,
      "learning_rate": 4.258424310683094e-06,
      "loss": 0.5685,
      "step": 4716
    },
    {
      "epoch": 0.5783472290338402,
      "grad_norm": 2.0747938057325177,
      "learning_rate": 4.2580640009201164e-06,
      "loss": 0.5096,
      "step": 4717
    },
    {
      "epoch": 0.5784698381559588,
      "grad_norm": 2.1170213330868766,
      "learning_rate": 4.257703618897596e-06,
      "loss": 0.5832,
      "step": 4718
    },
    {
      "epoch": 0.5785924472780775,
      "grad_norm": 2.0737981612669047,
      "learning_rate": 4.257343164630344e-06,
      "loss": 0.5473,
      "step": 4719
    },
    {
      "epoch": 0.5787150564001962,
      "grad_norm": 1.930916979870611,
      "learning_rate": 4.256982638133178e-06,
      "loss": 0.5618,
      "step": 4720
    },
    {
      "epoch": 0.5788376655223149,
      "grad_norm": 2.1222424428057742,
      "learning_rate": 4.2566220394209144e-06,
      "loss": 0.5619,
      "step": 4721
    },
    {
      "epoch": 0.5789602746444336,
      "grad_norm": 2.0898703381318113,
      "learning_rate": 4.256261368508376e-06,
      "loss": 0.4855,
      "step": 4722
    },
    {
      "epoch": 0.5790828837665523,
      "grad_norm": 1.9137707371846175,
      "learning_rate": 4.255900625410385e-06,
      "loss": 0.5695,
      "step": 4723
    },
    {
      "epoch": 0.579205492888671,
      "grad_norm": 2.1935008938234435,
      "learning_rate": 4.255539810141771e-06,
      "loss": 0.5218,
      "step": 4724
    },
    {
      "epoch": 0.5793281020107895,
      "grad_norm": 1.9883803085883265,
      "learning_rate": 4.2551789227173614e-06,
      "loss": 0.5564,
      "step": 4725
    },
    {
      "epoch": 0.5794507111329082,
      "grad_norm": 2.011336831253548,
      "learning_rate": 4.254817963151992e-06,
      "loss": 0.5246,
      "step": 4726
    },
    {
      "epoch": 0.579573320255027,
      "grad_norm": 2.11754590719629,
      "learning_rate": 4.254456931460497e-06,
      "loss": 0.5466,
      "step": 4727
    },
    {
      "epoch": 0.5796959293771456,
      "grad_norm": 1.9990185278602484,
      "learning_rate": 4.254095827657716e-06,
      "loss": 0.6237,
      "step": 4728
    },
    {
      "epoch": 0.5798185384992643,
      "grad_norm": 1.7913043525132482,
      "learning_rate": 4.253734651758492e-06,
      "loss": 0.4839,
      "step": 4729
    },
    {
      "epoch": 0.579941147621383,
      "grad_norm": 2.1226630654402787,
      "learning_rate": 4.253373403777668e-06,
      "loss": 0.5729,
      "step": 4730
    },
    {
      "epoch": 0.5800637567435017,
      "grad_norm": 1.906456057141034,
      "learning_rate": 4.253012083730094e-06,
      "loss": 0.5201,
      "step": 4731
    },
    {
      "epoch": 0.5801863658656204,
      "grad_norm": 1.8303218655532496,
      "learning_rate": 4.2526506916306185e-06,
      "loss": 0.5827,
      "step": 4732
    },
    {
      "epoch": 0.580308974987739,
      "grad_norm": 2.112424351158853,
      "learning_rate": 4.252289227494098e-06,
      "loss": 0.5504,
      "step": 4733
    },
    {
      "epoch": 0.5804315841098577,
      "grad_norm": 1.986679796350922,
      "learning_rate": 4.2519276913353874e-06,
      "loss": 0.5739,
      "step": 4734
    },
    {
      "epoch": 0.5805541932319764,
      "grad_norm": 2.024122899446073,
      "learning_rate": 4.251566083169347e-06,
      "loss": 0.5295,
      "step": 4735
    },
    {
      "epoch": 0.5806768023540951,
      "grad_norm": 1.8972985410505143,
      "learning_rate": 4.25120440301084e-06,
      "loss": 0.4916,
      "step": 4736
    },
    {
      "epoch": 0.5807994114762138,
      "grad_norm": 2.1138447150409525,
      "learning_rate": 4.250842650874732e-06,
      "loss": 0.5358,
      "step": 4737
    },
    {
      "epoch": 0.5809220205983325,
      "grad_norm": 1.8940673800961214,
      "learning_rate": 4.250480826775891e-06,
      "loss": 0.5534,
      "step": 4738
    },
    {
      "epoch": 0.5810446297204512,
      "grad_norm": 2.107708863000496,
      "learning_rate": 4.2501189307291895e-06,
      "loss": 0.5417,
      "step": 4739
    },
    {
      "epoch": 0.5811672388425699,
      "grad_norm": 2.0773430675547155,
      "learning_rate": 4.249756962749502e-06,
      "loss": 0.547,
      "step": 4740
    },
    {
      "epoch": 0.5812898479646885,
      "grad_norm": 2.03786748492232,
      "learning_rate": 4.249394922851705e-06,
      "loss": 0.5941,
      "step": 4741
    },
    {
      "epoch": 0.5814124570868072,
      "grad_norm": 1.985490513131527,
      "learning_rate": 4.24903281105068e-06,
      "loss": 0.5485,
      "step": 4742
    },
    {
      "epoch": 0.5815350662089259,
      "grad_norm": 2.0463734662552113,
      "learning_rate": 4.248670627361309e-06,
      "loss": 0.5461,
      "step": 4743
    },
    {
      "epoch": 0.5816576753310446,
      "grad_norm": 2.0506629216896286,
      "learning_rate": 4.248308371798481e-06,
      "loss": 0.5954,
      "step": 4744
    },
    {
      "epoch": 0.5817802844531633,
      "grad_norm": 1.9600519439810389,
      "learning_rate": 4.247946044377082e-06,
      "loss": 0.545,
      "step": 4745
    },
    {
      "epoch": 0.581902893575282,
      "grad_norm": 1.9366910311765761,
      "learning_rate": 4.247583645112007e-06,
      "loss": 0.5117,
      "step": 4746
    },
    {
      "epoch": 0.5820255026974007,
      "grad_norm": 2.252125219423666,
      "learning_rate": 4.24722117401815e-06,
      "loss": 0.5242,
      "step": 4747
    },
    {
      "epoch": 0.5821481118195194,
      "grad_norm": 1.9212611652283866,
      "learning_rate": 4.24685863111041e-06,
      "loss": 0.5095,
      "step": 4748
    },
    {
      "epoch": 0.582270720941638,
      "grad_norm": 1.9380389899335684,
      "learning_rate": 4.246496016403688e-06,
      "loss": 0.5338,
      "step": 4749
    },
    {
      "epoch": 0.5823933300637567,
      "grad_norm": 1.9206381579103062,
      "learning_rate": 4.246133329912887e-06,
      "loss": 0.5822,
      "step": 4750
    },
    {
      "epoch": 0.5825159391858754,
      "grad_norm": 1.9086551594932228,
      "learning_rate": 4.245770571652915e-06,
      "loss": 0.5554,
      "step": 4751
    },
    {
      "epoch": 0.5826385483079941,
      "grad_norm": 1.8255911328139642,
      "learning_rate": 4.245407741638683e-06,
      "loss": 0.5247,
      "step": 4752
    },
    {
      "epoch": 0.5827611574301128,
      "grad_norm": 2.0525711339330095,
      "learning_rate": 4.245044839885102e-06,
      "loss": 0.5705,
      "step": 4753
    },
    {
      "epoch": 0.5828837665522315,
      "grad_norm": 1.6886755029969105,
      "learning_rate": 4.244681866407088e-06,
      "loss": 0.5209,
      "step": 4754
    },
    {
      "epoch": 0.5830063756743502,
      "grad_norm": 2.228255967197227,
      "learning_rate": 4.244318821219561e-06,
      "loss": 0.5098,
      "step": 4755
    },
    {
      "epoch": 0.5831289847964689,
      "grad_norm": 1.8298875747869452,
      "learning_rate": 4.243955704337443e-06,
      "loss": 0.6092,
      "step": 4756
    },
    {
      "epoch": 0.5832515939185875,
      "grad_norm": 1.8799215276323666,
      "learning_rate": 4.243592515775657e-06,
      "loss": 0.5449,
      "step": 4757
    },
    {
      "epoch": 0.5833742030407062,
      "grad_norm": 2.031246518688229,
      "learning_rate": 4.2432292555491325e-06,
      "loss": 0.5328,
      "step": 4758
    },
    {
      "epoch": 0.5834968121628249,
      "grad_norm": 2.0161543788394445,
      "learning_rate": 4.2428659236728e-06,
      "loss": 0.5095,
      "step": 4759
    },
    {
      "epoch": 0.5836194212849436,
      "grad_norm": 1.9991516822163398,
      "learning_rate": 4.242502520161592e-06,
      "loss": 0.5047,
      "step": 4760
    },
    {
      "epoch": 0.5837420304070623,
      "grad_norm": 1.9589387223476598,
      "learning_rate": 4.242139045030445e-06,
      "loss": 0.5346,
      "step": 4761
    },
    {
      "epoch": 0.583864639529181,
      "grad_norm": 1.9837882023140525,
      "learning_rate": 4.2417754982942995e-06,
      "loss": 0.5466,
      "step": 4762
    },
    {
      "epoch": 0.5839872486512997,
      "grad_norm": 2.044585125393598,
      "learning_rate": 4.241411879968098e-06,
      "loss": 0.5849,
      "step": 4763
    },
    {
      "epoch": 0.5841098577734184,
      "grad_norm": 2.0688221768181814,
      "learning_rate": 4.2410481900667846e-06,
      "loss": 0.5529,
      "step": 4764
    },
    {
      "epoch": 0.584232466895537,
      "grad_norm": 2.099164914400149,
      "learning_rate": 4.240684428605308e-06,
      "loss": 0.5173,
      "step": 4765
    },
    {
      "epoch": 0.5843550760176557,
      "grad_norm": 1.9886905115908036,
      "learning_rate": 4.24032059559862e-06,
      "loss": 0.5427,
      "step": 4766
    },
    {
      "epoch": 0.5844776851397744,
      "grad_norm": 2.0141987797611236,
      "learning_rate": 4.239956691061676e-06,
      "loss": 0.5029,
      "step": 4767
    },
    {
      "epoch": 0.5846002942618931,
      "grad_norm": 2.012528792443212,
      "learning_rate": 4.23959271500943e-06,
      "loss": 0.5734,
      "step": 4768
    },
    {
      "epoch": 0.5847229033840118,
      "grad_norm": 2.007085928158911,
      "learning_rate": 4.239228667456844e-06,
      "loss": 0.5471,
      "step": 4769
    },
    {
      "epoch": 0.5848455125061305,
      "grad_norm": 2.0403282906068982,
      "learning_rate": 4.238864548418881e-06,
      "loss": 0.546,
      "step": 4770
    },
    {
      "epoch": 0.5849681216282492,
      "grad_norm": 1.8608630706419231,
      "learning_rate": 4.238500357910507e-06,
      "loss": 0.5756,
      "step": 4771
    },
    {
      "epoch": 0.5850907307503679,
      "grad_norm": 2.037637491442047,
      "learning_rate": 4.238136095946689e-06,
      "loss": 0.6074,
      "step": 4772
    },
    {
      "epoch": 0.5852133398724865,
      "grad_norm": 2.0967675344889454,
      "learning_rate": 4.237771762542402e-06,
      "loss": 0.5703,
      "step": 4773
    },
    {
      "epoch": 0.5853359489946052,
      "grad_norm": 2.1161939160748866,
      "learning_rate": 4.237407357712619e-06,
      "loss": 0.5676,
      "step": 4774
    },
    {
      "epoch": 0.5854585581167239,
      "grad_norm": 2.0333593332535265,
      "learning_rate": 4.237042881472316e-06,
      "loss": 0.5403,
      "step": 4775
    },
    {
      "epoch": 0.5855811672388426,
      "grad_norm": 2.0999347105681263,
      "learning_rate": 4.236678333836477e-06,
      "loss": 0.5496,
      "step": 4776
    },
    {
      "epoch": 0.5857037763609613,
      "grad_norm": 1.8647257544115914,
      "learning_rate": 4.236313714820084e-06,
      "loss": 0.5434,
      "step": 4777
    },
    {
      "epoch": 0.58582638548308,
      "grad_norm": 1.9104494906933456,
      "learning_rate": 4.235949024438122e-06,
      "loss": 0.5332,
      "step": 4778
    },
    {
      "epoch": 0.5859489946051987,
      "grad_norm": 1.940634796264466,
      "learning_rate": 4.235584262705583e-06,
      "loss": 0.5611,
      "step": 4779
    },
    {
      "epoch": 0.5860716037273173,
      "grad_norm": 2.035901860595002,
      "learning_rate": 4.235219429637458e-06,
      "loss": 0.6007,
      "step": 4780
    },
    {
      "epoch": 0.586194212849436,
      "grad_norm": 1.8743397348262634,
      "learning_rate": 4.234854525248743e-06,
      "loss": 0.5326,
      "step": 4781
    },
    {
      "epoch": 0.5863168219715547,
      "grad_norm": 1.9568865535332032,
      "learning_rate": 4.234489549554435e-06,
      "loss": 0.5366,
      "step": 4782
    },
    {
      "epoch": 0.5864394310936734,
      "grad_norm": 2.0432262464121607,
      "learning_rate": 4.234124502569536e-06,
      "loss": 0.5686,
      "step": 4783
    },
    {
      "epoch": 0.5865620402157921,
      "grad_norm": 1.9916244372767424,
      "learning_rate": 4.233759384309049e-06,
      "loss": 0.6073,
      "step": 4784
    },
    {
      "epoch": 0.5866846493379108,
      "grad_norm": 2.0139820333039133,
      "learning_rate": 4.233394194787983e-06,
      "loss": 0.5152,
      "step": 4785
    },
    {
      "epoch": 0.5868072584600295,
      "grad_norm": 1.9696333188967012,
      "learning_rate": 4.233028934021347e-06,
      "loss": 0.5578,
      "step": 4786
    },
    {
      "epoch": 0.5869298675821482,
      "grad_norm": 1.827744111057887,
      "learning_rate": 4.232663602024153e-06,
      "loss": 0.5084,
      "step": 4787
    },
    {
      "epoch": 0.5870524767042667,
      "grad_norm": 1.9057654398221338,
      "learning_rate": 4.232298198811418e-06,
      "loss": 0.5843,
      "step": 4788
    },
    {
      "epoch": 0.5871750858263854,
      "grad_norm": 2.044288397588186,
      "learning_rate": 4.23193272439816e-06,
      "loss": 0.5896,
      "step": 4789
    },
    {
      "epoch": 0.5872976949485041,
      "grad_norm": 1.9310295194752696,
      "learning_rate": 4.231567178799401e-06,
      "loss": 0.5497,
      "step": 4790
    },
    {
      "epoch": 0.5874203040706228,
      "grad_norm": 1.9066391027276492,
      "learning_rate": 4.231201562030165e-06,
      "loss": 0.523,
      "step": 4791
    },
    {
      "epoch": 0.5875429131927415,
      "grad_norm": 1.8676521932204817,
      "learning_rate": 4.2308358741054816e-06,
      "loss": 0.5201,
      "step": 4792
    },
    {
      "epoch": 0.5876655223148602,
      "grad_norm": 1.8406582847272788,
      "learning_rate": 4.230470115040379e-06,
      "loss": 0.5226,
      "step": 4793
    },
    {
      "epoch": 0.587788131436979,
      "grad_norm": 1.8959909224212983,
      "learning_rate": 4.230104284849889e-06,
      "loss": 0.5008,
      "step": 4794
    },
    {
      "epoch": 0.5879107405590976,
      "grad_norm": 2.046544428258191,
      "learning_rate": 4.229738383549052e-06,
      "loss": 0.5301,
      "step": 4795
    },
    {
      "epoch": 0.5880333496812162,
      "grad_norm": 1.859441565287282,
      "learning_rate": 4.229372411152904e-06,
      "loss": 0.544,
      "step": 4796
    },
    {
      "epoch": 0.5881559588033349,
      "grad_norm": 1.9078375819894915,
      "learning_rate": 4.229006367676489e-06,
      "loss": 0.5231,
      "step": 4797
    },
    {
      "epoch": 0.5882785679254536,
      "grad_norm": 2.1337149986095336,
      "learning_rate": 4.228640253134852e-06,
      "loss": 0.5028,
      "step": 4798
    },
    {
      "epoch": 0.5884011770475723,
      "grad_norm": 1.8911332209908183,
      "learning_rate": 4.2282740675430395e-06,
      "loss": 0.5608,
      "step": 4799
    },
    {
      "epoch": 0.588523786169691,
      "grad_norm": 1.897198558179483,
      "learning_rate": 4.227907810916103e-06,
      "loss": 0.5016,
      "step": 4800
    },
    {
      "epoch": 0.5886463952918097,
      "grad_norm": 2.0646999377588355,
      "learning_rate": 4.227541483269096e-06,
      "loss": 0.6357,
      "step": 4801
    },
    {
      "epoch": 0.5887690044139284,
      "grad_norm": 2.0901387342902344,
      "learning_rate": 4.227175084617078e-06,
      "loss": 0.4827,
      "step": 4802
    },
    {
      "epoch": 0.5888916135360471,
      "grad_norm": 2.068050313856663,
      "learning_rate": 4.226808614975104e-06,
      "loss": 0.5829,
      "step": 4803
    },
    {
      "epoch": 0.5890142226581657,
      "grad_norm": 2.2202292071910867,
      "learning_rate": 4.226442074358239e-06,
      "loss": 0.5257,
      "step": 4804
    },
    {
      "epoch": 0.5891368317802844,
      "grad_norm": 1.9999549318879346,
      "learning_rate": 4.226075462781549e-06,
      "loss": 0.5472,
      "step": 4805
    },
    {
      "epoch": 0.5892594409024031,
      "grad_norm": 1.9484393318698336,
      "learning_rate": 4.225708780260102e-06,
      "loss": 0.5154,
      "step": 4806
    },
    {
      "epoch": 0.5893820500245218,
      "grad_norm": 1.9403845060224978,
      "learning_rate": 4.225342026808968e-06,
      "loss": 0.553,
      "step": 4807
    },
    {
      "epoch": 0.5895046591466405,
      "grad_norm": 1.844005204571395,
      "learning_rate": 4.2249752024432225e-06,
      "loss": 0.5251,
      "step": 4808
    },
    {
      "epoch": 0.5896272682687592,
      "grad_norm": 1.9203614729733538,
      "learning_rate": 4.224608307177943e-06,
      "loss": 0.5252,
      "step": 4809
    },
    {
      "epoch": 0.5897498773908779,
      "grad_norm": 2.0759730957105083,
      "learning_rate": 4.224241341028208e-06,
      "loss": 0.5374,
      "step": 4810
    },
    {
      "epoch": 0.5898724865129966,
      "grad_norm": 1.9587212188373488,
      "learning_rate": 4.2238743040091025e-06,
      "loss": 0.6071,
      "step": 4811
    },
    {
      "epoch": 0.5899950956351152,
      "grad_norm": 2.038472335819192,
      "learning_rate": 4.22350719613571e-06,
      "loss": 0.5349,
      "step": 4812
    },
    {
      "epoch": 0.5901177047572339,
      "grad_norm": 1.9700664853743093,
      "learning_rate": 4.223140017423121e-06,
      "loss": 0.5417,
      "step": 4813
    },
    {
      "epoch": 0.5902403138793526,
      "grad_norm": 1.8252145313405397,
      "learning_rate": 4.222772767886427e-06,
      "loss": 0.4787,
      "step": 4814
    },
    {
      "epoch": 0.5903629230014713,
      "grad_norm": 2.1520115693225237,
      "learning_rate": 4.222405447540722e-06,
      "loss": 0.5507,
      "step": 4815
    },
    {
      "epoch": 0.59048553212359,
      "grad_norm": 2.144904673503128,
      "learning_rate": 4.2220380564011035e-06,
      "loss": 0.6348,
      "step": 4816
    },
    {
      "epoch": 0.5906081412457087,
      "grad_norm": 1.892417798247972,
      "learning_rate": 4.221670594482673e-06,
      "loss": 0.4742,
      "step": 4817
    },
    {
      "epoch": 0.5907307503678274,
      "grad_norm": 1.8764891044936054,
      "learning_rate": 4.221303061800532e-06,
      "loss": 0.5505,
      "step": 4818
    },
    {
      "epoch": 0.5908533594899461,
      "grad_norm": 2.1917247045626858,
      "learning_rate": 4.220935458369789e-06,
      "loss": 0.5728,
      "step": 4819
    },
    {
      "epoch": 0.5909759686120647,
      "grad_norm": 1.8894893026078892,
      "learning_rate": 4.220567784205551e-06,
      "loss": 0.5316,
      "step": 4820
    },
    {
      "epoch": 0.5910985777341834,
      "grad_norm": 1.9927555791062967,
      "learning_rate": 4.220200039322933e-06,
      "loss": 0.5143,
      "step": 4821
    },
    {
      "epoch": 0.5912211868563021,
      "grad_norm": 2.10112436418736,
      "learning_rate": 4.219832223737046e-06,
      "loss": 0.5739,
      "step": 4822
    },
    {
      "epoch": 0.5913437959784208,
      "grad_norm": 2.2086489712780444,
      "learning_rate": 4.219464337463011e-06,
      "loss": 0.5701,
      "step": 4823
    },
    {
      "epoch": 0.5914664051005395,
      "grad_norm": 2.055292425692809,
      "learning_rate": 4.219096380515947e-06,
      "loss": 0.5549,
      "step": 4824
    },
    {
      "epoch": 0.5915890142226582,
      "grad_norm": 2.0312334723578,
      "learning_rate": 4.218728352910978e-06,
      "loss": 0.5811,
      "step": 4825
    },
    {
      "epoch": 0.5917116233447769,
      "grad_norm": 2.0177467019064634,
      "learning_rate": 4.218360254663232e-06,
      "loss": 0.5584,
      "step": 4826
    },
    {
      "epoch": 0.5918342324668955,
      "grad_norm": 1.948438693442032,
      "learning_rate": 4.2179920857878364e-06,
      "loss": 0.4689,
      "step": 4827
    },
    {
      "epoch": 0.5919568415890142,
      "grad_norm": 2.1274894106434683,
      "learning_rate": 4.2176238462999254e-06,
      "loss": 0.5813,
      "step": 4828
    },
    {
      "epoch": 0.5920794507111329,
      "grad_norm": 1.8314119610814654,
      "learning_rate": 4.217255536214633e-06,
      "loss": 0.4842,
      "step": 4829
    },
    {
      "epoch": 0.5922020598332516,
      "grad_norm": 1.9342404586282544,
      "learning_rate": 4.216887155547098e-06,
      "loss": 0.5202,
      "step": 4830
    },
    {
      "epoch": 0.5923246689553703,
      "grad_norm": 1.9994084841345487,
      "learning_rate": 4.2165187043124614e-06,
      "loss": 0.5203,
      "step": 4831
    },
    {
      "epoch": 0.592447278077489,
      "grad_norm": 2.0753186469863136,
      "learning_rate": 4.216150182525868e-06,
      "loss": 0.522,
      "step": 4832
    },
    {
      "epoch": 0.5925698871996077,
      "grad_norm": 2.1648913882509064,
      "learning_rate": 4.215781590202463e-06,
      "loss": 0.5497,
      "step": 4833
    },
    {
      "epoch": 0.5926924963217264,
      "grad_norm": 1.9222280440344355,
      "learning_rate": 4.2154129273573966e-06,
      "loss": 0.5318,
      "step": 4834
    },
    {
      "epoch": 0.592815105443845,
      "grad_norm": 1.8210943754220479,
      "learning_rate": 4.215044194005822e-06,
      "loss": 0.5427,
      "step": 4835
    },
    {
      "epoch": 0.5929377145659637,
      "grad_norm": 2.1651503602198034,
      "learning_rate": 4.214675390162896e-06,
      "loss": 0.589,
      "step": 4836
    },
    {
      "epoch": 0.5930603236880824,
      "grad_norm": 2.334625383065769,
      "learning_rate": 4.214306515843775e-06,
      "loss": 0.5599,
      "step": 4837
    },
    {
      "epoch": 0.5931829328102011,
      "grad_norm": 2.4349800670249917,
      "learning_rate": 4.213937571063622e-06,
      "loss": 0.592,
      "step": 4838
    },
    {
      "epoch": 0.5933055419323198,
      "grad_norm": 2.0883634260799813,
      "learning_rate": 4.2135685558376e-06,
      "loss": 0.5682,
      "step": 4839
    },
    {
      "epoch": 0.5934281510544385,
      "grad_norm": 2.042446729857085,
      "learning_rate": 4.213199470180877e-06,
      "loss": 0.524,
      "step": 4840
    },
    {
      "epoch": 0.5935507601765572,
      "grad_norm": 1.9415602116249757,
      "learning_rate": 4.212830314108622e-06,
      "loss": 0.5496,
      "step": 4841
    },
    {
      "epoch": 0.5936733692986759,
      "grad_norm": 2.2158539323087276,
      "learning_rate": 4.21246108763601e-06,
      "loss": 0.5429,
      "step": 4842
    },
    {
      "epoch": 0.5937959784207945,
      "grad_norm": 1.8934100377595888,
      "learning_rate": 4.212091790778214e-06,
      "loss": 0.5167,
      "step": 4843
    },
    {
      "epoch": 0.5939185875429132,
      "grad_norm": 2.0427577496268383,
      "learning_rate": 4.211722423550416e-06,
      "loss": 0.5372,
      "step": 4844
    },
    {
      "epoch": 0.5940411966650319,
      "grad_norm": 1.953726207312983,
      "learning_rate": 4.2113529859677945e-06,
      "loss": 0.5201,
      "step": 4845
    },
    {
      "epoch": 0.5941638057871506,
      "grad_norm": 1.8300426175695286,
      "learning_rate": 4.210983478045537e-06,
      "loss": 0.5299,
      "step": 4846
    },
    {
      "epoch": 0.5942864149092693,
      "grad_norm": 1.8767395323089793,
      "learning_rate": 4.210613899798829e-06,
      "loss": 0.5453,
      "step": 4847
    },
    {
      "epoch": 0.594409024031388,
      "grad_norm": 1.9869610080859823,
      "learning_rate": 4.210244251242861e-06,
      "loss": 0.535,
      "step": 4848
    },
    {
      "epoch": 0.5945316331535067,
      "grad_norm": 2.10216930478048,
      "learning_rate": 4.209874532392827e-06,
      "loss": 0.5047,
      "step": 4849
    },
    {
      "epoch": 0.5946542422756254,
      "grad_norm": 1.8357768110640516,
      "learning_rate": 4.209504743263923e-06,
      "loss": 0.5476,
      "step": 4850
    },
    {
      "epoch": 0.5947768513977439,
      "grad_norm": 2.3177438117597027,
      "learning_rate": 4.209134883871347e-06,
      "loss": 0.5777,
      "step": 4851
    },
    {
      "epoch": 0.5948994605198626,
      "grad_norm": 1.607800710567982,
      "learning_rate": 4.2087649542303015e-06,
      "loss": 0.4831,
      "step": 4852
    },
    {
      "epoch": 0.5950220696419813,
      "grad_norm": 1.9854955054279964,
      "learning_rate": 4.2083949543559905e-06,
      "loss": 0.5069,
      "step": 4853
    },
    {
      "epoch": 0.5951446787641,
      "grad_norm": 1.7920882589174114,
      "learning_rate": 4.208024884263623e-06,
      "loss": 0.5697,
      "step": 4854
    },
    {
      "epoch": 0.5952672878862187,
      "grad_norm": 2.301096514535378,
      "learning_rate": 4.207654743968409e-06,
      "loss": 0.5418,
      "step": 4855
    },
    {
      "epoch": 0.5953898970083374,
      "grad_norm": 2.040699378753976,
      "learning_rate": 4.2072845334855625e-06,
      "loss": 0.5418,
      "step": 4856
    },
    {
      "epoch": 0.5955125061304561,
      "grad_norm": 1.7392145175362006,
      "learning_rate": 4.206914252830299e-06,
      "loss": 0.4889,
      "step": 4857
    },
    {
      "epoch": 0.5956351152525748,
      "grad_norm": 1.8798438791049419,
      "learning_rate": 4.2065439020178365e-06,
      "loss": 0.5295,
      "step": 4858
    },
    {
      "epoch": 0.5957577243746934,
      "grad_norm": 2.0060537261145477,
      "learning_rate": 4.206173481063399e-06,
      "loss": 0.6072,
      "step": 4859
    },
    {
      "epoch": 0.5958803334968121,
      "grad_norm": 1.7695863388418978,
      "learning_rate": 4.205802989982212e-06,
      "loss": 0.4805,
      "step": 4860
    },
    {
      "epoch": 0.5960029426189308,
      "grad_norm": 1.906289861789618,
      "learning_rate": 4.2054324287895015e-06,
      "loss": 0.5424,
      "step": 4861
    },
    {
      "epoch": 0.5961255517410495,
      "grad_norm": 1.9162172843362317,
      "learning_rate": 4.205061797500499e-06,
      "loss": 0.4995,
      "step": 4862
    },
    {
      "epoch": 0.5962481608631682,
      "grad_norm": 1.8774178924494607,
      "learning_rate": 4.204691096130438e-06,
      "loss": 0.5567,
      "step": 4863
    },
    {
      "epoch": 0.5963707699852869,
      "grad_norm": 1.953564955608582,
      "learning_rate": 4.2043203246945545e-06,
      "loss": 0.5171,
      "step": 4864
    },
    {
      "epoch": 0.5964933791074056,
      "grad_norm": 1.9675188024548242,
      "learning_rate": 4.2039494832080895e-06,
      "loss": 0.5635,
      "step": 4865
    },
    {
      "epoch": 0.5966159882295243,
      "grad_norm": 2.0302324625785806,
      "learning_rate": 4.203578571686283e-06,
      "loss": 0.5517,
      "step": 4866
    },
    {
      "epoch": 0.5967385973516429,
      "grad_norm": 2.0835183069079832,
      "learning_rate": 4.203207590144382e-06,
      "loss": 0.5641,
      "step": 4867
    },
    {
      "epoch": 0.5968612064737616,
      "grad_norm": 1.9856970386450659,
      "learning_rate": 4.202836538597633e-06,
      "loss": 0.5081,
      "step": 4868
    },
    {
      "epoch": 0.5969838155958803,
      "grad_norm": 1.986120672817032,
      "learning_rate": 4.202465417061289e-06,
      "loss": 0.5171,
      "step": 4869
    },
    {
      "epoch": 0.597106424717999,
      "grad_norm": 2.1852987574124123,
      "learning_rate": 4.2020942255506016e-06,
      "loss": 0.6505,
      "step": 4870
    },
    {
      "epoch": 0.5972290338401177,
      "grad_norm": 1.8491800441233246,
      "learning_rate": 4.201722964080828e-06,
      "loss": 0.5379,
      "step": 4871
    },
    {
      "epoch": 0.5973516429622364,
      "grad_norm": 1.988315496242922,
      "learning_rate": 4.201351632667227e-06,
      "loss": 0.604,
      "step": 4872
    },
    {
      "epoch": 0.5974742520843551,
      "grad_norm": 2.171189187586128,
      "learning_rate": 4.200980231325063e-06,
      "loss": 0.5297,
      "step": 4873
    },
    {
      "epoch": 0.5975968612064737,
      "grad_norm": 1.8525703595083358,
      "learning_rate": 4.200608760069601e-06,
      "loss": 0.5334,
      "step": 4874
    },
    {
      "epoch": 0.5977194703285924,
      "grad_norm": 1.9542406264948056,
      "learning_rate": 4.200237218916108e-06,
      "loss": 0.5688,
      "step": 4875
    },
    {
      "epoch": 0.5978420794507111,
      "grad_norm": 1.8962584188909575,
      "learning_rate": 4.199865607879854e-06,
      "loss": 0.4934,
      "step": 4876
    },
    {
      "epoch": 0.5979646885728298,
      "grad_norm": 1.8642705419351364,
      "learning_rate": 4.199493926976114e-06,
      "loss": 0.5218,
      "step": 4877
    },
    {
      "epoch": 0.5980872976949485,
      "grad_norm": 1.9665588268518792,
      "learning_rate": 4.199122176220166e-06,
      "loss": 0.5798,
      "step": 4878
    },
    {
      "epoch": 0.5982099068170672,
      "grad_norm": 1.8862779826510911,
      "learning_rate": 4.198750355627289e-06,
      "loss": 0.5638,
      "step": 4879
    },
    {
      "epoch": 0.5983325159391859,
      "grad_norm": 2.1956469597032617,
      "learning_rate": 4.198378465212763e-06,
      "loss": 0.567,
      "step": 4880
    },
    {
      "epoch": 0.5984551250613046,
      "grad_norm": 2.088072989036708,
      "learning_rate": 4.1980065049918774e-06,
      "loss": 0.5449,
      "step": 4881
    },
    {
      "epoch": 0.5985777341834232,
      "grad_norm": 1.7320655360757198,
      "learning_rate": 4.197634474979918e-06,
      "loss": 0.5701,
      "step": 4882
    },
    {
      "epoch": 0.5987003433055419,
      "grad_norm": 1.9915763377194475,
      "learning_rate": 4.197262375192175e-06,
      "loss": 0.5638,
      "step": 4883
    },
    {
      "epoch": 0.5988229524276606,
      "grad_norm": 2.184106170185628,
      "learning_rate": 4.1968902056439445e-06,
      "loss": 0.5581,
      "step": 4884
    },
    {
      "epoch": 0.5989455615497793,
      "grad_norm": 2.4559887392970183,
      "learning_rate": 4.1965179663505214e-06,
      "loss": 0.5618,
      "step": 4885
    },
    {
      "epoch": 0.599068170671898,
      "grad_norm": 1.962823044182792,
      "learning_rate": 4.196145657327208e-06,
      "loss": 0.5628,
      "step": 4886
    },
    {
      "epoch": 0.5991907797940167,
      "grad_norm": 2.0307639410286806,
      "learning_rate": 4.195773278589305e-06,
      "loss": 0.5961,
      "step": 4887
    },
    {
      "epoch": 0.5993133889161354,
      "grad_norm": 1.8683259744360758,
      "learning_rate": 4.195400830152117e-06,
      "loss": 0.5639,
      "step": 4888
    },
    {
      "epoch": 0.5994359980382541,
      "grad_norm": 1.846241219906179,
      "learning_rate": 4.195028312030955e-06,
      "loss": 0.5319,
      "step": 4889
    },
    {
      "epoch": 0.5995586071603727,
      "grad_norm": 2.068818122800695,
      "learning_rate": 4.194655724241127e-06,
      "loss": 0.5701,
      "step": 4890
    },
    {
      "epoch": 0.5996812162824914,
      "grad_norm": 2.3603980748201416,
      "learning_rate": 4.194283066797949e-06,
      "loss": 0.582,
      "step": 4891
    },
    {
      "epoch": 0.5998038254046101,
      "grad_norm": 2.0534828221374037,
      "learning_rate": 4.193910339716738e-06,
      "loss": 0.5393,
      "step": 4892
    },
    {
      "epoch": 0.5999264345267288,
      "grad_norm": 1.8786346607393605,
      "learning_rate": 4.193537543012813e-06,
      "loss": 0.4824,
      "step": 4893
    },
    {
      "epoch": 0.6000490436488475,
      "grad_norm": 2.163659680410027,
      "learning_rate": 4.193164676701497e-06,
      "loss": 0.567,
      "step": 4894
    },
    {
      "epoch": 0.6001716527709662,
      "grad_norm": 2.0804674435966115,
      "learning_rate": 4.192791740798114e-06,
      "loss": 0.6177,
      "step": 4895
    },
    {
      "epoch": 0.6002942618930849,
      "grad_norm": 2.0519623145437897,
      "learning_rate": 4.192418735317995e-06,
      "loss": 0.531,
      "step": 4896
    },
    {
      "epoch": 0.6004168710152036,
      "grad_norm": 2.0655952871627687,
      "learning_rate": 4.19204566027647e-06,
      "loss": 0.5703,
      "step": 4897
    },
    {
      "epoch": 0.6005394801373222,
      "grad_norm": 1.9565382532699924,
      "learning_rate": 4.191672515688872e-06,
      "loss": 0.5169,
      "step": 4898
    },
    {
      "epoch": 0.6006620892594409,
      "grad_norm": 2.003963329626254,
      "learning_rate": 4.191299301570539e-06,
      "loss": 0.5267,
      "step": 4899
    },
    {
      "epoch": 0.6007846983815596,
      "grad_norm": 2.148100734967306,
      "learning_rate": 4.190926017936811e-06,
      "loss": 0.5894,
      "step": 4900
    },
    {
      "epoch": 0.6009073075036783,
      "grad_norm": 2.057110018204682,
      "learning_rate": 4.190552664803029e-06,
      "loss": 0.5716,
      "step": 4901
    },
    {
      "epoch": 0.601029916625797,
      "grad_norm": 1.9573638439178729,
      "learning_rate": 4.19017924218454e-06,
      "loss": 0.5821,
      "step": 4902
    },
    {
      "epoch": 0.6011525257479157,
      "grad_norm": 2.045994698693185,
      "learning_rate": 4.189805750096692e-06,
      "loss": 0.6151,
      "step": 4903
    },
    {
      "epoch": 0.6012751348700344,
      "grad_norm": 2.135242926903437,
      "learning_rate": 4.189432188554837e-06,
      "loss": 0.5368,
      "step": 4904
    },
    {
      "epoch": 0.6013977439921531,
      "grad_norm": 2.2402164235215043,
      "learning_rate": 4.189058557574327e-06,
      "loss": 0.5693,
      "step": 4905
    },
    {
      "epoch": 0.6015203531142717,
      "grad_norm": 1.9037832219911612,
      "learning_rate": 4.188684857170519e-06,
      "loss": 0.5392,
      "step": 4906
    },
    {
      "epoch": 0.6016429622363904,
      "grad_norm": 1.829057022636371,
      "learning_rate": 4.188311087358776e-06,
      "loss": 0.4713,
      "step": 4907
    },
    {
      "epoch": 0.601765571358509,
      "grad_norm": 1.908053571116955,
      "learning_rate": 4.187937248154458e-06,
      "loss": 0.5296,
      "step": 4908
    },
    {
      "epoch": 0.6018881804806278,
      "grad_norm": 1.8522681293438905,
      "learning_rate": 4.18756333957293e-06,
      "loss": 0.5331,
      "step": 4909
    },
    {
      "epoch": 0.6020107896027465,
      "grad_norm": 2.0635860009549374,
      "learning_rate": 4.187189361629562e-06,
      "loss": 0.5366,
      "step": 4910
    },
    {
      "epoch": 0.6021333987248652,
      "grad_norm": 2.117795928058934,
      "learning_rate": 4.1868153143397235e-06,
      "loss": 0.5535,
      "step": 4911
    },
    {
      "epoch": 0.6022560078469839,
      "grad_norm": 1.9382041197639692,
      "learning_rate": 4.186441197718789e-06,
      "loss": 0.5715,
      "step": 4912
    },
    {
      "epoch": 0.6023786169691026,
      "grad_norm": 2.1560070307741195,
      "learning_rate": 4.186067011782137e-06,
      "loss": 0.5459,
      "step": 4913
    },
    {
      "epoch": 0.6025012260912211,
      "grad_norm": 1.9340911352504684,
      "learning_rate": 4.185692756545145e-06,
      "loss": 0.5638,
      "step": 4914
    },
    {
      "epoch": 0.6026238352133398,
      "grad_norm": 2.0562742446380535,
      "learning_rate": 4.185318432023197e-06,
      "loss": 0.5313,
      "step": 4915
    },
    {
      "epoch": 0.6027464443354585,
      "grad_norm": 1.8219360944850476,
      "learning_rate": 4.184944038231677e-06,
      "loss": 0.5179,
      "step": 4916
    },
    {
      "epoch": 0.6028690534575772,
      "grad_norm": 2.2004730002995703,
      "learning_rate": 4.1845695751859745e-06,
      "loss": 0.5591,
      "step": 4917
    },
    {
      "epoch": 0.6029916625796959,
      "grad_norm": 2.1371372806264,
      "learning_rate": 4.18419504290148e-06,
      "loss": 0.5854,
      "step": 4918
    },
    {
      "epoch": 0.6031142717018146,
      "grad_norm": 1.8561255917545068,
      "learning_rate": 4.183820441393589e-06,
      "loss": 0.5129,
      "step": 4919
    },
    {
      "epoch": 0.6032368808239333,
      "grad_norm": 1.7911413151998998,
      "learning_rate": 4.183445770677695e-06,
      "loss": 0.5221,
      "step": 4920
    },
    {
      "epoch": 0.603359489946052,
      "grad_norm": 1.9632472384759807,
      "learning_rate": 4.1830710307692e-06,
      "loss": 0.5035,
      "step": 4921
    },
    {
      "epoch": 0.6034820990681706,
      "grad_norm": 1.9266519237935336,
      "learning_rate": 4.182696221683507e-06,
      "loss": 0.5254,
      "step": 4922
    },
    {
      "epoch": 0.6036047081902893,
      "grad_norm": 1.9566420623718217,
      "learning_rate": 4.1823213434360185e-06,
      "loss": 0.5403,
      "step": 4923
    },
    {
      "epoch": 0.603727317312408,
      "grad_norm": 1.9157297027590365,
      "learning_rate": 4.181946396042146e-06,
      "loss": 0.5084,
      "step": 4924
    },
    {
      "epoch": 0.6038499264345267,
      "grad_norm": 1.947366869102171,
      "learning_rate": 4.181571379517299e-06,
      "loss": 0.5159,
      "step": 4925
    },
    {
      "epoch": 0.6039725355566454,
      "grad_norm": 2.137759695484491,
      "learning_rate": 4.18119629387689e-06,
      "loss": 0.5965,
      "step": 4926
    },
    {
      "epoch": 0.6040951446787641,
      "grad_norm": 2.0625522319749843,
      "learning_rate": 4.180821139136338e-06,
      "loss": 0.5334,
      "step": 4927
    },
    {
      "epoch": 0.6042177538008828,
      "grad_norm": 1.777546857101975,
      "learning_rate": 4.1804459153110615e-06,
      "loss": 0.5544,
      "step": 4928
    },
    {
      "epoch": 0.6043403629230014,
      "grad_norm": 1.804338353037367,
      "learning_rate": 4.180070622416482e-06,
      "loss": 0.4844,
      "step": 4929
    },
    {
      "epoch": 0.6044629720451201,
      "grad_norm": 2.1375102729989988,
      "learning_rate": 4.1796952604680265e-06,
      "loss": 0.5444,
      "step": 4930
    },
    {
      "epoch": 0.6045855811672388,
      "grad_norm": 1.8791324877090132,
      "learning_rate": 4.179319829481121e-06,
      "loss": 0.6076,
      "step": 4931
    },
    {
      "epoch": 0.6047081902893575,
      "grad_norm": 1.8739183943042919,
      "learning_rate": 4.178944329471199e-06,
      "loss": 0.5183,
      "step": 4932
    },
    {
      "epoch": 0.6048307994114762,
      "grad_norm": 1.8788647299458847,
      "learning_rate": 4.1785687604536915e-06,
      "loss": 0.5444,
      "step": 4933
    },
    {
      "epoch": 0.6049534085335949,
      "grad_norm": 1.9069199845202964,
      "learning_rate": 4.178193122444038e-06,
      "loss": 0.5175,
      "step": 4934
    },
    {
      "epoch": 0.6050760176557136,
      "grad_norm": 2.1652483759143943,
      "learning_rate": 4.177817415457674e-06,
      "loss": 0.5839,
      "step": 4935
    },
    {
      "epoch": 0.6051986267778323,
      "grad_norm": 2.0975008489109714,
      "learning_rate": 4.177441639510045e-06,
      "loss": 0.5408,
      "step": 4936
    },
    {
      "epoch": 0.6053212358999509,
      "grad_norm": 2.0154825727568486,
      "learning_rate": 4.177065794616595e-06,
      "loss": 0.5658,
      "step": 4937
    },
    {
      "epoch": 0.6054438450220696,
      "grad_norm": 2.19089901616338,
      "learning_rate": 4.176689880792772e-06,
      "loss": 0.6094,
      "step": 4938
    },
    {
      "epoch": 0.6055664541441883,
      "grad_norm": 2.0708548187747042,
      "learning_rate": 4.176313898054027e-06,
      "loss": 0.5659,
      "step": 4939
    },
    {
      "epoch": 0.605689063266307,
      "grad_norm": 2.036300709600475,
      "learning_rate": 4.175937846415811e-06,
      "loss": 0.529,
      "step": 4940
    },
    {
      "epoch": 0.6058116723884257,
      "grad_norm": 2.1001611459493352,
      "learning_rate": 4.1755617258935845e-06,
      "loss": 0.546,
      "step": 4941
    },
    {
      "epoch": 0.6059342815105444,
      "grad_norm": 1.937795614285879,
      "learning_rate": 4.175185536502804e-06,
      "loss": 0.5707,
      "step": 4942
    },
    {
      "epoch": 0.6060568906326631,
      "grad_norm": 1.9391202480745995,
      "learning_rate": 4.174809278258932e-06,
      "loss": 0.5337,
      "step": 4943
    },
    {
      "epoch": 0.6061794997547818,
      "grad_norm": 2.076970831340041,
      "learning_rate": 4.174432951177434e-06,
      "loss": 0.4931,
      "step": 4944
    },
    {
      "epoch": 0.6063021088769004,
      "grad_norm": 2.1192003681932863,
      "learning_rate": 4.174056555273778e-06,
      "loss": 0.5226,
      "step": 4945
    },
    {
      "epoch": 0.6064247179990191,
      "grad_norm": 1.9365627518401867,
      "learning_rate": 4.173680090563433e-06,
      "loss": 0.4875,
      "step": 4946
    },
    {
      "epoch": 0.6065473271211378,
      "grad_norm": 2.229790342063123,
      "learning_rate": 4.173303557061873e-06,
      "loss": 0.5487,
      "step": 4947
    },
    {
      "epoch": 0.6066699362432565,
      "grad_norm": 1.8316500109391383,
      "learning_rate": 4.172926954784575e-06,
      "loss": 0.5192,
      "step": 4948
    },
    {
      "epoch": 0.6067925453653752,
      "grad_norm": 2.175518666014872,
      "learning_rate": 4.172550283747018e-06,
      "loss": 0.547,
      "step": 4949
    },
    {
      "epoch": 0.6069151544874939,
      "grad_norm": 1.8655705067348813,
      "learning_rate": 4.172173543964682e-06,
      "loss": 0.5439,
      "step": 4950
    },
    {
      "epoch": 0.6070377636096126,
      "grad_norm": 2.010544788197159,
      "learning_rate": 4.1717967354530535e-06,
      "loss": 0.5719,
      "step": 4951
    },
    {
      "epoch": 0.6071603727317313,
      "grad_norm": 1.9793857828791435,
      "learning_rate": 4.17141985822762e-06,
      "loss": 0.5782,
      "step": 4952
    },
    {
      "epoch": 0.6072829818538499,
      "grad_norm": 2.159027035742665,
      "learning_rate": 4.171042912303871e-06,
      "loss": 0.4562,
      "step": 4953
    },
    {
      "epoch": 0.6074055909759686,
      "grad_norm": 2.1321107811699838,
      "learning_rate": 4.1706658976973e-06,
      "loss": 0.5528,
      "step": 4954
    },
    {
      "epoch": 0.6075282000980873,
      "grad_norm": 1.9590528559998712,
      "learning_rate": 4.170288814423403e-06,
      "loss": 0.5139,
      "step": 4955
    },
    {
      "epoch": 0.607650809220206,
      "grad_norm": 2.0034454100188412,
      "learning_rate": 4.169911662497679e-06,
      "loss": 0.538,
      "step": 4956
    },
    {
      "epoch": 0.6077734183423247,
      "grad_norm": 2.036765598476887,
      "learning_rate": 4.169534441935629e-06,
      "loss": 0.572,
      "step": 4957
    },
    {
      "epoch": 0.6078960274644434,
      "grad_norm": 1.9108538711582037,
      "learning_rate": 4.169157152752757e-06,
      "loss": 0.5449,
      "step": 4958
    },
    {
      "epoch": 0.6080186365865621,
      "grad_norm": 2.1137903452746847,
      "learning_rate": 4.168779794964572e-06,
      "loss": 0.6247,
      "step": 4959
    },
    {
      "epoch": 0.6081412457086808,
      "grad_norm": 1.9045263720216299,
      "learning_rate": 4.168402368586583e-06,
      "loss": 0.5616,
      "step": 4960
    },
    {
      "epoch": 0.6082638548307994,
      "grad_norm": 2.1619154237934084,
      "learning_rate": 4.168024873634302e-06,
      "loss": 0.5305,
      "step": 4961
    },
    {
      "epoch": 0.6083864639529181,
      "grad_norm": 2.2062166330623483,
      "learning_rate": 4.167647310123247e-06,
      "loss": 0.5512,
      "step": 4962
    },
    {
      "epoch": 0.6085090730750368,
      "grad_norm": 1.8726599922834795,
      "learning_rate": 4.167269678068935e-06,
      "loss": 0.4804,
      "step": 4963
    },
    {
      "epoch": 0.6086316821971555,
      "grad_norm": 2.021630789705674,
      "learning_rate": 4.166891977486886e-06,
      "loss": 0.6024,
      "step": 4964
    },
    {
      "epoch": 0.6087542913192742,
      "grad_norm": 2.156095317748146,
      "learning_rate": 4.166514208392628e-06,
      "loss": 0.5378,
      "step": 4965
    },
    {
      "epoch": 0.6088769004413929,
      "grad_norm": 2.1492080276976377,
      "learning_rate": 4.166136370801685e-06,
      "loss": 0.5786,
      "step": 4966
    },
    {
      "epoch": 0.6089995095635116,
      "grad_norm": 2.065875560039659,
      "learning_rate": 4.165758464729587e-06,
      "loss": 0.5404,
      "step": 4967
    },
    {
      "epoch": 0.6091221186856303,
      "grad_norm": 1.9116887707794596,
      "learning_rate": 4.1653804901918675e-06,
      "loss": 0.467,
      "step": 4968
    },
    {
      "epoch": 0.6092447278077489,
      "grad_norm": 2.311149840807082,
      "learning_rate": 4.16500244720406e-06,
      "loss": 0.5386,
      "step": 4969
    },
    {
      "epoch": 0.6093673369298676,
      "grad_norm": 1.8512911645483758,
      "learning_rate": 4.164624335781706e-06,
      "loss": 0.5867,
      "step": 4970
    },
    {
      "epoch": 0.6094899460519863,
      "grad_norm": 1.8877441079806594,
      "learning_rate": 4.164246155940344e-06,
      "loss": 0.5175,
      "step": 4971
    },
    {
      "epoch": 0.609612555174105,
      "grad_norm": 1.9691090263795434,
      "learning_rate": 4.16386790769552e-06,
      "loss": 0.5297,
      "step": 4972
    },
    {
      "epoch": 0.6097351642962237,
      "grad_norm": 2.277243601566473,
      "learning_rate": 4.163489591062778e-06,
      "loss": 0.5753,
      "step": 4973
    },
    {
      "epoch": 0.6098577734183424,
      "grad_norm": 2.071485923956862,
      "learning_rate": 4.163111206057669e-06,
      "loss": 0.5088,
      "step": 4974
    },
    {
      "epoch": 0.609980382540461,
      "grad_norm": 2.0131472842500133,
      "learning_rate": 4.1627327526957454e-06,
      "loss": 0.5775,
      "step": 4975
    },
    {
      "epoch": 0.6101029916625796,
      "grad_norm": 1.9554442679689379,
      "learning_rate": 4.162354230992562e-06,
      "loss": 0.5336,
      "step": 4976
    },
    {
      "epoch": 0.6102256007846983,
      "grad_norm": 2.0863911541388163,
      "learning_rate": 4.161975640963677e-06,
      "loss": 0.5824,
      "step": 4977
    },
    {
      "epoch": 0.610348209906817,
      "grad_norm": 2.0819166519163517,
      "learning_rate": 4.161596982624651e-06,
      "loss": 0.5273,
      "step": 4978
    },
    {
      "epoch": 0.6104708190289357,
      "grad_norm": 2.092705891678633,
      "learning_rate": 4.161218255991046e-06,
      "loss": 0.5353,
      "step": 4979
    },
    {
      "epoch": 0.6105934281510544,
      "grad_norm": 1.9838463334857885,
      "learning_rate": 4.160839461078431e-06,
      "loss": 0.5058,
      "step": 4980
    },
    {
      "epoch": 0.6107160372731731,
      "grad_norm": 1.9636246509255146,
      "learning_rate": 4.160460597902374e-06,
      "loss": 0.5467,
      "step": 4981
    },
    {
      "epoch": 0.6108386463952918,
      "grad_norm": 1.9370450401925192,
      "learning_rate": 4.160081666478446e-06,
      "loss": 0.5325,
      "step": 4982
    },
    {
      "epoch": 0.6109612555174105,
      "grad_norm": 1.7806447468521371,
      "learning_rate": 4.159702666822223e-06,
      "loss": 0.5146,
      "step": 4983
    },
    {
      "epoch": 0.6110838646395291,
      "grad_norm": 1.90345155997826,
      "learning_rate": 4.159323598949282e-06,
      "loss": 0.5261,
      "step": 4984
    },
    {
      "epoch": 0.6112064737616478,
      "grad_norm": 1.934419487401275,
      "learning_rate": 4.1589444628752025e-06,
      "loss": 0.553,
      "step": 4985
    },
    {
      "epoch": 0.6113290828837665,
      "grad_norm": 2.3006856193432896,
      "learning_rate": 4.1585652586155705e-06,
      "loss": 0.5556,
      "step": 4986
    },
    {
      "epoch": 0.6114516920058852,
      "grad_norm": 1.9861258362877274,
      "learning_rate": 4.1581859861859695e-06,
      "loss": 0.5394,
      "step": 4987
    },
    {
      "epoch": 0.6115743011280039,
      "grad_norm": 1.9395120707111515,
      "learning_rate": 4.1578066456019885e-06,
      "loss": 0.5188,
      "step": 4988
    },
    {
      "epoch": 0.6116969102501226,
      "grad_norm": 1.6391901256182533,
      "learning_rate": 4.157427236879219e-06,
      "loss": 0.5303,
      "step": 4989
    },
    {
      "epoch": 0.6118195193722413,
      "grad_norm": 1.9707122631037262,
      "learning_rate": 4.157047760033257e-06,
      "loss": 0.5298,
      "step": 4990
    },
    {
      "epoch": 0.61194212849436,
      "grad_norm": 2.0173144067201165,
      "learning_rate": 4.156668215079698e-06,
      "loss": 0.5586,
      "step": 4991
    },
    {
      "epoch": 0.6120647376164786,
      "grad_norm": 2.0425554660903855,
      "learning_rate": 4.1562886020341425e-06,
      "loss": 0.5467,
      "step": 4992
    },
    {
      "epoch": 0.6121873467385973,
      "grad_norm": 1.8193363276829948,
      "learning_rate": 4.155908920912193e-06,
      "loss": 0.4954,
      "step": 4993
    },
    {
      "epoch": 0.612309955860716,
      "grad_norm": 2.1879942860507575,
      "learning_rate": 4.155529171729456e-06,
      "loss": 0.5608,
      "step": 4994
    },
    {
      "epoch": 0.6124325649828347,
      "grad_norm": 2.049768430029705,
      "learning_rate": 4.155149354501538e-06,
      "loss": 0.5689,
      "step": 4995
    },
    {
      "epoch": 0.6125551741049534,
      "grad_norm": 1.908595523206632,
      "learning_rate": 4.154769469244052e-06,
      "loss": 0.5717,
      "step": 4996
    },
    {
      "epoch": 0.6126777832270721,
      "grad_norm": 1.699666513674382,
      "learning_rate": 4.154389515972611e-06,
      "loss": 0.5599,
      "step": 4997
    },
    {
      "epoch": 0.6128003923491908,
      "grad_norm": 1.8343449813656025,
      "learning_rate": 4.154009494702832e-06,
      "loss": 0.5648,
      "step": 4998
    },
    {
      "epoch": 0.6129230014713095,
      "grad_norm": 2.045239621562387,
      "learning_rate": 4.1536294054503346e-06,
      "loss": 0.5372,
      "step": 4999
    },
    {
      "epoch": 0.6130456105934281,
      "grad_norm": 2.022431150790463,
      "learning_rate": 4.153249248230742e-06,
      "loss": 0.5267,
      "step": 5000
    },
    {
      "epoch": 0.6131682197155468,
      "grad_norm": 1.9266031123652663,
      "learning_rate": 4.1528690230596765e-06,
      "loss": 0.5565,
      "step": 5001
    },
    {
      "epoch": 0.6132908288376655,
      "grad_norm": 2.0515892482830753,
      "learning_rate": 4.1524887299527695e-06,
      "loss": 0.513,
      "step": 5002
    },
    {
      "epoch": 0.6134134379597842,
      "grad_norm": 1.9920789132509076,
      "learning_rate": 4.152108368925651e-06,
      "loss": 0.5911,
      "step": 5003
    },
    {
      "epoch": 0.6135360470819029,
      "grad_norm": 1.8393319903374887,
      "learning_rate": 4.151727939993952e-06,
      "loss": 0.4627,
      "step": 5004
    },
    {
      "epoch": 0.6136586562040216,
      "grad_norm": 1.8779828074218743,
      "learning_rate": 4.151347443173311e-06,
      "loss": 0.5243,
      "step": 5005
    },
    {
      "epoch": 0.6137812653261403,
      "grad_norm": 1.831512025862714,
      "learning_rate": 4.1509668784793665e-06,
      "loss": 0.5095,
      "step": 5006
    },
    {
      "epoch": 0.613903874448259,
      "grad_norm": 1.765726347557373,
      "learning_rate": 4.150586245927762e-06,
      "loss": 0.5131,
      "step": 5007
    },
    {
      "epoch": 0.6140264835703776,
      "grad_norm": 2.062598107620847,
      "learning_rate": 4.150205545534139e-06,
      "loss": 0.5528,
      "step": 5008
    },
    {
      "epoch": 0.6141490926924963,
      "grad_norm": 1.9744621239870184,
      "learning_rate": 4.149824777314147e-06,
      "loss": 0.5788,
      "step": 5009
    },
    {
      "epoch": 0.614271701814615,
      "grad_norm": 1.8823229279886466,
      "learning_rate": 4.1494439412834355e-06,
      "loss": 0.5396,
      "step": 5010
    },
    {
      "epoch": 0.6143943109367337,
      "grad_norm": 2.1413176695245864,
      "learning_rate": 4.149063037457658e-06,
      "loss": 0.5399,
      "step": 5011
    },
    {
      "epoch": 0.6145169200588524,
      "grad_norm": 1.7243239469131204,
      "learning_rate": 4.14868206585247e-06,
      "loss": 0.5454,
      "step": 5012
    },
    {
      "epoch": 0.6146395291809711,
      "grad_norm": 2.178315090396028,
      "learning_rate": 4.148301026483531e-06,
      "loss": 0.5518,
      "step": 5013
    },
    {
      "epoch": 0.6147621383030898,
      "grad_norm": 2.0306259023336177,
      "learning_rate": 4.147919919366502e-06,
      "loss": 0.5273,
      "step": 5014
    },
    {
      "epoch": 0.6148847474252085,
      "grad_norm": 1.8455202097118544,
      "learning_rate": 4.147538744517046e-06,
      "loss": 0.5021,
      "step": 5015
    },
    {
      "epoch": 0.6150073565473271,
      "grad_norm": 1.7594965476198814,
      "learning_rate": 4.1471575019508315e-06,
      "loss": 0.4847,
      "step": 5016
    },
    {
      "epoch": 0.6151299656694458,
      "grad_norm": 2.0034919460045324,
      "learning_rate": 4.146776191683527e-06,
      "loss": 0.6031,
      "step": 5017
    },
    {
      "epoch": 0.6152525747915645,
      "grad_norm": 1.8400745342179377,
      "learning_rate": 4.146394813730806e-06,
      "loss": 0.5294,
      "step": 5018
    },
    {
      "epoch": 0.6153751839136832,
      "grad_norm": 1.9269833305519395,
      "learning_rate": 4.146013368108344e-06,
      "loss": 0.5559,
      "step": 5019
    },
    {
      "epoch": 0.6154977930358019,
      "grad_norm": 2.0251725467467674,
      "learning_rate": 4.145631854831818e-06,
      "loss": 0.5803,
      "step": 5020
    },
    {
      "epoch": 0.6156204021579206,
      "grad_norm": 2.0527099366803143,
      "learning_rate": 4.14525027391691e-06,
      "loss": 0.5368,
      "step": 5021
    },
    {
      "epoch": 0.6157430112800393,
      "grad_norm": 1.9904136443678655,
      "learning_rate": 4.144868625379302e-06,
      "loss": 0.5303,
      "step": 5022
    },
    {
      "epoch": 0.6158656204021579,
      "grad_norm": 2.1212066022350484,
      "learning_rate": 4.144486909234682e-06,
      "loss": 0.5166,
      "step": 5023
    },
    {
      "epoch": 0.6159882295242766,
      "grad_norm": 1.8689938299970807,
      "learning_rate": 4.144105125498739e-06,
      "loss": 0.5385,
      "step": 5024
    },
    {
      "epoch": 0.6161108386463953,
      "grad_norm": 1.9456509950884207,
      "learning_rate": 4.143723274187164e-06,
      "loss": 0.5698,
      "step": 5025
    },
    {
      "epoch": 0.616233447768514,
      "grad_norm": 1.896760063392623,
      "learning_rate": 4.143341355315653e-06,
      "loss": 0.5271,
      "step": 5026
    },
    {
      "epoch": 0.6163560568906327,
      "grad_norm": 1.9954424848465588,
      "learning_rate": 4.142959368899902e-06,
      "loss": 0.5048,
      "step": 5027
    },
    {
      "epoch": 0.6164786660127514,
      "grad_norm": 1.913229284901435,
      "learning_rate": 4.142577314955614e-06,
      "loss": 0.5123,
      "step": 5028
    },
    {
      "epoch": 0.6166012751348701,
      "grad_norm": 2.155591983412093,
      "learning_rate": 4.142195193498489e-06,
      "loss": 0.6142,
      "step": 5029
    },
    {
      "epoch": 0.6167238842569888,
      "grad_norm": 2.0975897627092808,
      "learning_rate": 4.141813004544235e-06,
      "loss": 0.5445,
      "step": 5030
    },
    {
      "epoch": 0.6168464933791074,
      "grad_norm": 2.0734813798738956,
      "learning_rate": 4.141430748108559e-06,
      "loss": 0.4935,
      "step": 5031
    },
    {
      "epoch": 0.616969102501226,
      "grad_norm": 1.8582132710244528,
      "learning_rate": 4.141048424207175e-06,
      "loss": 0.5807,
      "step": 5032
    },
    {
      "epoch": 0.6170917116233448,
      "grad_norm": 1.877095765017051,
      "learning_rate": 4.1406660328557934e-06,
      "loss": 0.525,
      "step": 5033
    },
    {
      "epoch": 0.6172143207454635,
      "grad_norm": 1.9640785575299795,
      "learning_rate": 4.140283574070135e-06,
      "loss": 0.5224,
      "step": 5034
    },
    {
      "epoch": 0.6173369298675822,
      "grad_norm": 1.8411178652107827,
      "learning_rate": 4.139901047865918e-06,
      "loss": 0.5127,
      "step": 5035
    },
    {
      "epoch": 0.6174595389897009,
      "grad_norm": 1.7042643866455016,
      "learning_rate": 4.139518454258863e-06,
      "loss": 0.5137,
      "step": 5036
    },
    {
      "epoch": 0.6175821481118196,
      "grad_norm": 1.86539574595805,
      "learning_rate": 4.139135793264698e-06,
      "loss": 0.5522,
      "step": 5037
    },
    {
      "epoch": 0.6177047572339383,
      "grad_norm": 1.9723614981601145,
      "learning_rate": 4.13875306489915e-06,
      "loss": 0.5465,
      "step": 5038
    },
    {
      "epoch": 0.6178273663560568,
      "grad_norm": 1.973268054115767,
      "learning_rate": 4.1383702691779494e-06,
      "loss": 0.5412,
      "step": 5039
    },
    {
      "epoch": 0.6179499754781755,
      "grad_norm": 1.7786914311105015,
      "learning_rate": 4.1379874061168315e-06,
      "loss": 0.4906,
      "step": 5040
    },
    {
      "epoch": 0.6180725846002942,
      "grad_norm": 1.964867437324583,
      "learning_rate": 4.13760447573153e-06,
      "loss": 0.5232,
      "step": 5041
    },
    {
      "epoch": 0.6181951937224129,
      "grad_norm": 1.8466234380659845,
      "learning_rate": 4.137221478037786e-06,
      "loss": 0.5307,
      "step": 5042
    },
    {
      "epoch": 0.6183178028445316,
      "grad_norm": 1.9993084250065096,
      "learning_rate": 4.136838413051341e-06,
      "loss": 0.5238,
      "step": 5043
    },
    {
      "epoch": 0.6184404119666503,
      "grad_norm": 2.0448898499034738,
      "learning_rate": 4.1364552807879385e-06,
      "loss": 0.5073,
      "step": 5044
    },
    {
      "epoch": 0.618563021088769,
      "grad_norm": 1.893643458837407,
      "learning_rate": 4.136072081263327e-06,
      "loss": 0.5728,
      "step": 5045
    },
    {
      "epoch": 0.6186856302108877,
      "grad_norm": 1.8613803861690126,
      "learning_rate": 4.135688814493258e-06,
      "loss": 0.5148,
      "step": 5046
    },
    {
      "epoch": 0.6188082393330063,
      "grad_norm": 1.7710565013662745,
      "learning_rate": 4.135305480493481e-06,
      "loss": 0.5493,
      "step": 5047
    },
    {
      "epoch": 0.618930848455125,
      "grad_norm": 2.104630693257007,
      "learning_rate": 4.134922079279755e-06,
      "loss": 0.551,
      "step": 5048
    },
    {
      "epoch": 0.6190534575772437,
      "grad_norm": 2.1008868149786757,
      "learning_rate": 4.134538610867837e-06,
      "loss": 0.5889,
      "step": 5049
    },
    {
      "epoch": 0.6191760666993624,
      "grad_norm": 1.8217077870373488,
      "learning_rate": 4.134155075273487e-06,
      "loss": 0.52,
      "step": 5050
    },
    {
      "epoch": 0.6192986758214811,
      "grad_norm": 2.0542767778882816,
      "learning_rate": 4.133771472512472e-06,
      "loss": 0.49,
      "step": 5051
    },
    {
      "epoch": 0.6194212849435998,
      "grad_norm": 1.9815991483792301,
      "learning_rate": 4.133387802600557e-06,
      "loss": 0.5472,
      "step": 5052
    },
    {
      "epoch": 0.6195438940657185,
      "grad_norm": 1.8232055546844828,
      "learning_rate": 4.1330040655535105e-06,
      "loss": 0.5413,
      "step": 5053
    },
    {
      "epoch": 0.6196665031878372,
      "grad_norm": 1.8686093448766499,
      "learning_rate": 4.1326202613871065e-06,
      "loss": 0.5543,
      "step": 5054
    },
    {
      "epoch": 0.6197891123099558,
      "grad_norm": 1.7712107409808295,
      "learning_rate": 4.13223639011712e-06,
      "loss": 0.5442,
      "step": 5055
    },
    {
      "epoch": 0.6199117214320745,
      "grad_norm": 1.9208091728542158,
      "learning_rate": 4.131852451759328e-06,
      "loss": 0.5397,
      "step": 5056
    },
    {
      "epoch": 0.6200343305541932,
      "grad_norm": 1.7030139145688339,
      "learning_rate": 4.131468446329511e-06,
      "loss": 0.5139,
      "step": 5057
    },
    {
      "epoch": 0.6201569396763119,
      "grad_norm": 1.9448631516593753,
      "learning_rate": 4.131084373843453e-06,
      "loss": 0.4946,
      "step": 5058
    },
    {
      "epoch": 0.6202795487984306,
      "grad_norm": 1.9727508249364523,
      "learning_rate": 4.130700234316939e-06,
      "loss": 0.5853,
      "step": 5059
    },
    {
      "epoch": 0.6204021579205493,
      "grad_norm": 1.8462836677680696,
      "learning_rate": 4.130316027765758e-06,
      "loss": 0.4935,
      "step": 5060
    },
    {
      "epoch": 0.620524767042668,
      "grad_norm": 1.7874621645738453,
      "learning_rate": 4.1299317542057035e-06,
      "loss": 0.5446,
      "step": 5061
    },
    {
      "epoch": 0.6206473761647867,
      "grad_norm": 1.8674399599123956,
      "learning_rate": 4.129547413652567e-06,
      "loss": 0.5719,
      "step": 5062
    },
    {
      "epoch": 0.6207699852869053,
      "grad_norm": 1.814677106562758,
      "learning_rate": 4.129163006122147e-06,
      "loss": 0.5039,
      "step": 5063
    },
    {
      "epoch": 0.620892594409024,
      "grad_norm": 1.8942483673172374,
      "learning_rate": 4.1287785316302445e-06,
      "loss": 0.5736,
      "step": 5064
    },
    {
      "epoch": 0.6210152035311427,
      "grad_norm": 2.064423901112212,
      "learning_rate": 4.128393990192661e-06,
      "loss": 0.5861,
      "step": 5065
    },
    {
      "epoch": 0.6211378126532614,
      "grad_norm": 1.922415122364791,
      "learning_rate": 4.1280093818252e-06,
      "loss": 0.4963,
      "step": 5066
    },
    {
      "epoch": 0.6212604217753801,
      "grad_norm": 1.8251190573421134,
      "learning_rate": 4.127624706543671e-06,
      "loss": 0.5964,
      "step": 5067
    },
    {
      "epoch": 0.6213830308974988,
      "grad_norm": 1.851544517401785,
      "learning_rate": 4.127239964363887e-06,
      "loss": 0.541,
      "step": 5068
    },
    {
      "epoch": 0.6215056400196175,
      "grad_norm": 1.7949607486858146,
      "learning_rate": 4.126855155301659e-06,
      "loss": 0.4959,
      "step": 5069
    },
    {
      "epoch": 0.6216282491417362,
      "grad_norm": 2.007070903824414,
      "learning_rate": 4.126470279372803e-06,
      "loss": 0.5363,
      "step": 5070
    },
    {
      "epoch": 0.6217508582638548,
      "grad_norm": 1.8748446410580621,
      "learning_rate": 4.126085336593139e-06,
      "loss": 0.5539,
      "step": 5071
    },
    {
      "epoch": 0.6218734673859735,
      "grad_norm": 2.2514133982047366,
      "learning_rate": 4.12570032697849e-06,
      "loss": 0.6189,
      "step": 5072
    },
    {
      "epoch": 0.6219960765080922,
      "grad_norm": 2.0869772929735118,
      "learning_rate": 4.125315250544679e-06,
      "loss": 0.5934,
      "step": 5073
    },
    {
      "epoch": 0.6221186856302109,
      "grad_norm": 2.248243921229412,
      "learning_rate": 4.124930107307534e-06,
      "loss": 0.5698,
      "step": 5074
    },
    {
      "epoch": 0.6222412947523296,
      "grad_norm": 2.024777523130653,
      "learning_rate": 4.124544897282885e-06,
      "loss": 0.5302,
      "step": 5075
    },
    {
      "epoch": 0.6223639038744483,
      "grad_norm": 1.946945748285468,
      "learning_rate": 4.124159620486564e-06,
      "loss": 0.5776,
      "step": 5076
    },
    {
      "epoch": 0.622486512996567,
      "grad_norm": 1.9099185902735685,
      "learning_rate": 4.123774276934408e-06,
      "loss": 0.5724,
      "step": 5077
    },
    {
      "epoch": 0.6226091221186856,
      "grad_norm": 2.091390779872443,
      "learning_rate": 4.123388866642255e-06,
      "loss": 0.597,
      "step": 5078
    },
    {
      "epoch": 0.6227317312408043,
      "grad_norm": 2.0710232497631287,
      "learning_rate": 4.123003389625944e-06,
      "loss": 0.576,
      "step": 5079
    },
    {
      "epoch": 0.622854340362923,
      "grad_norm": 2.03628333524079,
      "learning_rate": 4.122617845901322e-06,
      "loss": 0.573,
      "step": 5080
    },
    {
      "epoch": 0.6229769494850417,
      "grad_norm": 1.9989127526816786,
      "learning_rate": 4.122232235484233e-06,
      "loss": 0.5759,
      "step": 5081
    },
    {
      "epoch": 0.6230995586071604,
      "grad_norm": 1.8030822522817285,
      "learning_rate": 4.121846558390528e-06,
      "loss": 0.5035,
      "step": 5082
    },
    {
      "epoch": 0.6232221677292791,
      "grad_norm": 1.940643100865622,
      "learning_rate": 4.121460814636057e-06,
      "loss": 0.5653,
      "step": 5083
    },
    {
      "epoch": 0.6233447768513978,
      "grad_norm": 1.977048442700943,
      "learning_rate": 4.121075004236677e-06,
      "loss": 0.5522,
      "step": 5084
    },
    {
      "epoch": 0.6234673859735165,
      "grad_norm": 1.9903692039381722,
      "learning_rate": 4.120689127208243e-06,
      "loss": 0.5483,
      "step": 5085
    },
    {
      "epoch": 0.6235899950956351,
      "grad_norm": 1.9325171083663029,
      "learning_rate": 4.120303183566618e-06,
      "loss": 0.5031,
      "step": 5086
    },
    {
      "epoch": 0.6237126042177538,
      "grad_norm": 1.8854580372492278,
      "learning_rate": 4.119917173327663e-06,
      "loss": 0.5287,
      "step": 5087
    },
    {
      "epoch": 0.6238352133398725,
      "grad_norm": 2.141413092398457,
      "learning_rate": 4.119531096507245e-06,
      "loss": 0.4977,
      "step": 5088
    },
    {
      "epoch": 0.6239578224619912,
      "grad_norm": 2.0078875395521623,
      "learning_rate": 4.119144953121232e-06,
      "loss": 0.5702,
      "step": 5089
    },
    {
      "epoch": 0.6240804315841099,
      "grad_norm": 1.9891202707246576,
      "learning_rate": 4.118758743185494e-06,
      "loss": 0.5898,
      "step": 5090
    },
    {
      "epoch": 0.6242030407062286,
      "grad_norm": 1.854358813058139,
      "learning_rate": 4.1183724667159056e-06,
      "loss": 0.5077,
      "step": 5091
    },
    {
      "epoch": 0.6243256498283473,
      "grad_norm": 2.0356937414147813,
      "learning_rate": 4.117986123728345e-06,
      "loss": 0.5503,
      "step": 5092
    },
    {
      "epoch": 0.624448258950466,
      "grad_norm": 1.9603302310402246,
      "learning_rate": 4.1175997142386894e-06,
      "loss": 0.526,
      "step": 5093
    },
    {
      "epoch": 0.6245708680725846,
      "grad_norm": 2.085524567660029,
      "learning_rate": 4.117213238262822e-06,
      "loss": 0.5618,
      "step": 5094
    },
    {
      "epoch": 0.6246934771947033,
      "grad_norm": 2.0893008799941994,
      "learning_rate": 4.116826695816627e-06,
      "loss": 0.573,
      "step": 5095
    },
    {
      "epoch": 0.624816086316822,
      "grad_norm": 2.0901301001806765,
      "learning_rate": 4.116440086915993e-06,
      "loss": 0.5929,
      "step": 5096
    },
    {
      "epoch": 0.6249386954389407,
      "grad_norm": 1.9384196143512897,
      "learning_rate": 4.11605341157681e-06,
      "loss": 0.5601,
      "step": 5097
    },
    {
      "epoch": 0.6250613045610593,
      "grad_norm": 2.0137420618183053,
      "learning_rate": 4.11566666981497e-06,
      "loss": 0.5819,
      "step": 5098
    },
    {
      "epoch": 0.625183913683178,
      "grad_norm": 2.05318810120073,
      "learning_rate": 4.11527986164637e-06,
      "loss": 0.5265,
      "step": 5099
    },
    {
      "epoch": 0.6253065228052967,
      "grad_norm": 2.0137173348894954,
      "learning_rate": 4.114892987086908e-06,
      "loss": 0.535,
      "step": 5100
    },
    {
      "epoch": 0.6254291319274154,
      "grad_norm": 1.838807759508794,
      "learning_rate": 4.114506046152486e-06,
      "loss": 0.4957,
      "step": 5101
    },
    {
      "epoch": 0.625551741049534,
      "grad_norm": 1.970977255494076,
      "learning_rate": 4.114119038859006e-06,
      "loss": 0.499,
      "step": 5102
    },
    {
      "epoch": 0.6256743501716527,
      "grad_norm": 2.0652965112285737,
      "learning_rate": 4.1137319652223765e-06,
      "loss": 0.5359,
      "step": 5103
    },
    {
      "epoch": 0.6257969592937714,
      "grad_norm": 1.936500747427007,
      "learning_rate": 4.113344825258506e-06,
      "loss": 0.5074,
      "step": 5104
    },
    {
      "epoch": 0.6259195684158901,
      "grad_norm": 2.0143096235632534,
      "learning_rate": 4.1129576189833075e-06,
      "loss": 0.5472,
      "step": 5105
    },
    {
      "epoch": 0.6260421775380088,
      "grad_norm": 2.0595624477891956,
      "learning_rate": 4.112570346412696e-06,
      "loss": 0.5432,
      "step": 5106
    },
    {
      "epoch": 0.6261647866601275,
      "grad_norm": 1.9232373854879667,
      "learning_rate": 4.1121830075625875e-06,
      "loss": 0.5528,
      "step": 5107
    },
    {
      "epoch": 0.6262873957822462,
      "grad_norm": 2.0685935771001676,
      "learning_rate": 4.111795602448903e-06,
      "loss": 0.5767,
      "step": 5108
    },
    {
      "epoch": 0.6264100049043649,
      "grad_norm": 2.0145632106591593,
      "learning_rate": 4.111408131087566e-06,
      "loss": 0.5563,
      "step": 5109
    },
    {
      "epoch": 0.6265326140264835,
      "grad_norm": 1.9949858580806026,
      "learning_rate": 4.111020593494503e-06,
      "loss": 0.5122,
      "step": 5110
    },
    {
      "epoch": 0.6266552231486022,
      "grad_norm": 1.8040494834324943,
      "learning_rate": 4.11063298968564e-06,
      "loss": 0.5047,
      "step": 5111
    },
    {
      "epoch": 0.6267778322707209,
      "grad_norm": 1.9302512845986226,
      "learning_rate": 4.11024531967691e-06,
      "loss": 0.5312,
      "step": 5112
    },
    {
      "epoch": 0.6269004413928396,
      "grad_norm": 1.9882996611281167,
      "learning_rate": 4.109857583484248e-06,
      "loss": 0.5555,
      "step": 5113
    },
    {
      "epoch": 0.6270230505149583,
      "grad_norm": 2.017029139995638,
      "learning_rate": 4.109469781123588e-06,
      "loss": 0.5278,
      "step": 5114
    },
    {
      "epoch": 0.627145659637077,
      "grad_norm": 1.8586705853177274,
      "learning_rate": 4.1090819126108715e-06,
      "loss": 0.5483,
      "step": 5115
    },
    {
      "epoch": 0.6272682687591957,
      "grad_norm": 1.8959007310455482,
      "learning_rate": 4.108693977962039e-06,
      "loss": 0.5071,
      "step": 5116
    },
    {
      "epoch": 0.6273908778813144,
      "grad_norm": 2.0117828111374823,
      "learning_rate": 4.108305977193036e-06,
      "loss": 0.5642,
      "step": 5117
    },
    {
      "epoch": 0.627513487003433,
      "grad_norm": 2.0389011606512053,
      "learning_rate": 4.10791791031981e-06,
      "loss": 0.4979,
      "step": 5118
    },
    {
      "epoch": 0.6276360961255517,
      "grad_norm": 1.8681931107466463,
      "learning_rate": 4.107529777358312e-06,
      "loss": 0.5469,
      "step": 5119
    },
    {
      "epoch": 0.6277587052476704,
      "grad_norm": 1.8953970500802764,
      "learning_rate": 4.107141578324494e-06,
      "loss": 0.5552,
      "step": 5120
    },
    {
      "epoch": 0.6278813143697891,
      "grad_norm": 1.8606832804689533,
      "learning_rate": 4.106753313234312e-06,
      "loss": 0.5675,
      "step": 5121
    },
    {
      "epoch": 0.6280039234919078,
      "grad_norm": 2.104978558252579,
      "learning_rate": 4.106364982103724e-06,
      "loss": 0.5313,
      "step": 5122
    },
    {
      "epoch": 0.6281265326140265,
      "grad_norm": 2.0376302216761535,
      "learning_rate": 4.105976584948691e-06,
      "loss": 0.5517,
      "step": 5123
    },
    {
      "epoch": 0.6282491417361452,
      "grad_norm": 2.032863515670374,
      "learning_rate": 4.105588121785178e-06,
      "loss": 0.5325,
      "step": 5124
    },
    {
      "epoch": 0.6283717508582638,
      "grad_norm": 1.9181122223088525,
      "learning_rate": 4.10519959262915e-06,
      "loss": 0.529,
      "step": 5125
    },
    {
      "epoch": 0.6284943599803825,
      "grad_norm": 2.05837007121759,
      "learning_rate": 4.104810997496577e-06,
      "loss": 0.6064,
      "step": 5126
    },
    {
      "epoch": 0.6286169691025012,
      "grad_norm": 1.7589827219284753,
      "learning_rate": 4.104422336403432e-06,
      "loss": 0.5121,
      "step": 5127
    },
    {
      "epoch": 0.6287395782246199,
      "grad_norm": 1.9779913793350845,
      "learning_rate": 4.104033609365687e-06,
      "loss": 0.5349,
      "step": 5128
    },
    {
      "epoch": 0.6288621873467386,
      "grad_norm": 1.9296190027180238,
      "learning_rate": 4.103644816399323e-06,
      "loss": 0.5569,
      "step": 5129
    },
    {
      "epoch": 0.6289847964688573,
      "grad_norm": 2.15759613410017,
      "learning_rate": 4.103255957520316e-06,
      "loss": 0.5665,
      "step": 5130
    },
    {
      "epoch": 0.629107405590976,
      "grad_norm": 1.942147610696711,
      "learning_rate": 4.102867032744652e-06,
      "loss": 0.5095,
      "step": 5131
    },
    {
      "epoch": 0.6292300147130947,
      "grad_norm": 1.8323326291809312,
      "learning_rate": 4.102478042088315e-06,
      "loss": 0.5495,
      "step": 5132
    },
    {
      "epoch": 0.6293526238352133,
      "grad_norm": 1.9079829048967576,
      "learning_rate": 4.102088985567293e-06,
      "loss": 0.4577,
      "step": 5133
    },
    {
      "epoch": 0.629475232957332,
      "grad_norm": 1.973944223816147,
      "learning_rate": 4.101699863197578e-06,
      "loss": 0.5975,
      "step": 5134
    },
    {
      "epoch": 0.6295978420794507,
      "grad_norm": 2.0439411926817206,
      "learning_rate": 4.1013106749951625e-06,
      "loss": 0.547,
      "step": 5135
    },
    {
      "epoch": 0.6297204512015694,
      "grad_norm": 1.985973915645312,
      "learning_rate": 4.100921420976044e-06,
      "loss": 0.5318,
      "step": 5136
    },
    {
      "epoch": 0.6298430603236881,
      "grad_norm": 1.8273932311223882,
      "learning_rate": 4.10053210115622e-06,
      "loss": 0.6092,
      "step": 5137
    },
    {
      "epoch": 0.6299656694458068,
      "grad_norm": 2.0968644755666745,
      "learning_rate": 4.1001427155516946e-06,
      "loss": 0.5697,
      "step": 5138
    },
    {
      "epoch": 0.6300882785679255,
      "grad_norm": 2.1178370897375673,
      "learning_rate": 4.099753264178469e-06,
      "loss": 0.5809,
      "step": 5139
    },
    {
      "epoch": 0.6302108876900442,
      "grad_norm": 2.1304943683265405,
      "learning_rate": 4.099363747052554e-06,
      "loss": 0.5563,
      "step": 5140
    },
    {
      "epoch": 0.6303334968121628,
      "grad_norm": 2.4507652816118175,
      "learning_rate": 4.098974164189956e-06,
      "loss": 0.549,
      "step": 5141
    },
    {
      "epoch": 0.6304561059342815,
      "grad_norm": 2.037590679690543,
      "learning_rate": 4.09858451560669e-06,
      "loss": 0.5052,
      "step": 5142
    },
    {
      "epoch": 0.6305787150564002,
      "grad_norm": 2.0310700823157872,
      "learning_rate": 4.098194801318769e-06,
      "loss": 0.534,
      "step": 5143
    },
    {
      "epoch": 0.6307013241785189,
      "grad_norm": 1.7353202403053163,
      "learning_rate": 4.0978050213422135e-06,
      "loss": 0.5093,
      "step": 5144
    },
    {
      "epoch": 0.6308239333006376,
      "grad_norm": 1.775488004867914,
      "learning_rate": 4.097415175693043e-06,
      "loss": 0.4938,
      "step": 5145
    },
    {
      "epoch": 0.6309465424227563,
      "grad_norm": 1.9954122728539565,
      "learning_rate": 4.09702526438728e-06,
      "loss": 0.567,
      "step": 5146
    },
    {
      "epoch": 0.631069151544875,
      "grad_norm": 1.8363860458673027,
      "learning_rate": 4.096635287440952e-06,
      "loss": 0.5549,
      "step": 5147
    },
    {
      "epoch": 0.6311917606669937,
      "grad_norm": 1.9786998753809795,
      "learning_rate": 4.096245244870087e-06,
      "loss": 0.5597,
      "step": 5148
    },
    {
      "epoch": 0.6313143697891123,
      "grad_norm": 2.071240610800331,
      "learning_rate": 4.095855136690716e-06,
      "loss": 0.5481,
      "step": 5149
    },
    {
      "epoch": 0.631436978911231,
      "grad_norm": 1.888294047478305,
      "learning_rate": 4.095464962918874e-06,
      "loss": 0.5302,
      "step": 5150
    },
    {
      "epoch": 0.6315595880333497,
      "grad_norm": 1.9362246236204073,
      "learning_rate": 4.095074723570598e-06,
      "loss": 0.5588,
      "step": 5151
    },
    {
      "epoch": 0.6316821971554684,
      "grad_norm": 1.968559592836428,
      "learning_rate": 4.094684418661926e-06,
      "loss": 0.5518,
      "step": 5152
    },
    {
      "epoch": 0.6318048062775871,
      "grad_norm": 2.2964325787508177,
      "learning_rate": 4.094294048208902e-06,
      "loss": 0.5259,
      "step": 5153
    },
    {
      "epoch": 0.6319274153997058,
      "grad_norm": 2.1941467989139807,
      "learning_rate": 4.09390361222757e-06,
      "loss": 0.5751,
      "step": 5154
    },
    {
      "epoch": 0.6320500245218245,
      "grad_norm": 1.6998121893482483,
      "learning_rate": 4.093513110733978e-06,
      "loss": 0.4937,
      "step": 5155
    },
    {
      "epoch": 0.6321726336439432,
      "grad_norm": 1.9279736005346846,
      "learning_rate": 4.093122543744174e-06,
      "loss": 0.5356,
      "step": 5156
    },
    {
      "epoch": 0.6322952427660617,
      "grad_norm": 1.8607186195768428,
      "learning_rate": 4.092731911274216e-06,
      "loss": 0.5304,
      "step": 5157
    },
    {
      "epoch": 0.6324178518881804,
      "grad_norm": 2.2138685629428605,
      "learning_rate": 4.092341213340155e-06,
      "loss": 0.5564,
      "step": 5158
    },
    {
      "epoch": 0.6325404610102991,
      "grad_norm": 2.108416467531767,
      "learning_rate": 4.091950449958051e-06,
      "loss": 0.5496,
      "step": 5159
    },
    {
      "epoch": 0.6326630701324178,
      "grad_norm": 1.9951273797542999,
      "learning_rate": 4.091559621143966e-06,
      "loss": 0.5203,
      "step": 5160
    },
    {
      "epoch": 0.6327856792545365,
      "grad_norm": 1.9811491717340615,
      "learning_rate": 4.091168726913963e-06,
      "loss": 0.5464,
      "step": 5161
    },
    {
      "epoch": 0.6329082883766552,
      "grad_norm": 2.0791928346766135,
      "learning_rate": 4.090777767284108e-06,
      "loss": 0.5814,
      "step": 5162
    },
    {
      "epoch": 0.633030897498774,
      "grad_norm": 1.9800651780452327,
      "learning_rate": 4.0903867422704706e-06,
      "loss": 0.4996,
      "step": 5163
    },
    {
      "epoch": 0.6331535066208926,
      "grad_norm": 2.1530746589920033,
      "learning_rate": 4.089995651889122e-06,
      "loss": 0.4862,
      "step": 5164
    },
    {
      "epoch": 0.6332761157430112,
      "grad_norm": 2.173339243296745,
      "learning_rate": 4.089604496156139e-06,
      "loss": 0.5287,
      "step": 5165
    },
    {
      "epoch": 0.6333987248651299,
      "grad_norm": 2.10356749460482,
      "learning_rate": 4.089213275087595e-06,
      "loss": 0.5277,
      "step": 5166
    },
    {
      "epoch": 0.6335213339872486,
      "grad_norm": 1.918788824640104,
      "learning_rate": 4.088821988699572e-06,
      "loss": 0.5446,
      "step": 5167
    },
    {
      "epoch": 0.6336439431093673,
      "grad_norm": 1.9584998594653797,
      "learning_rate": 4.088430637008154e-06,
      "loss": 0.5399,
      "step": 5168
    },
    {
      "epoch": 0.633766552231486,
      "grad_norm": 1.9073936215861145,
      "learning_rate": 4.088039220029424e-06,
      "loss": 0.4818,
      "step": 5169
    },
    {
      "epoch": 0.6338891613536047,
      "grad_norm": 1.9358718909278718,
      "learning_rate": 4.08764773777947e-06,
      "loss": 0.5895,
      "step": 5170
    },
    {
      "epoch": 0.6340117704757234,
      "grad_norm": 1.9190380999324597,
      "learning_rate": 4.087256190274383e-06,
      "loss": 0.5369,
      "step": 5171
    },
    {
      "epoch": 0.6341343795978421,
      "grad_norm": 2.027439327750049,
      "learning_rate": 4.086864577530258e-06,
      "loss": 0.522,
      "step": 5172
    },
    {
      "epoch": 0.6342569887199607,
      "grad_norm": 2.0480159582581816,
      "learning_rate": 4.0864728995631895e-06,
      "loss": 0.5649,
      "step": 5173
    },
    {
      "epoch": 0.6343795978420794,
      "grad_norm": 1.9948666010829403,
      "learning_rate": 4.0860811563892756e-06,
      "loss": 0.5641,
      "step": 5174
    },
    {
      "epoch": 0.6345022069641981,
      "grad_norm": 1.9600701444689879,
      "learning_rate": 4.085689348024618e-06,
      "loss": 0.5162,
      "step": 5175
    },
    {
      "epoch": 0.6346248160863168,
      "grad_norm": 1.883180470268028,
      "learning_rate": 4.0852974744853215e-06,
      "loss": 0.5693,
      "step": 5176
    },
    {
      "epoch": 0.6347474252084355,
      "grad_norm": 2.1026149092271744,
      "learning_rate": 4.0849055357874915e-06,
      "loss": 0.5227,
      "step": 5177
    },
    {
      "epoch": 0.6348700343305542,
      "grad_norm": 2.0278825295986294,
      "learning_rate": 4.084513531947238e-06,
      "loss": 0.4973,
      "step": 5178
    },
    {
      "epoch": 0.6349926434526729,
      "grad_norm": 2.191046515522163,
      "learning_rate": 4.084121462980675e-06,
      "loss": 0.5382,
      "step": 5179
    },
    {
      "epoch": 0.6351152525747915,
      "grad_norm": 1.8934065660740904,
      "learning_rate": 4.083729328903914e-06,
      "loss": 0.4844,
      "step": 5180
    },
    {
      "epoch": 0.6352378616969102,
      "grad_norm": 1.8144145924677333,
      "learning_rate": 4.083337129733074e-06,
      "loss": 0.5358,
      "step": 5181
    },
    {
      "epoch": 0.6353604708190289,
      "grad_norm": 2.1081494661719913,
      "learning_rate": 4.082944865484274e-06,
      "loss": 0.589,
      "step": 5182
    },
    {
      "epoch": 0.6354830799411476,
      "grad_norm": 2.0404577195094684,
      "learning_rate": 4.082552536173639e-06,
      "loss": 0.5234,
      "step": 5183
    },
    {
      "epoch": 0.6356056890632663,
      "grad_norm": 1.9904159831469446,
      "learning_rate": 4.0821601418172926e-06,
      "loss": 0.5348,
      "step": 5184
    },
    {
      "epoch": 0.635728298185385,
      "grad_norm": 1.9510694948791214,
      "learning_rate": 4.081767682431363e-06,
      "loss": 0.5758,
      "step": 5185
    },
    {
      "epoch": 0.6358509073075037,
      "grad_norm": 2.0085731172304198,
      "learning_rate": 4.081375158031982e-06,
      "loss": 0.5736,
      "step": 5186
    },
    {
      "epoch": 0.6359735164296224,
      "grad_norm": 2.1387141278463564,
      "learning_rate": 4.080982568635281e-06,
      "loss": 0.5863,
      "step": 5187
    },
    {
      "epoch": 0.636096125551741,
      "grad_norm": 1.9873752642376612,
      "learning_rate": 4.0805899142574e-06,
      "loss": 0.5199,
      "step": 5188
    },
    {
      "epoch": 0.6362187346738597,
      "grad_norm": 1.9926020058070164,
      "learning_rate": 4.080197194914474e-06,
      "loss": 0.5751,
      "step": 5189
    },
    {
      "epoch": 0.6363413437959784,
      "grad_norm": 1.800857179836472,
      "learning_rate": 4.0798044106226454e-06,
      "loss": 0.5285,
      "step": 5190
    },
    {
      "epoch": 0.6364639529180971,
      "grad_norm": 1.891666329082984,
      "learning_rate": 4.079411561398059e-06,
      "loss": 0.4917,
      "step": 5191
    },
    {
      "epoch": 0.6365865620402158,
      "grad_norm": 1.9368155732437489,
      "learning_rate": 4.079018647256861e-06,
      "loss": 0.528,
      "step": 5192
    },
    {
      "epoch": 0.6367091711623345,
      "grad_norm": 1.8579729122585689,
      "learning_rate": 4.078625668215201e-06,
      "loss": 0.5406,
      "step": 5193
    },
    {
      "epoch": 0.6368317802844532,
      "grad_norm": 1.9381699675522877,
      "learning_rate": 4.078232624289232e-06,
      "loss": 0.5414,
      "step": 5194
    },
    {
      "epoch": 0.6369543894065719,
      "grad_norm": 2.095271577317868,
      "learning_rate": 4.077839515495108e-06,
      "loss": 0.5837,
      "step": 5195
    },
    {
      "epoch": 0.6370769985286905,
      "grad_norm": 1.9657230522583626,
      "learning_rate": 4.077446341848987e-06,
      "loss": 0.5515,
      "step": 5196
    },
    {
      "epoch": 0.6371996076508092,
      "grad_norm": 1.9145988572544153,
      "learning_rate": 4.077053103367028e-06,
      "loss": 0.5053,
      "step": 5197
    },
    {
      "epoch": 0.6373222167729279,
      "grad_norm": 1.8365927526589227,
      "learning_rate": 4.0766598000653945e-06,
      "loss": 0.5477,
      "step": 5198
    },
    {
      "epoch": 0.6374448258950466,
      "grad_norm": 1.9826311873910618,
      "learning_rate": 4.076266431960252e-06,
      "loss": 0.5553,
      "step": 5199
    },
    {
      "epoch": 0.6375674350171653,
      "grad_norm": 2.100699201062082,
      "learning_rate": 4.075872999067768e-06,
      "loss": 0.5613,
      "step": 5200
    },
    {
      "epoch": 0.637690044139284,
      "grad_norm": 1.9774282715849723,
      "learning_rate": 4.075479501404115e-06,
      "loss": 0.5463,
      "step": 5201
    },
    {
      "epoch": 0.6378126532614027,
      "grad_norm": 1.9941752252946963,
      "learning_rate": 4.075085938985465e-06,
      "loss": 0.534,
      "step": 5202
    },
    {
      "epoch": 0.6379352623835214,
      "grad_norm": 1.8432215055545518,
      "learning_rate": 4.074692311827995e-06,
      "loss": 0.5505,
      "step": 5203
    },
    {
      "epoch": 0.63805787150564,
      "grad_norm": 1.993214149966292,
      "learning_rate": 4.074298619947882e-06,
      "loss": 0.5058,
      "step": 5204
    },
    {
      "epoch": 0.6381804806277587,
      "grad_norm": 1.8670906720348919,
      "learning_rate": 4.073904863361309e-06,
      "loss": 0.5166,
      "step": 5205
    },
    {
      "epoch": 0.6383030897498774,
      "grad_norm": 2.1128495121676054,
      "learning_rate": 4.0735110420844594e-06,
      "loss": 0.5813,
      "step": 5206
    },
    {
      "epoch": 0.6384256988719961,
      "grad_norm": 1.9837473075967045,
      "learning_rate": 4.073117156133521e-06,
      "loss": 0.5351,
      "step": 5207
    },
    {
      "epoch": 0.6385483079941148,
      "grad_norm": 1.7827755310051436,
      "learning_rate": 4.072723205524682e-06,
      "loss": 0.5186,
      "step": 5208
    },
    {
      "epoch": 0.6386709171162335,
      "grad_norm": 2.0794797408668493,
      "learning_rate": 4.072329190274136e-06,
      "loss": 0.5786,
      "step": 5209
    },
    {
      "epoch": 0.6387935262383522,
      "grad_norm": 1.9993143950776466,
      "learning_rate": 4.0719351103980754e-06,
      "loss": 0.5407,
      "step": 5210
    },
    {
      "epoch": 0.6389161353604709,
      "grad_norm": 1.8784325585624413,
      "learning_rate": 4.0715409659127e-06,
      "loss": 0.5381,
      "step": 5211
    },
    {
      "epoch": 0.6390387444825895,
      "grad_norm": 1.9959483220345327,
      "learning_rate": 4.071146756834208e-06,
      "loss": 0.5179,
      "step": 5212
    },
    {
      "epoch": 0.6391613536047082,
      "grad_norm": 1.8991529094170703,
      "learning_rate": 4.0707524831788025e-06,
      "loss": 0.5063,
      "step": 5213
    },
    {
      "epoch": 0.6392839627268269,
      "grad_norm": 2.1237234603999307,
      "learning_rate": 4.070358144962689e-06,
      "loss": 0.5237,
      "step": 5214
    },
    {
      "epoch": 0.6394065718489456,
      "grad_norm": 1.8837025891334198,
      "learning_rate": 4.0699637422020775e-06,
      "loss": 0.5389,
      "step": 5215
    },
    {
      "epoch": 0.6395291809710643,
      "grad_norm": 2.2507398079681114,
      "learning_rate": 4.0695692749131745e-06,
      "loss": 0.5427,
      "step": 5216
    },
    {
      "epoch": 0.639651790093183,
      "grad_norm": 1.9212478285320198,
      "learning_rate": 4.069174743112197e-06,
      "loss": 0.5117,
      "step": 5217
    },
    {
      "epoch": 0.6397743992153017,
      "grad_norm": 2.0064303646307167,
      "learning_rate": 4.0687801468153595e-06,
      "loss": 0.4702,
      "step": 5218
    },
    {
      "epoch": 0.6398970083374204,
      "grad_norm": 2.1658193252107316,
      "learning_rate": 4.06838548603888e-06,
      "loss": 0.562,
      "step": 5219
    },
    {
      "epoch": 0.640019617459539,
      "grad_norm": 2.2643891006516688,
      "learning_rate": 4.067990760798981e-06,
      "loss": 0.6072,
      "step": 5220
    },
    {
      "epoch": 0.6401422265816576,
      "grad_norm": 2.20234927399566,
      "learning_rate": 4.067595971111885e-06,
      "loss": 0.5824,
      "step": 5221
    },
    {
      "epoch": 0.6402648357037763,
      "grad_norm": 2.1417683951833015,
      "learning_rate": 4.06720111699382e-06,
      "loss": 0.5973,
      "step": 5222
    },
    {
      "epoch": 0.640387444825895,
      "grad_norm": 1.9085757884491632,
      "learning_rate": 4.066806198461015e-06,
      "loss": 0.5652,
      "step": 5223
    },
    {
      "epoch": 0.6405100539480137,
      "grad_norm": 1.8462806397133062,
      "learning_rate": 4.066411215529701e-06,
      "loss": 0.5978,
      "step": 5224
    },
    {
      "epoch": 0.6406326630701324,
      "grad_norm": 1.923273655765976,
      "learning_rate": 4.066016168216113e-06,
      "loss": 0.5111,
      "step": 5225
    },
    {
      "epoch": 0.6407552721922511,
      "grad_norm": 1.6595098000253228,
      "learning_rate": 4.065621056536488e-06,
      "loss": 0.5105,
      "step": 5226
    },
    {
      "epoch": 0.6408778813143697,
      "grad_norm": 2.2025562390167037,
      "learning_rate": 4.065225880507066e-06,
      "loss": 0.5694,
      "step": 5227
    },
    {
      "epoch": 0.6410004904364884,
      "grad_norm": 1.8576765390823322,
      "learning_rate": 4.0648306401440886e-06,
      "loss": 0.5058,
      "step": 5228
    },
    {
      "epoch": 0.6411230995586071,
      "grad_norm": 2.1970328100968652,
      "learning_rate": 4.064435335463802e-06,
      "loss": 0.5374,
      "step": 5229
    },
    {
      "epoch": 0.6412457086807258,
      "grad_norm": 2.0202589379948734,
      "learning_rate": 4.064039966482454e-06,
      "loss": 0.528,
      "step": 5230
    },
    {
      "epoch": 0.6413683178028445,
      "grad_norm": 1.9212680450270865,
      "learning_rate": 4.063644533216293e-06,
      "loss": 0.5564,
      "step": 5231
    },
    {
      "epoch": 0.6414909269249632,
      "grad_norm": 1.861277963137504,
      "learning_rate": 4.063249035681575e-06,
      "loss": 0.5063,
      "step": 5232
    },
    {
      "epoch": 0.6416135360470819,
      "grad_norm": 1.9523960375837854,
      "learning_rate": 4.062853473894554e-06,
      "loss": 0.5479,
      "step": 5233
    },
    {
      "epoch": 0.6417361451692006,
      "grad_norm": 1.9529401129051698,
      "learning_rate": 4.062457847871487e-06,
      "loss": 0.5471,
      "step": 5234
    },
    {
      "epoch": 0.6418587542913192,
      "grad_norm": 1.995163278769872,
      "learning_rate": 4.062062157628638e-06,
      "loss": 0.5092,
      "step": 5235
    },
    {
      "epoch": 0.6419813634134379,
      "grad_norm": 2.057106545230601,
      "learning_rate": 4.0616664031822686e-06,
      "loss": 0.5797,
      "step": 5236
    },
    {
      "epoch": 0.6421039725355566,
      "grad_norm": 1.891759481835326,
      "learning_rate": 4.061270584548645e-06,
      "loss": 0.5172,
      "step": 5237
    },
    {
      "epoch": 0.6422265816576753,
      "grad_norm": 1.9799355437165818,
      "learning_rate": 4.060874701744036e-06,
      "loss": 0.5585,
      "step": 5238
    },
    {
      "epoch": 0.642349190779794,
      "grad_norm": 1.8781211953890729,
      "learning_rate": 4.0604787547847135e-06,
      "loss": 0.4629,
      "step": 5239
    },
    {
      "epoch": 0.6424717999019127,
      "grad_norm": 1.8139054311007072,
      "learning_rate": 4.060082743686951e-06,
      "loss": 0.4847,
      "step": 5240
    },
    {
      "epoch": 0.6425944090240314,
      "grad_norm": 2.1565051647480766,
      "learning_rate": 4.0596866684670275e-06,
      "loss": 0.5691,
      "step": 5241
    },
    {
      "epoch": 0.6427170181461501,
      "grad_norm": 2.4919360979589467,
      "learning_rate": 4.05929052914122e-06,
      "loss": 0.592,
      "step": 5242
    },
    {
      "epoch": 0.6428396272682687,
      "grad_norm": 2.273043356725442,
      "learning_rate": 4.058894325725811e-06,
      "loss": 0.5667,
      "step": 5243
    },
    {
      "epoch": 0.6429622363903874,
      "grad_norm": 1.891476047751699,
      "learning_rate": 4.058498058237085e-06,
      "loss": 0.5213,
      "step": 5244
    },
    {
      "epoch": 0.6430848455125061,
      "grad_norm": 2.064095396733775,
      "learning_rate": 4.058101726691331e-06,
      "loss": 0.5631,
      "step": 5245
    },
    {
      "epoch": 0.6432074546346248,
      "grad_norm": 2.013927940893178,
      "learning_rate": 4.0577053311048365e-06,
      "loss": 0.4985,
      "step": 5246
    },
    {
      "epoch": 0.6433300637567435,
      "grad_norm": 2.0856192019836284,
      "learning_rate": 4.057308871493895e-06,
      "loss": 0.5924,
      "step": 5247
    },
    {
      "epoch": 0.6434526728788622,
      "grad_norm": 1.864575672280462,
      "learning_rate": 4.056912347874803e-06,
      "loss": 0.5117,
      "step": 5248
    },
    {
      "epoch": 0.6435752820009809,
      "grad_norm": 2.09241896879355,
      "learning_rate": 4.056515760263857e-06,
      "loss": 0.5721,
      "step": 5249
    },
    {
      "epoch": 0.6436978911230996,
      "grad_norm": 2.1280573624320813,
      "learning_rate": 4.056119108677357e-06,
      "loss": 0.5338,
      "step": 5250
    },
    {
      "epoch": 0.6438205002452182,
      "grad_norm": 1.9717803753333714,
      "learning_rate": 4.055722393131608e-06,
      "loss": 0.584,
      "step": 5251
    },
    {
      "epoch": 0.6439431093673369,
      "grad_norm": 2.0232024524050365,
      "learning_rate": 4.055325613642913e-06,
      "loss": 0.6003,
      "step": 5252
    },
    {
      "epoch": 0.6440657184894556,
      "grad_norm": 1.8645665795185615,
      "learning_rate": 4.0549287702275815e-06,
      "loss": 0.5611,
      "step": 5253
    },
    {
      "epoch": 0.6441883276115743,
      "grad_norm": 2.1051926718404137,
      "learning_rate": 4.054531862901926e-06,
      "loss": 0.5796,
      "step": 5254
    },
    {
      "epoch": 0.644310936733693,
      "grad_norm": 2.0265645513775468,
      "learning_rate": 4.054134891682259e-06,
      "loss": 0.5482,
      "step": 5255
    },
    {
      "epoch": 0.6444335458558117,
      "grad_norm": 2.0399902351250483,
      "learning_rate": 4.053737856584896e-06,
      "loss": 0.5306,
      "step": 5256
    },
    {
      "epoch": 0.6445561549779304,
      "grad_norm": 1.9750367110868414,
      "learning_rate": 4.0533407576261566e-06,
      "loss": 0.5403,
      "step": 5257
    },
    {
      "epoch": 0.6446787641000491,
      "grad_norm": 2.331344949931823,
      "learning_rate": 4.0529435948223616e-06,
      "loss": 0.516,
      "step": 5258
    },
    {
      "epoch": 0.6448013732221677,
      "grad_norm": 2.017726256619022,
      "learning_rate": 4.0525463681898356e-06,
      "loss": 0.5487,
      "step": 5259
    },
    {
      "epoch": 0.6449239823442864,
      "grad_norm": 1.957212997706633,
      "learning_rate": 4.052149077744906e-06,
      "loss": 0.5414,
      "step": 5260
    },
    {
      "epoch": 0.6450465914664051,
      "grad_norm": 2.0619105263485276,
      "learning_rate": 4.051751723503901e-06,
      "loss": 0.5064,
      "step": 5261
    },
    {
      "epoch": 0.6451692005885238,
      "grad_norm": 1.8561068985370652,
      "learning_rate": 4.051354305483153e-06,
      "loss": 0.5718,
      "step": 5262
    },
    {
      "epoch": 0.6452918097106425,
      "grad_norm": 2.1153609583619617,
      "learning_rate": 4.050956823698997e-06,
      "loss": 0.546,
      "step": 5263
    },
    {
      "epoch": 0.6454144188327612,
      "grad_norm": 2.105705383353786,
      "learning_rate": 4.050559278167768e-06,
      "loss": 0.5938,
      "step": 5264
    },
    {
      "epoch": 0.6455370279548799,
      "grad_norm": 2.0517104915875795,
      "learning_rate": 4.0501616689058095e-06,
      "loss": 0.5192,
      "step": 5265
    },
    {
      "epoch": 0.6456596370769986,
      "grad_norm": 1.8616363524807007,
      "learning_rate": 4.0497639959294625e-06,
      "loss": 0.4848,
      "step": 5266
    },
    {
      "epoch": 0.6457822461991172,
      "grad_norm": 1.8904033147239516,
      "learning_rate": 4.04936625925507e-06,
      "loss": 0.5792,
      "step": 5267
    },
    {
      "epoch": 0.6459048553212359,
      "grad_norm": 1.9516756063418903,
      "learning_rate": 4.048968458898982e-06,
      "loss": 0.5354,
      "step": 5268
    },
    {
      "epoch": 0.6460274644433546,
      "grad_norm": 1.913532854707455,
      "learning_rate": 4.048570594877548e-06,
      "loss": 0.6096,
      "step": 5269
    },
    {
      "epoch": 0.6461500735654733,
      "grad_norm": 2.005418902778178,
      "learning_rate": 4.048172667207121e-06,
      "loss": 0.5388,
      "step": 5270
    },
    {
      "epoch": 0.646272682687592,
      "grad_norm": 1.8946480693650538,
      "learning_rate": 4.047774675904057e-06,
      "loss": 0.5736,
      "step": 5271
    },
    {
      "epoch": 0.6463952918097107,
      "grad_norm": 1.9986910523150276,
      "learning_rate": 4.047376620984713e-06,
      "loss": 0.5222,
      "step": 5272
    },
    {
      "epoch": 0.6465179009318294,
      "grad_norm": 2.166477784455603,
      "learning_rate": 4.046978502465451e-06,
      "loss": 0.5817,
      "step": 5273
    },
    {
      "epoch": 0.646640510053948,
      "grad_norm": 1.9335307515379827,
      "learning_rate": 4.046580320362634e-06,
      "loss": 0.5138,
      "step": 5274
    },
    {
      "epoch": 0.6467631191760667,
      "grad_norm": 1.9441630268272467,
      "learning_rate": 4.046182074692628e-06,
      "loss": 0.5483,
      "step": 5275
    },
    {
      "epoch": 0.6468857282981854,
      "grad_norm": 2.164761425554148,
      "learning_rate": 4.0457837654718005e-06,
      "loss": 0.5658,
      "step": 5276
    },
    {
      "epoch": 0.6470083374203041,
      "grad_norm": 2.4655084675121928,
      "learning_rate": 4.0453853927165244e-06,
      "loss": 0.5212,
      "step": 5277
    },
    {
      "epoch": 0.6471309465424228,
      "grad_norm": 2.2227605713438074,
      "learning_rate": 4.044986956443172e-06,
      "loss": 0.5584,
      "step": 5278
    },
    {
      "epoch": 0.6472535556645415,
      "grad_norm": 1.7908257786396768,
      "learning_rate": 4.044588456668122e-06,
      "loss": 0.5954,
      "step": 5279
    },
    {
      "epoch": 0.6473761647866602,
      "grad_norm": 1.9179477671355336,
      "learning_rate": 4.0441898934077505e-06,
      "loss": 0.4783,
      "step": 5280
    },
    {
      "epoch": 0.6474987739087789,
      "grad_norm": 2.047958730988665,
      "learning_rate": 4.043791266678441e-06,
      "loss": 0.5413,
      "step": 5281
    },
    {
      "epoch": 0.6476213830308974,
      "grad_norm": 2.1079395280797275,
      "learning_rate": 4.043392576496578e-06,
      "loss": 0.4933,
      "step": 5282
    },
    {
      "epoch": 0.6477439921530161,
      "grad_norm": 1.9197365387933802,
      "learning_rate": 4.042993822878546e-06,
      "loss": 0.5553,
      "step": 5283
    },
    {
      "epoch": 0.6478666012751348,
      "grad_norm": 2.1620633095175315,
      "learning_rate": 4.042595005840737e-06,
      "loss": 0.6029,
      "step": 5284
    },
    {
      "epoch": 0.6479892103972535,
      "grad_norm": 2.1559279346889806,
      "learning_rate": 4.042196125399543e-06,
      "loss": 0.5426,
      "step": 5285
    },
    {
      "epoch": 0.6481118195193722,
      "grad_norm": 2.0378460953313877,
      "learning_rate": 4.041797181571358e-06,
      "loss": 0.5795,
      "step": 5286
    },
    {
      "epoch": 0.648234428641491,
      "grad_norm": 2.101002507228683,
      "learning_rate": 4.041398174372579e-06,
      "loss": 0.566,
      "step": 5287
    },
    {
      "epoch": 0.6483570377636096,
      "grad_norm": 1.971836001018289,
      "learning_rate": 4.040999103819606e-06,
      "loss": 0.5328,
      "step": 5288
    },
    {
      "epoch": 0.6484796468857283,
      "grad_norm": 1.8974407565245437,
      "learning_rate": 4.040599969928842e-06,
      "loss": 0.5166,
      "step": 5289
    },
    {
      "epoch": 0.6486022560078469,
      "grad_norm": 1.9354977024081526,
      "learning_rate": 4.040200772716691e-06,
      "loss": 0.5516,
      "step": 5290
    },
    {
      "epoch": 0.6487248651299656,
      "grad_norm": 1.9764134726685576,
      "learning_rate": 4.039801512199563e-06,
      "loss": 0.5586,
      "step": 5291
    },
    {
      "epoch": 0.6488474742520843,
      "grad_norm": 1.8923684463335377,
      "learning_rate": 4.039402188393866e-06,
      "loss": 0.5995,
      "step": 5292
    },
    {
      "epoch": 0.648970083374203,
      "grad_norm": 2.1253544984680377,
      "learning_rate": 4.0390028013160145e-06,
      "loss": 0.5735,
      "step": 5293
    },
    {
      "epoch": 0.6490926924963217,
      "grad_norm": 2.0502576317391985,
      "learning_rate": 4.038603350982422e-06,
      "loss": 0.5285,
      "step": 5294
    },
    {
      "epoch": 0.6492153016184404,
      "grad_norm": 1.7754720759493046,
      "learning_rate": 4.038203837409509e-06,
      "loss": 0.5319,
      "step": 5295
    },
    {
      "epoch": 0.6493379107405591,
      "grad_norm": 2.0170517841908375,
      "learning_rate": 4.037804260613694e-06,
      "loss": 0.61,
      "step": 5296
    },
    {
      "epoch": 0.6494605198626778,
      "grad_norm": 1.9037118271841476,
      "learning_rate": 4.037404620611403e-06,
      "loss": 0.5464,
      "step": 5297
    },
    {
      "epoch": 0.6495831289847964,
      "grad_norm": 1.8977130443654378,
      "learning_rate": 4.037004917419058e-06,
      "loss": 0.5857,
      "step": 5298
    },
    {
      "epoch": 0.6497057381069151,
      "grad_norm": 1.7413265650068732,
      "learning_rate": 4.036605151053092e-06,
      "loss": 0.5578,
      "step": 5299
    },
    {
      "epoch": 0.6498283472290338,
      "grad_norm": 1.9957776257953952,
      "learning_rate": 4.036205321529933e-06,
      "loss": 0.543,
      "step": 5300
    },
    {
      "epoch": 0.6499509563511525,
      "grad_norm": 1.9636222494863718,
      "learning_rate": 4.035805428866017e-06,
      "loss": 0.5165,
      "step": 5301
    },
    {
      "epoch": 0.6500735654732712,
      "grad_norm": 1.8418521417023217,
      "learning_rate": 4.035405473077777e-06,
      "loss": 0.5322,
      "step": 5302
    },
    {
      "epoch": 0.6501961745953899,
      "grad_norm": 1.9955017116458174,
      "learning_rate": 4.035005454181654e-06,
      "loss": 0.5424,
      "step": 5303
    },
    {
      "epoch": 0.6503187837175086,
      "grad_norm": 2.036527760796528,
      "learning_rate": 4.034605372194091e-06,
      "loss": 0.5263,
      "step": 5304
    },
    {
      "epoch": 0.6504413928396273,
      "grad_norm": 1.8844523224859624,
      "learning_rate": 4.034205227131529e-06,
      "loss": 0.5302,
      "step": 5305
    },
    {
      "epoch": 0.6505640019617459,
      "grad_norm": 2.2136833983407582,
      "learning_rate": 4.033805019010416e-06,
      "loss": 0.5593,
      "step": 5306
    },
    {
      "epoch": 0.6506866110838646,
      "grad_norm": 1.887207852684385,
      "learning_rate": 4.033404747847201e-06,
      "loss": 0.5077,
      "step": 5307
    },
    {
      "epoch": 0.6508092202059833,
      "grad_norm": 1.8042220442859704,
      "learning_rate": 4.033004413658338e-06,
      "loss": 0.5459,
      "step": 5308
    },
    {
      "epoch": 0.650931829328102,
      "grad_norm": 2.0689240437597127,
      "learning_rate": 4.032604016460279e-06,
      "loss": 0.5426,
      "step": 5309
    },
    {
      "epoch": 0.6510544384502207,
      "grad_norm": 1.9994911393820216,
      "learning_rate": 4.032203556269481e-06,
      "loss": 0.5718,
      "step": 5310
    },
    {
      "epoch": 0.6511770475723394,
      "grad_norm": 1.8543828922152474,
      "learning_rate": 4.031803033102404e-06,
      "loss": 0.5036,
      "step": 5311
    },
    {
      "epoch": 0.6512996566944581,
      "grad_norm": 2.298111817989641,
      "learning_rate": 4.031402446975511e-06,
      "loss": 0.6274,
      "step": 5312
    },
    {
      "epoch": 0.6514222658165768,
      "grad_norm": 1.9737763136759945,
      "learning_rate": 4.031001797905266e-06,
      "loss": 0.5239,
      "step": 5313
    },
    {
      "epoch": 0.6515448749386954,
      "grad_norm": 1.9431645426903115,
      "learning_rate": 4.030601085908137e-06,
      "loss": 0.5674,
      "step": 5314
    },
    {
      "epoch": 0.6516674840608141,
      "grad_norm": 2.122844107899148,
      "learning_rate": 4.030200311000593e-06,
      "loss": 0.5165,
      "step": 5315
    },
    {
      "epoch": 0.6517900931829328,
      "grad_norm": 1.8867468190531238,
      "learning_rate": 4.029799473199109e-06,
      "loss": 0.5334,
      "step": 5316
    },
    {
      "epoch": 0.6519127023050515,
      "grad_norm": 1.9949041461240082,
      "learning_rate": 4.029398572520156e-06,
      "loss": 0.5627,
      "step": 5317
    },
    {
      "epoch": 0.6520353114271702,
      "grad_norm": 2.0003838455218297,
      "learning_rate": 4.028997608980217e-06,
      "loss": 0.5264,
      "step": 5318
    },
    {
      "epoch": 0.6521579205492889,
      "grad_norm": 2.1192003737680603,
      "learning_rate": 4.028596582595766e-06,
      "loss": 0.5584,
      "step": 5319
    },
    {
      "epoch": 0.6522805296714076,
      "grad_norm": 1.9407875825668894,
      "learning_rate": 4.028195493383291e-06,
      "loss": 0.5528,
      "step": 5320
    },
    {
      "epoch": 0.6524031387935263,
      "grad_norm": 1.972522831549312,
      "learning_rate": 4.027794341359276e-06,
      "loss": 0.5506,
      "step": 5321
    },
    {
      "epoch": 0.6525257479156449,
      "grad_norm": 1.8977703394570773,
      "learning_rate": 4.027393126540209e-06,
      "loss": 0.517,
      "step": 5322
    },
    {
      "epoch": 0.6526483570377636,
      "grad_norm": 2.0082281557381862,
      "learning_rate": 4.02699184894258e-06,
      "loss": 0.5499,
      "step": 5323
    },
    {
      "epoch": 0.6527709661598823,
      "grad_norm": 1.9257754537727827,
      "learning_rate": 4.026590508582883e-06,
      "loss": 0.5571,
      "step": 5324
    },
    {
      "epoch": 0.652893575282001,
      "grad_norm": 1.7977190946653827,
      "learning_rate": 4.0261891054776125e-06,
      "loss": 0.4999,
      "step": 5325
    },
    {
      "epoch": 0.6530161844041197,
      "grad_norm": 1.769895859225606,
      "learning_rate": 4.025787639643268e-06,
      "loss": 0.4922,
      "step": 5326
    },
    {
      "epoch": 0.6531387935262384,
      "grad_norm": 1.762180412127371,
      "learning_rate": 4.0253861110963514e-06,
      "loss": 0.5697,
      "step": 5327
    },
    {
      "epoch": 0.6532614026483571,
      "grad_norm": 1.976149949775931,
      "learning_rate": 4.024984519853364e-06,
      "loss": 0.5135,
      "step": 5328
    },
    {
      "epoch": 0.6533840117704757,
      "grad_norm": 2.020559935080151,
      "learning_rate": 4.024582865930814e-06,
      "loss": 0.554,
      "step": 5329
    },
    {
      "epoch": 0.6535066208925944,
      "grad_norm": 1.8996351089393815,
      "learning_rate": 4.024181149345209e-06,
      "loss": 0.5608,
      "step": 5330
    },
    {
      "epoch": 0.6536292300147131,
      "grad_norm": 2.2116284222604876,
      "learning_rate": 4.02377937011306e-06,
      "loss": 0.5779,
      "step": 5331
    },
    {
      "epoch": 0.6537518391368318,
      "grad_norm": 1.8054850732460204,
      "learning_rate": 4.023377528250881e-06,
      "loss": 0.5437,
      "step": 5332
    },
    {
      "epoch": 0.6538744482589505,
      "grad_norm": 2.3021874195620082,
      "learning_rate": 4.02297562377519e-06,
      "loss": 0.5884,
      "step": 5333
    },
    {
      "epoch": 0.6539970573810692,
      "grad_norm": 1.7856540742038371,
      "learning_rate": 4.022573656702503e-06,
      "loss": 0.5024,
      "step": 5334
    },
    {
      "epoch": 0.6541196665031879,
      "grad_norm": 2.1070215310859033,
      "learning_rate": 4.022171627049344e-06,
      "loss": 0.5607,
      "step": 5335
    },
    {
      "epoch": 0.6542422756253066,
      "grad_norm": 2.0094583953750016,
      "learning_rate": 4.021769534832236e-06,
      "loss": 0.591,
      "step": 5336
    },
    {
      "epoch": 0.6543648847474252,
      "grad_norm": 2.0434957034762284,
      "learning_rate": 4.021367380067707e-06,
      "loss": 0.5214,
      "step": 5337
    },
    {
      "epoch": 0.6544874938695439,
      "grad_norm": 2.045518641787074,
      "learning_rate": 4.020965162772285e-06,
      "loss": 0.5463,
      "step": 5338
    },
    {
      "epoch": 0.6546101029916626,
      "grad_norm": 2.117246286838675,
      "learning_rate": 4.020562882962501e-06,
      "loss": 0.5686,
      "step": 5339
    },
    {
      "epoch": 0.6547327121137813,
      "grad_norm": 1.8044663124385905,
      "learning_rate": 4.020160540654892e-06,
      "loss": 0.5141,
      "step": 5340
    },
    {
      "epoch": 0.6548553212359,
      "grad_norm": 1.96217694610782,
      "learning_rate": 4.019758135865992e-06,
      "loss": 0.5377,
      "step": 5341
    },
    {
      "epoch": 0.6549779303580187,
      "grad_norm": 2.1184958611830975,
      "learning_rate": 4.0193556686123435e-06,
      "loss": 0.5045,
      "step": 5342
    },
    {
      "epoch": 0.6551005394801374,
      "grad_norm": 1.9692014150480843,
      "learning_rate": 4.018953138910487e-06,
      "loss": 0.5515,
      "step": 5343
    },
    {
      "epoch": 0.6552231486022561,
      "grad_norm": 1.9865131956308204,
      "learning_rate": 4.018550546776966e-06,
      "loss": 0.5559,
      "step": 5344
    },
    {
      "epoch": 0.6553457577243746,
      "grad_norm": 2.1881158254256245,
      "learning_rate": 4.0181478922283315e-06,
      "loss": 0.5716,
      "step": 5345
    },
    {
      "epoch": 0.6554683668464933,
      "grad_norm": 1.9966960839834258,
      "learning_rate": 4.017745175281129e-06,
      "loss": 0.5595,
      "step": 5346
    },
    {
      "epoch": 0.655590975968612,
      "grad_norm": 2.238887637279592,
      "learning_rate": 4.017342395951914e-06,
      "loss": 0.53,
      "step": 5347
    },
    {
      "epoch": 0.6557135850907307,
      "grad_norm": 1.7873179075366215,
      "learning_rate": 4.016939554257239e-06,
      "loss": 0.5344,
      "step": 5348
    },
    {
      "epoch": 0.6558361942128494,
      "grad_norm": 2.0079519963922614,
      "learning_rate": 4.016536650213663e-06,
      "loss": 0.5909,
      "step": 5349
    },
    {
      "epoch": 0.6559588033349681,
      "grad_norm": 1.7192558703185052,
      "learning_rate": 4.016133683837748e-06,
      "loss": 0.5279,
      "step": 5350
    },
    {
      "epoch": 0.6560814124570868,
      "grad_norm": 1.6969988007026142,
      "learning_rate": 4.0157306551460525e-06,
      "loss": 0.4819,
      "step": 5351
    },
    {
      "epoch": 0.6562040215792055,
      "grad_norm": 2.1118166604230235,
      "learning_rate": 4.015327564155143e-06,
      "loss": 0.5307,
      "step": 5352
    },
    {
      "epoch": 0.6563266307013241,
      "grad_norm": 2.0721376212979634,
      "learning_rate": 4.014924410881591e-06,
      "loss": 0.5451,
      "step": 5353
    },
    {
      "epoch": 0.6564492398234428,
      "grad_norm": 2.0124776917050067,
      "learning_rate": 4.014521195341961e-06,
      "loss": 0.5234,
      "step": 5354
    },
    {
      "epoch": 0.6565718489455615,
      "grad_norm": 2.037412321702475,
      "learning_rate": 4.01411791755283e-06,
      "loss": 0.5473,
      "step": 5355
    },
    {
      "epoch": 0.6566944580676802,
      "grad_norm": 1.8171542257755113,
      "learning_rate": 4.01371457753077e-06,
      "loss": 0.5407,
      "step": 5356
    },
    {
      "epoch": 0.6568170671897989,
      "grad_norm": 1.9269101055251907,
      "learning_rate": 4.013311175292363e-06,
      "loss": 0.5217,
      "step": 5357
    },
    {
      "epoch": 0.6569396763119176,
      "grad_norm": 1.8544710316987232,
      "learning_rate": 4.012907710854188e-06,
      "loss": 0.569,
      "step": 5358
    },
    {
      "epoch": 0.6570622854340363,
      "grad_norm": 2.0750456387187683,
      "learning_rate": 4.0125041842328265e-06,
      "loss": 0.548,
      "step": 5359
    },
    {
      "epoch": 0.657184894556155,
      "grad_norm": 1.9345321019808939,
      "learning_rate": 4.012100595444866e-06,
      "loss": 0.5927,
      "step": 5360
    },
    {
      "epoch": 0.6573075036782736,
      "grad_norm": 1.9447146436480687,
      "learning_rate": 4.011696944506893e-06,
      "loss": 0.5379,
      "step": 5361
    },
    {
      "epoch": 0.6574301128003923,
      "grad_norm": 2.0480151158478073,
      "learning_rate": 4.0112932314355e-06,
      "loss": 0.5887,
      "step": 5362
    },
    {
      "epoch": 0.657552721922511,
      "grad_norm": 2.0181194925667794,
      "learning_rate": 4.01088945624728e-06,
      "loss": 0.5383,
      "step": 5363
    },
    {
      "epoch": 0.6576753310446297,
      "grad_norm": 1.9279522538209968,
      "learning_rate": 4.010485618958828e-06,
      "loss": 0.5431,
      "step": 5364
    },
    {
      "epoch": 0.6577979401667484,
      "grad_norm": 2.0308224419627017,
      "learning_rate": 4.010081719586743e-06,
      "loss": 0.5546,
      "step": 5365
    },
    {
      "epoch": 0.6579205492888671,
      "grad_norm": 2.018135519648576,
      "learning_rate": 4.009677758147627e-06,
      "loss": 0.5636,
      "step": 5366
    },
    {
      "epoch": 0.6580431584109858,
      "grad_norm": 1.8404137415935384,
      "learning_rate": 4.009273734658082e-06,
      "loss": 0.5714,
      "step": 5367
    },
    {
      "epoch": 0.6581657675331045,
      "grad_norm": 2.1474381109974012,
      "learning_rate": 4.008869649134713e-06,
      "loss": 0.5755,
      "step": 5368
    },
    {
      "epoch": 0.6582883766552231,
      "grad_norm": 2.0641120403628177,
      "learning_rate": 4.008465501594132e-06,
      "loss": 0.5259,
      "step": 5369
    },
    {
      "epoch": 0.6584109857773418,
      "grad_norm": 2.044149782430377,
      "learning_rate": 4.008061292052948e-06,
      "loss": 0.4919,
      "step": 5370
    },
    {
      "epoch": 0.6585335948994605,
      "grad_norm": 1.8791965910066675,
      "learning_rate": 4.0076570205277745e-06,
      "loss": 0.549,
      "step": 5371
    },
    {
      "epoch": 0.6586562040215792,
      "grad_norm": 1.9495788633329156,
      "learning_rate": 4.007252687035229e-06,
      "loss": 0.5619,
      "step": 5372
    },
    {
      "epoch": 0.6587788131436979,
      "grad_norm": 1.7496486905527022,
      "learning_rate": 4.006848291591929e-06,
      "loss": 0.4954,
      "step": 5373
    },
    {
      "epoch": 0.6589014222658166,
      "grad_norm": 2.071177688700371,
      "learning_rate": 4.0064438342144965e-06,
      "loss": 0.5647,
      "step": 5374
    },
    {
      "epoch": 0.6590240313879353,
      "grad_norm": 1.9803432516910762,
      "learning_rate": 4.006039314919556e-06,
      "loss": 0.4717,
      "step": 5375
    },
    {
      "epoch": 0.6591466405100539,
      "grad_norm": 2.112528127151743,
      "learning_rate": 4.005634733723734e-06,
      "loss": 0.5066,
      "step": 5376
    },
    {
      "epoch": 0.6592692496321726,
      "grad_norm": 1.9276940949734067,
      "learning_rate": 4.005230090643658e-06,
      "loss": 0.5163,
      "step": 5377
    },
    {
      "epoch": 0.6593918587542913,
      "grad_norm": 2.190588422618814,
      "learning_rate": 4.004825385695961e-06,
      "loss": 0.5203,
      "step": 5378
    },
    {
      "epoch": 0.65951446787641,
      "grad_norm": 1.8236982833124,
      "learning_rate": 4.004420618897276e-06,
      "loss": 0.5008,
      "step": 5379
    },
    {
      "epoch": 0.6596370769985287,
      "grad_norm": 1.8354377671919782,
      "learning_rate": 4.0040157902642396e-06,
      "loss": 0.5262,
      "step": 5380
    },
    {
      "epoch": 0.6597596861206474,
      "grad_norm": 1.880603503366887,
      "learning_rate": 4.003610899813493e-06,
      "loss": 0.5591,
      "step": 5381
    },
    {
      "epoch": 0.6598822952427661,
      "grad_norm": 1.8504547164721181,
      "learning_rate": 4.003205947561676e-06,
      "loss": 0.5422,
      "step": 5382
    },
    {
      "epoch": 0.6600049043648848,
      "grad_norm": 1.822644933215736,
      "learning_rate": 4.002800933525432e-06,
      "loss": 0.5407,
      "step": 5383
    },
    {
      "epoch": 0.6601275134870034,
      "grad_norm": 1.905119161538083,
      "learning_rate": 4.002395857721411e-06,
      "loss": 0.5508,
      "step": 5384
    },
    {
      "epoch": 0.6602501226091221,
      "grad_norm": 1.9378283517072616,
      "learning_rate": 4.00199072016626e-06,
      "loss": 0.5311,
      "step": 5385
    },
    {
      "epoch": 0.6603727317312408,
      "grad_norm": 1.8130046944690035,
      "learning_rate": 4.001585520876631e-06,
      "loss": 0.5472,
      "step": 5386
    },
    {
      "epoch": 0.6604953408533595,
      "grad_norm": 1.9825157747999615,
      "learning_rate": 4.001180259869179e-06,
      "loss": 0.5728,
      "step": 5387
    },
    {
      "epoch": 0.6606179499754782,
      "grad_norm": 1.9079446151073824,
      "learning_rate": 4.00077493716056e-06,
      "loss": 0.5736,
      "step": 5388
    },
    {
      "epoch": 0.6607405590975969,
      "grad_norm": 1.6197198906668033,
      "learning_rate": 4.000369552767435e-06,
      "loss": 0.5051,
      "step": 5389
    },
    {
      "epoch": 0.6608631682197156,
      "grad_norm": 1.7917998245594364,
      "learning_rate": 3.999964106706465e-06,
      "loss": 0.5573,
      "step": 5390
    },
    {
      "epoch": 0.6609857773418343,
      "grad_norm": 2.1577527849516853,
      "learning_rate": 3.999558598994316e-06,
      "loss": 0.5986,
      "step": 5391
    },
    {
      "epoch": 0.6611083864639529,
      "grad_norm": 1.9307235981547686,
      "learning_rate": 3.999153029647651e-06,
      "loss": 0.5086,
      "step": 5392
    },
    {
      "epoch": 0.6612309955860716,
      "grad_norm": 1.8606574451884443,
      "learning_rate": 3.9987473986831445e-06,
      "loss": 0.5366,
      "step": 5393
    },
    {
      "epoch": 0.6613536047081903,
      "grad_norm": 2.0879830182582246,
      "learning_rate": 3.9983417061174655e-06,
      "loss": 0.5689,
      "step": 5394
    },
    {
      "epoch": 0.661476213830309,
      "grad_norm": 1.9379700657357943,
      "learning_rate": 3.997935951967291e-06,
      "loss": 0.5417,
      "step": 5395
    },
    {
      "epoch": 0.6615988229524277,
      "grad_norm": 1.8710935360601362,
      "learning_rate": 3.997530136249296e-06,
      "loss": 0.5091,
      "step": 5396
    },
    {
      "epoch": 0.6617214320745464,
      "grad_norm": 2.103700768844947,
      "learning_rate": 3.997124258980162e-06,
      "loss": 0.5708,
      "step": 5397
    },
    {
      "epoch": 0.6618440411966651,
      "grad_norm": 1.813745965720834,
      "learning_rate": 3.996718320176569e-06,
      "loss": 0.4857,
      "step": 5398
    },
    {
      "epoch": 0.6619666503187838,
      "grad_norm": 1.7672479336932057,
      "learning_rate": 3.996312319855205e-06,
      "loss": 0.5578,
      "step": 5399
    },
    {
      "epoch": 0.6620892594409024,
      "grad_norm": 2.311680743848026,
      "learning_rate": 3.9959062580327545e-06,
      "loss": 0.5721,
      "step": 5400
    },
    {
      "epoch": 0.662211868563021,
      "grad_norm": 1.9649143646146394,
      "learning_rate": 3.995500134725909e-06,
      "loss": 0.5287,
      "step": 5401
    },
    {
      "epoch": 0.6623344776851398,
      "grad_norm": 2.107019661275992,
      "learning_rate": 3.995093949951361e-06,
      "loss": 0.5467,
      "step": 5402
    },
    {
      "epoch": 0.6624570868072585,
      "grad_norm": 1.8160914540081652,
      "learning_rate": 3.994687703725804e-06,
      "loss": 0.5617,
      "step": 5403
    },
    {
      "epoch": 0.6625796959293772,
      "grad_norm": 1.9564068810446251,
      "learning_rate": 3.994281396065936e-06,
      "loss": 0.5659,
      "step": 5404
    },
    {
      "epoch": 0.6627023050514959,
      "grad_norm": 1.7848811520377421,
      "learning_rate": 3.993875026988458e-06,
      "loss": 0.514,
      "step": 5405
    },
    {
      "epoch": 0.6628249141736146,
      "grad_norm": 1.8872493091592908,
      "learning_rate": 3.993468596510071e-06,
      "loss": 0.4712,
      "step": 5406
    },
    {
      "epoch": 0.6629475232957333,
      "grad_norm": 2.048562648171971,
      "learning_rate": 3.993062104647482e-06,
      "loss": 0.5444,
      "step": 5407
    },
    {
      "epoch": 0.6630701324178518,
      "grad_norm": 2.003859248168485,
      "learning_rate": 3.992655551417395e-06,
      "loss": 0.5397,
      "step": 5408
    },
    {
      "epoch": 0.6631927415399705,
      "grad_norm": 1.6710986313889606,
      "learning_rate": 3.992248936836524e-06,
      "loss": 0.5261,
      "step": 5409
    },
    {
      "epoch": 0.6633153506620892,
      "grad_norm": 1.9869462934207567,
      "learning_rate": 3.991842260921579e-06,
      "loss": 0.5485,
      "step": 5410
    },
    {
      "epoch": 0.6634379597842079,
      "grad_norm": 1.8531330093475347,
      "learning_rate": 3.991435523689276e-06,
      "loss": 0.5093,
      "step": 5411
    },
    {
      "epoch": 0.6635605689063266,
      "grad_norm": 2.2025894079928667,
      "learning_rate": 3.991028725156334e-06,
      "loss": 0.5351,
      "step": 5412
    },
    {
      "epoch": 0.6636831780284453,
      "grad_norm": 1.8724747808893845,
      "learning_rate": 3.99062186533947e-06,
      "loss": 0.5866,
      "step": 5413
    },
    {
      "epoch": 0.663805787150564,
      "grad_norm": 2.0816887245876656,
      "learning_rate": 3.99021494425541e-06,
      "loss": 0.5726,
      "step": 5414
    },
    {
      "epoch": 0.6639283962726827,
      "grad_norm": 2.033572219802354,
      "learning_rate": 3.989807961920876e-06,
      "loss": 0.5359,
      "step": 5415
    },
    {
      "epoch": 0.6640510053948013,
      "grad_norm": 1.869336917276334,
      "learning_rate": 3.989400918352598e-06,
      "loss": 0.5205,
      "step": 5416
    },
    {
      "epoch": 0.66417361451692,
      "grad_norm": 2.3516019079114696,
      "learning_rate": 3.988993813567306e-06,
      "loss": 0.5486,
      "step": 5417
    },
    {
      "epoch": 0.6642962236390387,
      "grad_norm": 2.1397805713333997,
      "learning_rate": 3.988586647581732e-06,
      "loss": 0.5737,
      "step": 5418
    },
    {
      "epoch": 0.6644188327611574,
      "grad_norm": 1.8007075515025175,
      "learning_rate": 3.98817942041261e-06,
      "loss": 0.492,
      "step": 5419
    },
    {
      "epoch": 0.6645414418832761,
      "grad_norm": 2.1411302834982444,
      "learning_rate": 3.987772132076681e-06,
      "loss": 0.5822,
      "step": 5420
    },
    {
      "epoch": 0.6646640510053948,
      "grad_norm": 1.7561470563876034,
      "learning_rate": 3.987364782590682e-06,
      "loss": 0.4837,
      "step": 5421
    },
    {
      "epoch": 0.6647866601275135,
      "grad_norm": 2.153628818415908,
      "learning_rate": 3.9869573719713585e-06,
      "loss": 0.5719,
      "step": 5422
    },
    {
      "epoch": 0.6649092692496321,
      "grad_norm": 1.6257630579019684,
      "learning_rate": 3.986549900235455e-06,
      "loss": 0.4908,
      "step": 5423
    },
    {
      "epoch": 0.6650318783717508,
      "grad_norm": 1.9511956853271735,
      "learning_rate": 3.986142367399717e-06,
      "loss": 0.5333,
      "step": 5424
    },
    {
      "epoch": 0.6651544874938695,
      "grad_norm": 1.9157480207973132,
      "learning_rate": 3.985734773480898e-06,
      "loss": 0.5627,
      "step": 5425
    },
    {
      "epoch": 0.6652770966159882,
      "grad_norm": 2.0759939271112184,
      "learning_rate": 3.985327118495749e-06,
      "loss": 0.587,
      "step": 5426
    },
    {
      "epoch": 0.6653997057381069,
      "grad_norm": 2.08463812814919,
      "learning_rate": 3.984919402461026e-06,
      "loss": 0.5711,
      "step": 5427
    },
    {
      "epoch": 0.6655223148602256,
      "grad_norm": 2.095660861260383,
      "learning_rate": 3.9845116253934865e-06,
      "loss": 0.5696,
      "step": 5428
    },
    {
      "epoch": 0.6656449239823443,
      "grad_norm": 2.0918717286029445,
      "learning_rate": 3.984103787309892e-06,
      "loss": 0.5399,
      "step": 5429
    },
    {
      "epoch": 0.665767533104463,
      "grad_norm": 1.881395812712009,
      "learning_rate": 3.983695888227004e-06,
      "loss": 0.5366,
      "step": 5430
    },
    {
      "epoch": 0.6658901422265816,
      "grad_norm": 1.9474999221893823,
      "learning_rate": 3.9832879281615875e-06,
      "loss": 0.5343,
      "step": 5431
    },
    {
      "epoch": 0.6660127513487003,
      "grad_norm": 1.8383034907174103,
      "learning_rate": 3.982879907130412e-06,
      "loss": 0.5704,
      "step": 5432
    },
    {
      "epoch": 0.666135360470819,
      "grad_norm": 1.906404916078657,
      "learning_rate": 3.982471825150247e-06,
      "loss": 0.5118,
      "step": 5433
    },
    {
      "epoch": 0.6662579695929377,
      "grad_norm": 2.0732041892228614,
      "learning_rate": 3.982063682237865e-06,
      "loss": 0.5979,
      "step": 5434
    },
    {
      "epoch": 0.6663805787150564,
      "grad_norm": 1.9006632038928717,
      "learning_rate": 3.981655478410043e-06,
      "loss": 0.5021,
      "step": 5435
    },
    {
      "epoch": 0.6665031878371751,
      "grad_norm": 2.0150359957971964,
      "learning_rate": 3.981247213683556e-06,
      "loss": 0.5537,
      "step": 5436
    },
    {
      "epoch": 0.6666257969592938,
      "grad_norm": 1.8442235650867895,
      "learning_rate": 3.980838888075188e-06,
      "loss": 0.524,
      "step": 5437
    },
    {
      "epoch": 0.6667484060814125,
      "grad_norm": 1.9308166974544188,
      "learning_rate": 3.980430501601718e-06,
      "loss": 0.55,
      "step": 5438
    },
    {
      "epoch": 0.6668710152035311,
      "grad_norm": 1.875371944989357,
      "learning_rate": 3.9800220542799365e-06,
      "loss": 0.4817,
      "step": 5439
    },
    {
      "epoch": 0.6669936243256498,
      "grad_norm": 2.1125969184248032,
      "learning_rate": 3.979613546126626e-06,
      "loss": 0.5745,
      "step": 5440
    },
    {
      "epoch": 0.6671162334477685,
      "grad_norm": 2.047649426812253,
      "learning_rate": 3.97920497715858e-06,
      "loss": 0.5223,
      "step": 5441
    },
    {
      "epoch": 0.6672388425698872,
      "grad_norm": 1.7423285497495002,
      "learning_rate": 3.978796347392591e-06,
      "loss": 0.5096,
      "step": 5442
    },
    {
      "epoch": 0.6673614516920059,
      "grad_norm": 1.953317485859097,
      "learning_rate": 3.978387656845454e-06,
      "loss": 0.5441,
      "step": 5443
    },
    {
      "epoch": 0.6674840608141246,
      "grad_norm": 1.8575473769797604,
      "learning_rate": 3.977978905533966e-06,
      "loss": 0.5421,
      "step": 5444
    },
    {
      "epoch": 0.6676066699362433,
      "grad_norm": 2.0163494406472062,
      "learning_rate": 3.977570093474929e-06,
      "loss": 0.5391,
      "step": 5445
    },
    {
      "epoch": 0.667729279058362,
      "grad_norm": 1.9021899668786406,
      "learning_rate": 3.9771612206851465e-06,
      "loss": 0.5304,
      "step": 5446
    },
    {
      "epoch": 0.6678518881804806,
      "grad_norm": 1.826491274174037,
      "learning_rate": 3.976752287181421e-06,
      "loss": 0.5397,
      "step": 5447
    },
    {
      "epoch": 0.6679744973025993,
      "grad_norm": 1.7973500905110578,
      "learning_rate": 3.976343292980564e-06,
      "loss": 0.5309,
      "step": 5448
    },
    {
      "epoch": 0.668097106424718,
      "grad_norm": 1.9246058232375198,
      "learning_rate": 3.975934238099383e-06,
      "loss": 0.5136,
      "step": 5449
    },
    {
      "epoch": 0.6682197155468367,
      "grad_norm": 1.758647568354237,
      "learning_rate": 3.975525122554691e-06,
      "loss": 0.5236,
      "step": 5450
    },
    {
      "epoch": 0.6683423246689554,
      "grad_norm": 2.087516364048592,
      "learning_rate": 3.975115946363305e-06,
      "loss": 0.574,
      "step": 5451
    },
    {
      "epoch": 0.6684649337910741,
      "grad_norm": 2.0703316038246857,
      "learning_rate": 3.9747067095420426e-06,
      "loss": 0.5708,
      "step": 5452
    },
    {
      "epoch": 0.6685875429131928,
      "grad_norm": 1.7222541659020418,
      "learning_rate": 3.974297412107723e-06,
      "loss": 0.5621,
      "step": 5453
    },
    {
      "epoch": 0.6687101520353115,
      "grad_norm": 2.008881160773243,
      "learning_rate": 3.973888054077169e-06,
      "loss": 0.579,
      "step": 5454
    },
    {
      "epoch": 0.6688327611574301,
      "grad_norm": 2.0694265078379006,
      "learning_rate": 3.973478635467208e-06,
      "loss": 0.6129,
      "step": 5455
    },
    {
      "epoch": 0.6689553702795488,
      "grad_norm": 1.83608224641359,
      "learning_rate": 3.973069156294666e-06,
      "loss": 0.5258,
      "step": 5456
    },
    {
      "epoch": 0.6690779794016675,
      "grad_norm": 1.8512175295953046,
      "learning_rate": 3.972659616576374e-06,
      "loss": 0.4718,
      "step": 5457
    },
    {
      "epoch": 0.6692005885237862,
      "grad_norm": 2.1727679515354854,
      "learning_rate": 3.972250016329165e-06,
      "loss": 0.5325,
      "step": 5458
    },
    {
      "epoch": 0.6693231976459049,
      "grad_norm": 2.2723465667381735,
      "learning_rate": 3.971840355569873e-06,
      "loss": 0.4804,
      "step": 5459
    },
    {
      "epoch": 0.6694458067680236,
      "grad_norm": 2.213273981203656,
      "learning_rate": 3.971430634315337e-06,
      "loss": 0.5769,
      "step": 5460
    },
    {
      "epoch": 0.6695684158901423,
      "grad_norm": 2.1369135954879317,
      "learning_rate": 3.971020852582397e-06,
      "loss": 0.5616,
      "step": 5461
    },
    {
      "epoch": 0.669691025012261,
      "grad_norm": 2.0183991218724677,
      "learning_rate": 3.970611010387897e-06,
      "loss": 0.5538,
      "step": 5462
    },
    {
      "epoch": 0.6698136341343796,
      "grad_norm": 2.2023343990822792,
      "learning_rate": 3.970201107748679e-06,
      "loss": 0.6695,
      "step": 5463
    },
    {
      "epoch": 0.6699362432564983,
      "grad_norm": 1.7486987053119318,
      "learning_rate": 3.969791144681594e-06,
      "loss": 0.4962,
      "step": 5464
    },
    {
      "epoch": 0.670058852378617,
      "grad_norm": 2.0149859894138036,
      "learning_rate": 3.96938112120349e-06,
      "loss": 0.5254,
      "step": 5465
    },
    {
      "epoch": 0.6701814615007357,
      "grad_norm": 1.774008740978389,
      "learning_rate": 3.968971037331221e-06,
      "loss": 0.5362,
      "step": 5466
    },
    {
      "epoch": 0.6703040706228544,
      "grad_norm": 2.106547764630085,
      "learning_rate": 3.968560893081642e-06,
      "loss": 0.5371,
      "step": 5467
    },
    {
      "epoch": 0.670426679744973,
      "grad_norm": 1.8745468771855793,
      "learning_rate": 3.9681506884716095e-06,
      "loss": 0.5636,
      "step": 5468
    },
    {
      "epoch": 0.6705492888670918,
      "grad_norm": 2.2013183170524715,
      "learning_rate": 3.967740423517985e-06,
      "loss": 0.5291,
      "step": 5469
    },
    {
      "epoch": 0.6706718979892105,
      "grad_norm": 1.9826764959350136,
      "learning_rate": 3.9673300982376315e-06,
      "loss": 0.5722,
      "step": 5470
    },
    {
      "epoch": 0.670794507111329,
      "grad_norm": 1.82706933314082,
      "learning_rate": 3.966919712647412e-06,
      "loss": 0.5492,
      "step": 5471
    },
    {
      "epoch": 0.6709171162334477,
      "grad_norm": 2.038859892465385,
      "learning_rate": 3.966509266764197e-06,
      "loss": 0.5975,
      "step": 5472
    },
    {
      "epoch": 0.6710397253555664,
      "grad_norm": 1.8027510032830565,
      "learning_rate": 3.966098760604853e-06,
      "loss": 0.4996,
      "step": 5473
    },
    {
      "epoch": 0.6711623344776851,
      "grad_norm": 2.0291725399267913,
      "learning_rate": 3.965688194186256e-06,
      "loss": 0.5658,
      "step": 5474
    },
    {
      "epoch": 0.6712849435998038,
      "grad_norm": 2.1652976288673447,
      "learning_rate": 3.965277567525278e-06,
      "loss": 0.5558,
      "step": 5475
    },
    {
      "epoch": 0.6714075527219225,
      "grad_norm": 2.1012590246358793,
      "learning_rate": 3.9648668806387984e-06,
      "loss": 0.5364,
      "step": 5476
    },
    {
      "epoch": 0.6715301618440412,
      "grad_norm": 2.2134722778159213,
      "learning_rate": 3.964456133543697e-06,
      "loss": 0.4948,
      "step": 5477
    },
    {
      "epoch": 0.6716527709661598,
      "grad_norm": 2.0525105991024155,
      "learning_rate": 3.964045326256856e-06,
      "loss": 0.5002,
      "step": 5478
    },
    {
      "epoch": 0.6717753800882785,
      "grad_norm": 1.860974189484338,
      "learning_rate": 3.963634458795159e-06,
      "loss": 0.5502,
      "step": 5479
    },
    {
      "epoch": 0.6718979892103972,
      "grad_norm": 1.9796492707890212,
      "learning_rate": 3.963223531175496e-06,
      "loss": 0.5394,
      "step": 5480
    },
    {
      "epoch": 0.6720205983325159,
      "grad_norm": 2.067112686914353,
      "learning_rate": 3.9628125434147555e-06,
      "loss": 0.5443,
      "step": 5481
    },
    {
      "epoch": 0.6721432074546346,
      "grad_norm": 2.09574570900968,
      "learning_rate": 3.962401495529829e-06,
      "loss": 0.57,
      "step": 5482
    },
    {
      "epoch": 0.6722658165767533,
      "grad_norm": 1.8543871475192302,
      "learning_rate": 3.961990387537612e-06,
      "loss": 0.5088,
      "step": 5483
    },
    {
      "epoch": 0.672388425698872,
      "grad_norm": 1.9071598689114935,
      "learning_rate": 3.961579219455002e-06,
      "loss": 0.5528,
      "step": 5484
    },
    {
      "epoch": 0.6725110348209907,
      "grad_norm": 1.9957858417321523,
      "learning_rate": 3.961167991298898e-06,
      "loss": 0.584,
      "step": 5485
    },
    {
      "epoch": 0.6726336439431093,
      "grad_norm": 1.8924285653639323,
      "learning_rate": 3.960756703086204e-06,
      "loss": 0.5685,
      "step": 5486
    },
    {
      "epoch": 0.672756253065228,
      "grad_norm": 2.0889118609073245,
      "learning_rate": 3.9603453548338235e-06,
      "loss": 0.5565,
      "step": 5487
    },
    {
      "epoch": 0.6728788621873467,
      "grad_norm": 1.9640132701783086,
      "learning_rate": 3.959933946558662e-06,
      "loss": 0.5278,
      "step": 5488
    },
    {
      "epoch": 0.6730014713094654,
      "grad_norm": 2.1775872444198914,
      "learning_rate": 3.959522478277632e-06,
      "loss": 0.5582,
      "step": 5489
    },
    {
      "epoch": 0.6731240804315841,
      "grad_norm": 2.315300586478074,
      "learning_rate": 3.9591109500076445e-06,
      "loss": 0.5877,
      "step": 5490
    },
    {
      "epoch": 0.6732466895537028,
      "grad_norm": 2.0124682642077287,
      "learning_rate": 3.958699361765612e-06,
      "loss": 0.5315,
      "step": 5491
    },
    {
      "epoch": 0.6733692986758215,
      "grad_norm": 2.165926430898986,
      "learning_rate": 3.958287713568455e-06,
      "loss": 0.5809,
      "step": 5492
    },
    {
      "epoch": 0.6734919077979402,
      "grad_norm": 2.067364296621591,
      "learning_rate": 3.957876005433091e-06,
      "loss": 0.5388,
      "step": 5493
    },
    {
      "epoch": 0.6736145169200588,
      "grad_norm": 2.0162338037007785,
      "learning_rate": 3.957464237376441e-06,
      "loss": 0.6021,
      "step": 5494
    },
    {
      "epoch": 0.6737371260421775,
      "grad_norm": 1.897912223078395,
      "learning_rate": 3.957052409415433e-06,
      "loss": 0.4687,
      "step": 5495
    },
    {
      "epoch": 0.6738597351642962,
      "grad_norm": 1.84546556774225,
      "learning_rate": 3.956640521566989e-06,
      "loss": 0.5827,
      "step": 5496
    },
    {
      "epoch": 0.6739823442864149,
      "grad_norm": 1.9562472298010989,
      "learning_rate": 3.956228573848042e-06,
      "loss": 0.5392,
      "step": 5497
    },
    {
      "epoch": 0.6741049534085336,
      "grad_norm": 1.8883368786284045,
      "learning_rate": 3.9558165662755225e-06,
      "loss": 0.5472,
      "step": 5498
    },
    {
      "epoch": 0.6742275625306523,
      "grad_norm": 1.8974962235148487,
      "learning_rate": 3.955404498866365e-06,
      "loss": 0.4963,
      "step": 5499
    },
    {
      "epoch": 0.674350171652771,
      "grad_norm": 1.8353229527373807,
      "learning_rate": 3.954992371637505e-06,
      "loss": 0.5214,
      "step": 5500
    },
    {
      "epoch": 0.6744727807748897,
      "grad_norm": 1.8833485107481198,
      "learning_rate": 3.954580184605884e-06,
      "loss": 0.515,
      "step": 5501
    },
    {
      "epoch": 0.6745953898970083,
      "grad_norm": 1.8320500702479812,
      "learning_rate": 3.954167937788441e-06,
      "loss": 0.5174,
      "step": 5502
    },
    {
      "epoch": 0.674717999019127,
      "grad_norm": 1.8857638395429488,
      "learning_rate": 3.953755631202122e-06,
      "loss": 0.5068,
      "step": 5503
    },
    {
      "epoch": 0.6748406081412457,
      "grad_norm": 1.8731958922782845,
      "learning_rate": 3.953343264863873e-06,
      "loss": 0.4782,
      "step": 5504
    },
    {
      "epoch": 0.6749632172633644,
      "grad_norm": 1.779323012199504,
      "learning_rate": 3.952930838790643e-06,
      "loss": 0.519,
      "step": 5505
    },
    {
      "epoch": 0.6750858263854831,
      "grad_norm": 1.9420753851546593,
      "learning_rate": 3.952518352999382e-06,
      "loss": 0.4743,
      "step": 5506
    },
    {
      "epoch": 0.6752084355076018,
      "grad_norm": 1.9923350893673792,
      "learning_rate": 3.952105807507048e-06,
      "loss": 0.5368,
      "step": 5507
    },
    {
      "epoch": 0.6753310446297205,
      "grad_norm": 1.945194919801789,
      "learning_rate": 3.951693202330592e-06,
      "loss": 0.5531,
      "step": 5508
    },
    {
      "epoch": 0.6754536537518392,
      "grad_norm": 2.1291173255061144,
      "learning_rate": 3.951280537486976e-06,
      "loss": 0.5579,
      "step": 5509
    },
    {
      "epoch": 0.6755762628739578,
      "grad_norm": 1.8485629912493748,
      "learning_rate": 3.950867812993161e-06,
      "loss": 0.5158,
      "step": 5510
    },
    {
      "epoch": 0.6756988719960765,
      "grad_norm": 2.074922621058053,
      "learning_rate": 3.95045502886611e-06,
      "loss": 0.553,
      "step": 5511
    },
    {
      "epoch": 0.6758214811181952,
      "grad_norm": 1.9333991898936127,
      "learning_rate": 3.95004218512279e-06,
      "loss": 0.5156,
      "step": 5512
    },
    {
      "epoch": 0.6759440902403139,
      "grad_norm": 1.8041595778776494,
      "learning_rate": 3.949629281780168e-06,
      "loss": 0.5132,
      "step": 5513
    },
    {
      "epoch": 0.6760666993624326,
      "grad_norm": 2.0094183963900534,
      "learning_rate": 3.949216318855217e-06,
      "loss": 0.5435,
      "step": 5514
    },
    {
      "epoch": 0.6761893084845513,
      "grad_norm": 1.833358666472127,
      "learning_rate": 3.94880329636491e-06,
      "loss": 0.5327,
      "step": 5515
    },
    {
      "epoch": 0.67631191760667,
      "grad_norm": 2.0190914812075635,
      "learning_rate": 3.948390214326222e-06,
      "loss": 0.5172,
      "step": 5516
    },
    {
      "epoch": 0.6764345267287887,
      "grad_norm": 1.8129415027813904,
      "learning_rate": 3.947977072756132e-06,
      "loss": 0.5334,
      "step": 5517
    },
    {
      "epoch": 0.6765571358509073,
      "grad_norm": 1.7771097524841701,
      "learning_rate": 3.947563871671622e-06,
      "loss": 0.4861,
      "step": 5518
    },
    {
      "epoch": 0.676679744973026,
      "grad_norm": 2.1620935492100557,
      "learning_rate": 3.947150611089673e-06,
      "loss": 0.525,
      "step": 5519
    },
    {
      "epoch": 0.6768023540951447,
      "grad_norm": 1.9461186465375808,
      "learning_rate": 3.946737291027272e-06,
      "loss": 0.51,
      "step": 5520
    },
    {
      "epoch": 0.6769249632172634,
      "grad_norm": 1.9937775918468499,
      "learning_rate": 3.946323911501406e-06,
      "loss": 0.5405,
      "step": 5521
    },
    {
      "epoch": 0.6770475723393821,
      "grad_norm": 2.0258882934589915,
      "learning_rate": 3.945910472529068e-06,
      "loss": 0.535,
      "step": 5522
    },
    {
      "epoch": 0.6771701814615008,
      "grad_norm": 1.8590065196983936,
      "learning_rate": 3.94549697412725e-06,
      "loss": 0.5256,
      "step": 5523
    },
    {
      "epoch": 0.6772927905836195,
      "grad_norm": 1.772918963604782,
      "learning_rate": 3.945083416312947e-06,
      "loss": 0.515,
      "step": 5524
    },
    {
      "epoch": 0.677415399705738,
      "grad_norm": 2.032361471972687,
      "learning_rate": 3.9446697991031575e-06,
      "loss": 0.5004,
      "step": 5525
    },
    {
      "epoch": 0.6775380088278568,
      "grad_norm": 1.9955639134473526,
      "learning_rate": 3.944256122514882e-06,
      "loss": 0.4964,
      "step": 5526
    },
    {
      "epoch": 0.6776606179499755,
      "grad_norm": 1.8426455041170617,
      "learning_rate": 3.943842386565122e-06,
      "loss": 0.5237,
      "step": 5527
    },
    {
      "epoch": 0.6777832270720942,
      "grad_norm": 1.7397849353343702,
      "learning_rate": 3.943428591270884e-06,
      "loss": 0.5134,
      "step": 5528
    },
    {
      "epoch": 0.6779058361942129,
      "grad_norm": 1.7920799274637347,
      "learning_rate": 3.9430147366491765e-06,
      "loss": 0.4963,
      "step": 5529
    },
    {
      "epoch": 0.6780284453163316,
      "grad_norm": 1.9777495644458476,
      "learning_rate": 3.9426008227170074e-06,
      "loss": 0.5304,
      "step": 5530
    },
    {
      "epoch": 0.6781510544384503,
      "grad_norm": 1.895817961840033,
      "learning_rate": 3.942186849491391e-06,
      "loss": 0.5411,
      "step": 5531
    },
    {
      "epoch": 0.678273663560569,
      "grad_norm": 1.941271641166681,
      "learning_rate": 3.941772816989342e-06,
      "loss": 0.5373,
      "step": 5532
    },
    {
      "epoch": 0.6783962726826875,
      "grad_norm": 1.8185249518605222,
      "learning_rate": 3.941358725227878e-06,
      "loss": 0.5296,
      "step": 5533
    },
    {
      "epoch": 0.6785188818048062,
      "grad_norm": 1.9783600106790922,
      "learning_rate": 3.940944574224018e-06,
      "loss": 0.5322,
      "step": 5534
    },
    {
      "epoch": 0.6786414909269249,
      "grad_norm": 1.870298891154678,
      "learning_rate": 3.940530363994786e-06,
      "loss": 0.5027,
      "step": 5535
    },
    {
      "epoch": 0.6787641000490436,
      "grad_norm": 2.014219387096148,
      "learning_rate": 3.9401160945572044e-06,
      "loss": 0.6307,
      "step": 5536
    },
    {
      "epoch": 0.6788867091711623,
      "grad_norm": 2.0750635244014686,
      "learning_rate": 3.939701765928303e-06,
      "loss": 0.5897,
      "step": 5537
    },
    {
      "epoch": 0.679009318293281,
      "grad_norm": 1.7483648800624982,
      "learning_rate": 3.9392873781251104e-06,
      "loss": 0.5199,
      "step": 5538
    },
    {
      "epoch": 0.6791319274153997,
      "grad_norm": 1.9317316643987015,
      "learning_rate": 3.938872931164658e-06,
      "loss": 0.5548,
      "step": 5539
    },
    {
      "epoch": 0.6792545365375184,
      "grad_norm": 1.9514900092233962,
      "learning_rate": 3.93845842506398e-06,
      "loss": 0.5369,
      "step": 5540
    },
    {
      "epoch": 0.679377145659637,
      "grad_norm": 2.1691682080312664,
      "learning_rate": 3.938043859840115e-06,
      "loss": 0.5584,
      "step": 5541
    },
    {
      "epoch": 0.6794997547817557,
      "grad_norm": 1.8226149525005317,
      "learning_rate": 3.9376292355101015e-06,
      "loss": 0.5197,
      "step": 5542
    },
    {
      "epoch": 0.6796223639038744,
      "grad_norm": 2.0122787169921534,
      "learning_rate": 3.937214552090981e-06,
      "loss": 0.4936,
      "step": 5543
    },
    {
      "epoch": 0.6797449730259931,
      "grad_norm": 1.7943755997973154,
      "learning_rate": 3.936799809599798e-06,
      "loss": 0.4905,
      "step": 5544
    },
    {
      "epoch": 0.6798675821481118,
      "grad_norm": 2.147036608565974,
      "learning_rate": 3.9363850080536e-06,
      "loss": 0.5135,
      "step": 5545
    },
    {
      "epoch": 0.6799901912702305,
      "grad_norm": 1.823588012613731,
      "learning_rate": 3.9359701474694345e-06,
      "loss": 0.5098,
      "step": 5546
    },
    {
      "epoch": 0.6801128003923492,
      "grad_norm": 1.9247222077926267,
      "learning_rate": 3.9355552278643535e-06,
      "loss": 0.5601,
      "step": 5547
    },
    {
      "epoch": 0.6802354095144679,
      "grad_norm": 2.035638722199648,
      "learning_rate": 3.935140249255412e-06,
      "loss": 0.5771,
      "step": 5548
    },
    {
      "epoch": 0.6803580186365865,
      "grad_norm": 1.970236967239671,
      "learning_rate": 3.934725211659665e-06,
      "loss": 0.537,
      "step": 5549
    },
    {
      "epoch": 0.6804806277587052,
      "grad_norm": 1.9808981984182474,
      "learning_rate": 3.934310115094172e-06,
      "loss": 0.5721,
      "step": 5550
    },
    {
      "epoch": 0.6806032368808239,
      "grad_norm": 2.10176855628578,
      "learning_rate": 3.933894959575993e-06,
      "loss": 0.5443,
      "step": 5551
    },
    {
      "epoch": 0.6807258460029426,
      "grad_norm": 1.7565788295437996,
      "learning_rate": 3.933479745122194e-06,
      "loss": 0.5048,
      "step": 5552
    },
    {
      "epoch": 0.6808484551250613,
      "grad_norm": 1.8295532429431225,
      "learning_rate": 3.933064471749839e-06,
      "loss": 0.5251,
      "step": 5553
    },
    {
      "epoch": 0.68097106424718,
      "grad_norm": 1.896055041461408,
      "learning_rate": 3.932649139475998e-06,
      "loss": 0.5107,
      "step": 5554
    },
    {
      "epoch": 0.6810936733692987,
      "grad_norm": 1.7357518865729973,
      "learning_rate": 3.932233748317741e-06,
      "loss": 0.4855,
      "step": 5555
    },
    {
      "epoch": 0.6812162824914174,
      "grad_norm": 1.8567196572676303,
      "learning_rate": 3.93181829829214e-06,
      "loss": 0.5621,
      "step": 5556
    },
    {
      "epoch": 0.681338891613536,
      "grad_norm": 1.9692543581882795,
      "learning_rate": 3.9314027894162735e-06,
      "loss": 0.5245,
      "step": 5557
    },
    {
      "epoch": 0.6814615007356547,
      "grad_norm": 2.004208023720354,
      "learning_rate": 3.930987221707218e-06,
      "loss": 0.5523,
      "step": 5558
    },
    {
      "epoch": 0.6815841098577734,
      "grad_norm": 1.9745987941329002,
      "learning_rate": 3.930571595182054e-06,
      "loss": 0.5205,
      "step": 5559
    },
    {
      "epoch": 0.6817067189798921,
      "grad_norm": 1.8824898310395126,
      "learning_rate": 3.930155909857865e-06,
      "loss": 0.5615,
      "step": 5560
    },
    {
      "epoch": 0.6818293281020108,
      "grad_norm": 1.9069026126152202,
      "learning_rate": 3.9297401657517355e-06,
      "loss": 0.5118,
      "step": 5561
    },
    {
      "epoch": 0.6819519372241295,
      "grad_norm": 2.1295142927267263,
      "learning_rate": 3.929324362880756e-06,
      "loss": 0.5725,
      "step": 5562
    },
    {
      "epoch": 0.6820745463462482,
      "grad_norm": 2.082572949199782,
      "learning_rate": 3.928908501262012e-06,
      "loss": 0.471,
      "step": 5563
    },
    {
      "epoch": 0.6821971554683669,
      "grad_norm": 2.142417858850891,
      "learning_rate": 3.9284925809126004e-06,
      "loss": 0.5745,
      "step": 5564
    },
    {
      "epoch": 0.6823197645904855,
      "grad_norm": 1.8760904852300464,
      "learning_rate": 3.928076601849615e-06,
      "loss": 0.513,
      "step": 5565
    },
    {
      "epoch": 0.6824423737126042,
      "grad_norm": 1.995359402017693,
      "learning_rate": 3.9276605640901525e-06,
      "loss": 0.545,
      "step": 5566
    },
    {
      "epoch": 0.6825649828347229,
      "grad_norm": 2.0075154987896284,
      "learning_rate": 3.927244467651314e-06,
      "loss": 0.6008,
      "step": 5567
    },
    {
      "epoch": 0.6826875919568416,
      "grad_norm": 1.687398599293356,
      "learning_rate": 3.926828312550201e-06,
      "loss": 0.5168,
      "step": 5568
    },
    {
      "epoch": 0.6828102010789603,
      "grad_norm": 1.9168735121721747,
      "learning_rate": 3.926412098803918e-06,
      "loss": 0.5302,
      "step": 5569
    },
    {
      "epoch": 0.682932810201079,
      "grad_norm": 2.0852278511238995,
      "learning_rate": 3.925995826429573e-06,
      "loss": 0.569,
      "step": 5570
    },
    {
      "epoch": 0.6830554193231977,
      "grad_norm": 1.9815487280582387,
      "learning_rate": 3.925579495444274e-06,
      "loss": 0.5792,
      "step": 5571
    },
    {
      "epoch": 0.6831780284453163,
      "grad_norm": 1.8692525927062387,
      "learning_rate": 3.925163105865134e-06,
      "loss": 0.4889,
      "step": 5572
    },
    {
      "epoch": 0.683300637567435,
      "grad_norm": 1.9347899243142055,
      "learning_rate": 3.924746657709268e-06,
      "loss": 0.5085,
      "step": 5573
    },
    {
      "epoch": 0.6834232466895537,
      "grad_norm": 1.8400722908824163,
      "learning_rate": 3.924330150993793e-06,
      "loss": 0.4901,
      "step": 5574
    },
    {
      "epoch": 0.6835458558116724,
      "grad_norm": 1.7672108079615545,
      "learning_rate": 3.9239135857358254e-06,
      "loss": 0.511,
      "step": 5575
    },
    {
      "epoch": 0.6836684649337911,
      "grad_norm": 1.65865106765423,
      "learning_rate": 3.923496961952489e-06,
      "loss": 0.5493,
      "step": 5576
    },
    {
      "epoch": 0.6837910740559098,
      "grad_norm": 2.09672555920351,
      "learning_rate": 3.923080279660908e-06,
      "loss": 0.5562,
      "step": 5577
    },
    {
      "epoch": 0.6839136831780285,
      "grad_norm": 1.7021947567059836,
      "learning_rate": 3.922663538878207e-06,
      "loss": 0.5326,
      "step": 5578
    },
    {
      "epoch": 0.6840362923001472,
      "grad_norm": 1.8492086518281738,
      "learning_rate": 3.922246739621517e-06,
      "loss": 0.4878,
      "step": 5579
    },
    {
      "epoch": 0.6841589014222658,
      "grad_norm": 1.80218661154174,
      "learning_rate": 3.921829881907967e-06,
      "loss": 0.5034,
      "step": 5580
    },
    {
      "epoch": 0.6842815105443845,
      "grad_norm": 1.9086295269138176,
      "learning_rate": 3.9214129657546926e-06,
      "loss": 0.554,
      "step": 5581
    },
    {
      "epoch": 0.6844041196665032,
      "grad_norm": 1.8853050281741688,
      "learning_rate": 3.92099599117883e-06,
      "loss": 0.5323,
      "step": 5582
    },
    {
      "epoch": 0.6845267287886219,
      "grad_norm": 1.9474163556163129,
      "learning_rate": 3.920578958197515e-06,
      "loss": 0.5536,
      "step": 5583
    },
    {
      "epoch": 0.6846493379107406,
      "grad_norm": 1.9636201790119372,
      "learning_rate": 3.92016186682789e-06,
      "loss": 0.5452,
      "step": 5584
    },
    {
      "epoch": 0.6847719470328593,
      "grad_norm": 1.984758709799688,
      "learning_rate": 3.919744717087098e-06,
      "loss": 0.5259,
      "step": 5585
    },
    {
      "epoch": 0.684894556154978,
      "grad_norm": 1.8988123232916418,
      "learning_rate": 3.919327508992285e-06,
      "loss": 0.5025,
      "step": 5586
    },
    {
      "epoch": 0.6850171652770967,
      "grad_norm": 2.015018388255079,
      "learning_rate": 3.918910242560598e-06,
      "loss": 0.5485,
      "step": 5587
    },
    {
      "epoch": 0.6851397743992153,
      "grad_norm": 2.3294380670373895,
      "learning_rate": 3.918492917809189e-06,
      "loss": 0.5862,
      "step": 5588
    },
    {
      "epoch": 0.685262383521334,
      "grad_norm": 1.975822956863693,
      "learning_rate": 3.9180755347552105e-06,
      "loss": 0.6138,
      "step": 5589
    },
    {
      "epoch": 0.6853849926434527,
      "grad_norm": 1.9549897239285692,
      "learning_rate": 3.9176580934158145e-06,
      "loss": 0.5252,
      "step": 5590
    },
    {
      "epoch": 0.6855076017655714,
      "grad_norm": 1.9582789757016135,
      "learning_rate": 3.917240593808163e-06,
      "loss": 0.5655,
      "step": 5591
    },
    {
      "epoch": 0.68563021088769,
      "grad_norm": 1.8500330948899486,
      "learning_rate": 3.916823035949414e-06,
      "loss": 0.5635,
      "step": 5592
    },
    {
      "epoch": 0.6857528200098088,
      "grad_norm": 1.8779161221717755,
      "learning_rate": 3.9164054198567295e-06,
      "loss": 0.5347,
      "step": 5593
    },
    {
      "epoch": 0.6858754291319274,
      "grad_norm": 2.0436645912826563,
      "learning_rate": 3.915987745547274e-06,
      "loss": 0.5576,
      "step": 5594
    },
    {
      "epoch": 0.6859980382540461,
      "grad_norm": 1.9127223644970879,
      "learning_rate": 3.915570013038217e-06,
      "loss": 0.5067,
      "step": 5595
    },
    {
      "epoch": 0.6861206473761647,
      "grad_norm": 1.79542895018855,
      "learning_rate": 3.915152222346725e-06,
      "loss": 0.5255,
      "step": 5596
    },
    {
      "epoch": 0.6862432564982834,
      "grad_norm": 1.7857460376463705,
      "learning_rate": 3.914734373489972e-06,
      "loss": 0.4877,
      "step": 5597
    },
    {
      "epoch": 0.6863658656204021,
      "grad_norm": 1.8886620315349458,
      "learning_rate": 3.9143164664851315e-06,
      "loss": 0.5203,
      "step": 5598
    },
    {
      "epoch": 0.6864884747425208,
      "grad_norm": 1.9068721145186103,
      "learning_rate": 3.91389850134938e-06,
      "loss": 0.5581,
      "step": 5599
    },
    {
      "epoch": 0.6866110838646395,
      "grad_norm": 1.9937336396451384,
      "learning_rate": 3.913480478099898e-06,
      "loss": 0.5577,
      "step": 5600
    },
    {
      "epoch": 0.6867336929867582,
      "grad_norm": 1.8626813349427915,
      "learning_rate": 3.913062396753865e-06,
      "loss": 0.5164,
      "step": 5601
    },
    {
      "epoch": 0.6868563021088769,
      "grad_norm": 2.0338912126613238,
      "learning_rate": 3.912644257328466e-06,
      "loss": 0.5208,
      "step": 5602
    },
    {
      "epoch": 0.6869789112309956,
      "grad_norm": 1.8827353834289557,
      "learning_rate": 3.912226059840887e-06,
      "loss": 0.5138,
      "step": 5603
    },
    {
      "epoch": 0.6871015203531142,
      "grad_norm": 2.0093732232504946,
      "learning_rate": 3.911807804308318e-06,
      "loss": 0.6482,
      "step": 5604
    },
    {
      "epoch": 0.6872241294752329,
      "grad_norm": 1.9049468732640968,
      "learning_rate": 3.911389490747948e-06,
      "loss": 0.4923,
      "step": 5605
    },
    {
      "epoch": 0.6873467385973516,
      "grad_norm": 1.944956030122499,
      "learning_rate": 3.910971119176971e-06,
      "loss": 0.5724,
      "step": 5606
    },
    {
      "epoch": 0.6874693477194703,
      "grad_norm": 2.1048506961805096,
      "learning_rate": 3.910552689612584e-06,
      "loss": 0.5897,
      "step": 5607
    },
    {
      "epoch": 0.687591956841589,
      "grad_norm": 1.9895057496978323,
      "learning_rate": 3.910134202071982e-06,
      "loss": 0.5173,
      "step": 5608
    },
    {
      "epoch": 0.6877145659637077,
      "grad_norm": 1.7575465446527614,
      "learning_rate": 3.90971565657237e-06,
      "loss": 0.4873,
      "step": 5609
    },
    {
      "epoch": 0.6878371750858264,
      "grad_norm": 1.974587775251818,
      "learning_rate": 3.909297053130948e-06,
      "loss": 0.5042,
      "step": 5610
    },
    {
      "epoch": 0.6879597842079451,
      "grad_norm": 1.998063847966676,
      "learning_rate": 3.9088783917649225e-06,
      "loss": 0.5627,
      "step": 5611
    },
    {
      "epoch": 0.6880823933300637,
      "grad_norm": 1.8862243462201433,
      "learning_rate": 3.908459672491501e-06,
      "loss": 0.5658,
      "step": 5612
    },
    {
      "epoch": 0.6882050024521824,
      "grad_norm": 1.8982569315611204,
      "learning_rate": 3.908040895327893e-06,
      "loss": 0.4979,
      "step": 5613
    },
    {
      "epoch": 0.6883276115743011,
      "grad_norm": 1.9355341879795611,
      "learning_rate": 3.907622060291312e-06,
      "loss": 0.5451,
      "step": 5614
    },
    {
      "epoch": 0.6884502206964198,
      "grad_norm": 2.0434401128253037,
      "learning_rate": 3.907203167398972e-06,
      "loss": 0.5319,
      "step": 5615
    },
    {
      "epoch": 0.6885728298185385,
      "grad_norm": 1.9319391812426177,
      "learning_rate": 3.90678421666809e-06,
      "loss": 0.5888,
      "step": 5616
    },
    {
      "epoch": 0.6886954389406572,
      "grad_norm": 2.1755839855893404,
      "learning_rate": 3.906365208115887e-06,
      "loss": 0.5267,
      "step": 5617
    },
    {
      "epoch": 0.6888180480627759,
      "grad_norm": 1.9043544391214915,
      "learning_rate": 3.905946141759584e-06,
      "loss": 0.5453,
      "step": 5618
    },
    {
      "epoch": 0.6889406571848946,
      "grad_norm": 1.9742829258227055,
      "learning_rate": 3.905527017616405e-06,
      "loss": 0.5439,
      "step": 5619
    },
    {
      "epoch": 0.6890632663070132,
      "grad_norm": 1.7376480221929294,
      "learning_rate": 3.9051078357035784e-06,
      "loss": 0.5006,
      "step": 5620
    },
    {
      "epoch": 0.6891858754291319,
      "grad_norm": 2.0153746353322877,
      "learning_rate": 3.904688596038331e-06,
      "loss": 0.5496,
      "step": 5621
    },
    {
      "epoch": 0.6893084845512506,
      "grad_norm": 2.005244192801592,
      "learning_rate": 3.9042692986378965e-06,
      "loss": 0.5545,
      "step": 5622
    },
    {
      "epoch": 0.6894310936733693,
      "grad_norm": 1.9768617018750694,
      "learning_rate": 3.903849943519507e-06,
      "loss": 0.5675,
      "step": 5623
    },
    {
      "epoch": 0.689553702795488,
      "grad_norm": 1.948068773148756,
      "learning_rate": 3.9034305307004e-06,
      "loss": 0.5384,
      "step": 5624
    },
    {
      "epoch": 0.6896763119176067,
      "grad_norm": 2.240868635393812,
      "learning_rate": 3.903011060197813e-06,
      "loss": 0.5705,
      "step": 5625
    },
    {
      "epoch": 0.6897989210397254,
      "grad_norm": 1.9247635613501783,
      "learning_rate": 3.902591532028989e-06,
      "loss": 0.5931,
      "step": 5626
    },
    {
      "epoch": 0.689921530161844,
      "grad_norm": 2.004567607618707,
      "learning_rate": 3.902171946211168e-06,
      "loss": 0.5416,
      "step": 5627
    },
    {
      "epoch": 0.6900441392839627,
      "grad_norm": 2.041955274081852,
      "learning_rate": 3.901752302761599e-06,
      "loss": 0.5463,
      "step": 5628
    },
    {
      "epoch": 0.6901667484060814,
      "grad_norm": 2.0950226510031715,
      "learning_rate": 3.9013326016975285e-06,
      "loss": 0.6026,
      "step": 5629
    },
    {
      "epoch": 0.6902893575282001,
      "grad_norm": 2.0715469356172838,
      "learning_rate": 3.900912843036207e-06,
      "loss": 0.5697,
      "step": 5630
    },
    {
      "epoch": 0.6904119666503188,
      "grad_norm": 1.9493810300608374,
      "learning_rate": 3.900493026794888e-06,
      "loss": 0.571,
      "step": 5631
    },
    {
      "epoch": 0.6905345757724375,
      "grad_norm": 1.7666960960665326,
      "learning_rate": 3.9000731529908254e-06,
      "loss": 0.4878,
      "step": 5632
    },
    {
      "epoch": 0.6906571848945562,
      "grad_norm": 1.900178174167103,
      "learning_rate": 3.899653221641278e-06,
      "loss": 0.5696,
      "step": 5633
    },
    {
      "epoch": 0.6907797940166749,
      "grad_norm": 2.107713522395818,
      "learning_rate": 3.8992332327635055e-06,
      "loss": 0.5529,
      "step": 5634
    },
    {
      "epoch": 0.6909024031387935,
      "grad_norm": 2.1254879659428454,
      "learning_rate": 3.898813186374769e-06,
      "loss": 0.516,
      "step": 5635
    },
    {
      "epoch": 0.6910250122609122,
      "grad_norm": 2.188717159217201,
      "learning_rate": 3.898393082492335e-06,
      "loss": 0.541,
      "step": 5636
    },
    {
      "epoch": 0.6911476213830309,
      "grad_norm": 2.1244624965743184,
      "learning_rate": 3.8979729211334705e-06,
      "loss": 0.5885,
      "step": 5637
    },
    {
      "epoch": 0.6912702305051496,
      "grad_norm": 1.981725783487595,
      "learning_rate": 3.897552702315442e-06,
      "loss": 0.5439,
      "step": 5638
    },
    {
      "epoch": 0.6913928396272683,
      "grad_norm": 1.8340468319841612,
      "learning_rate": 3.897132426055525e-06,
      "loss": 0.5457,
      "step": 5639
    },
    {
      "epoch": 0.691515448749387,
      "grad_norm": 1.8345169703994018,
      "learning_rate": 3.896712092370991e-06,
      "loss": 0.5175,
      "step": 5640
    },
    {
      "epoch": 0.6916380578715057,
      "grad_norm": 2.284229408449506,
      "learning_rate": 3.896291701279118e-06,
      "loss": 0.5636,
      "step": 5641
    },
    {
      "epoch": 0.6917606669936244,
      "grad_norm": 1.8154097076369917,
      "learning_rate": 3.895871252797183e-06,
      "loss": 0.5473,
      "step": 5642
    },
    {
      "epoch": 0.691883276115743,
      "grad_norm": 1.7834951530165681,
      "learning_rate": 3.89545074694247e-06,
      "loss": 0.5411,
      "step": 5643
    },
    {
      "epoch": 0.6920058852378617,
      "grad_norm": 1.8256142892283993,
      "learning_rate": 3.895030183732259e-06,
      "loss": 0.5348,
      "step": 5644
    },
    {
      "epoch": 0.6921284943599804,
      "grad_norm": 2.013771647341678,
      "learning_rate": 3.894609563183838e-06,
      "loss": 0.6271,
      "step": 5645
    },
    {
      "epoch": 0.6922511034820991,
      "grad_norm": 1.9882431678485553,
      "learning_rate": 3.894188885314496e-06,
      "loss": 0.5498,
      "step": 5646
    },
    {
      "epoch": 0.6923737126042178,
      "grad_norm": 1.7585432159067114,
      "learning_rate": 3.893768150141523e-06,
      "loss": 0.5021,
      "step": 5647
    },
    {
      "epoch": 0.6924963217263365,
      "grad_norm": 1.8949801926223224,
      "learning_rate": 3.89334735768221e-06,
      "loss": 0.5165,
      "step": 5648
    },
    {
      "epoch": 0.6926189308484552,
      "grad_norm": 2.0865098589219215,
      "learning_rate": 3.892926507953855e-06,
      "loss": 0.5947,
      "step": 5649
    },
    {
      "epoch": 0.6927415399705739,
      "grad_norm": 1.9457897108209987,
      "learning_rate": 3.892505600973754e-06,
      "loss": 0.5476,
      "step": 5650
    },
    {
      "epoch": 0.6928641490926924,
      "grad_norm": 1.9041073167990767,
      "learning_rate": 3.892084636759208e-06,
      "loss": 0.5607,
      "step": 5651
    },
    {
      "epoch": 0.6929867582148111,
      "grad_norm": 2.0537198099119327,
      "learning_rate": 3.891663615327518e-06,
      "loss": 0.5496,
      "step": 5652
    },
    {
      "epoch": 0.6931093673369298,
      "grad_norm": 2.037428535652115,
      "learning_rate": 3.89124253669599e-06,
      "loss": 0.5495,
      "step": 5653
    },
    {
      "epoch": 0.6932319764590485,
      "grad_norm": 1.8258228458339503,
      "learning_rate": 3.890821400881931e-06,
      "loss": 0.4873,
      "step": 5654
    },
    {
      "epoch": 0.6933545855811672,
      "grad_norm": 1.981019381356516,
      "learning_rate": 3.8904002079026505e-06,
      "loss": 0.5505,
      "step": 5655
    },
    {
      "epoch": 0.693477194703286,
      "grad_norm": 1.8966031776689147,
      "learning_rate": 3.88997895777546e-06,
      "loss": 0.5232,
      "step": 5656
    },
    {
      "epoch": 0.6935998038254046,
      "grad_norm": 1.7942807923243265,
      "learning_rate": 3.889557650517673e-06,
      "loss": 0.5216,
      "step": 5657
    },
    {
      "epoch": 0.6937224129475233,
      "grad_norm": 1.7860443941867221,
      "learning_rate": 3.889136286146607e-06,
      "loss": 0.4867,
      "step": 5658
    },
    {
      "epoch": 0.6938450220696419,
      "grad_norm": 2.0001858458911244,
      "learning_rate": 3.88871486467958e-06,
      "loss": 0.547,
      "step": 5659
    },
    {
      "epoch": 0.6939676311917606,
      "grad_norm": 2.058424090230559,
      "learning_rate": 3.888293386133914e-06,
      "loss": 0.4902,
      "step": 5660
    },
    {
      "epoch": 0.6940902403138793,
      "grad_norm": 2.0802986119895523,
      "learning_rate": 3.8878718505269305e-06,
      "loss": 0.5762,
      "step": 5661
    },
    {
      "epoch": 0.694212849435998,
      "grad_norm": 2.0360324825942304,
      "learning_rate": 3.887450257875959e-06,
      "loss": 0.5349,
      "step": 5662
    },
    {
      "epoch": 0.6943354585581167,
      "grad_norm": 1.906506412565479,
      "learning_rate": 3.887028608198324e-06,
      "loss": 0.5221,
      "step": 5663
    },
    {
      "epoch": 0.6944580676802354,
      "grad_norm": 1.9050753382423644,
      "learning_rate": 3.8866069015113594e-06,
      "loss": 0.534,
      "step": 5664
    },
    {
      "epoch": 0.6945806768023541,
      "grad_norm": 1.9737827471781224,
      "learning_rate": 3.886185137832395e-06,
      "loss": 0.5704,
      "step": 5665
    },
    {
      "epoch": 0.6947032859244728,
      "grad_norm": 1.8809694314670344,
      "learning_rate": 3.885763317178768e-06,
      "loss": 0.5253,
      "step": 5666
    },
    {
      "epoch": 0.6948258950465914,
      "grad_norm": 2.022963756412327,
      "learning_rate": 3.8853414395678144e-06,
      "loss": 0.5485,
      "step": 5667
    },
    {
      "epoch": 0.6949485041687101,
      "grad_norm": 2.0052306466060634,
      "learning_rate": 3.884919505016876e-06,
      "loss": 0.5179,
      "step": 5668
    },
    {
      "epoch": 0.6950711132908288,
      "grad_norm": 2.013481192732132,
      "learning_rate": 3.884497513543294e-06,
      "loss": 0.5341,
      "step": 5669
    },
    {
      "epoch": 0.6951937224129475,
      "grad_norm": 2.149867990899234,
      "learning_rate": 3.884075465164413e-06,
      "loss": 0.533,
      "step": 5670
    },
    {
      "epoch": 0.6953163315350662,
      "grad_norm": 1.825826779486935,
      "learning_rate": 3.88365335989758e-06,
      "loss": 0.5158,
      "step": 5671
    },
    {
      "epoch": 0.6954389406571849,
      "grad_norm": 1.9405152070565048,
      "learning_rate": 3.883231197760144e-06,
      "loss": 0.5474,
      "step": 5672
    },
    {
      "epoch": 0.6955615497793036,
      "grad_norm": 1.892398014056848,
      "learning_rate": 3.882808978769458e-06,
      "loss": 0.5294,
      "step": 5673
    },
    {
      "epoch": 0.6956841589014222,
      "grad_norm": 2.0141130955649587,
      "learning_rate": 3.8823867029428744e-06,
      "loss": 0.5426,
      "step": 5674
    },
    {
      "epoch": 0.6958067680235409,
      "grad_norm": 2.002393463107627,
      "learning_rate": 3.88196437029775e-06,
      "loss": 0.535,
      "step": 5675
    },
    {
      "epoch": 0.6959293771456596,
      "grad_norm": 2.1827897289826823,
      "learning_rate": 3.881541980851443e-06,
      "loss": 0.5659,
      "step": 5676
    },
    {
      "epoch": 0.6960519862677783,
      "grad_norm": 1.9915395488105927,
      "learning_rate": 3.8811195346213155e-06,
      "loss": 0.5361,
      "step": 5677
    },
    {
      "epoch": 0.696174595389897,
      "grad_norm": 1.8096620828120242,
      "learning_rate": 3.880697031624729e-06,
      "loss": 0.501,
      "step": 5678
    },
    {
      "epoch": 0.6962972045120157,
      "grad_norm": 2.0057153767817284,
      "learning_rate": 3.880274471879052e-06,
      "loss": 0.5015,
      "step": 5679
    },
    {
      "epoch": 0.6964198136341344,
      "grad_norm": 1.8644258559198232,
      "learning_rate": 3.879851855401648e-06,
      "loss": 0.5516,
      "step": 5680
    },
    {
      "epoch": 0.6965424227562531,
      "grad_norm": 2.0184382750472594,
      "learning_rate": 3.879429182209892e-06,
      "loss": 0.6162,
      "step": 5681
    },
    {
      "epoch": 0.6966650318783717,
      "grad_norm": 1.965967609071016,
      "learning_rate": 3.879006452321154e-06,
      "loss": 0.5024,
      "step": 5682
    },
    {
      "epoch": 0.6967876410004904,
      "grad_norm": 2.2011198447522764,
      "learning_rate": 3.878583665752809e-06,
      "loss": 0.575,
      "step": 5683
    },
    {
      "epoch": 0.6969102501226091,
      "grad_norm": 1.990701193558083,
      "learning_rate": 3.878160822522236e-06,
      "loss": 0.5529,
      "step": 5684
    },
    {
      "epoch": 0.6970328592447278,
      "grad_norm": 2.3766438765550086,
      "learning_rate": 3.877737922646812e-06,
      "loss": 0.6041,
      "step": 5685
    },
    {
      "epoch": 0.6971554683668465,
      "grad_norm": 2.0957942468005517,
      "learning_rate": 3.877314966143921e-06,
      "loss": 0.5379,
      "step": 5686
    },
    {
      "epoch": 0.6972780774889652,
      "grad_norm": 1.883136472726022,
      "learning_rate": 3.8768919530309455e-06,
      "loss": 0.5439,
      "step": 5687
    },
    {
      "epoch": 0.6974006866110839,
      "grad_norm": 1.9673120895539915,
      "learning_rate": 3.876468883325274e-06,
      "loss": 0.5425,
      "step": 5688
    },
    {
      "epoch": 0.6975232957332026,
      "grad_norm": 2.2281958834573103,
      "learning_rate": 3.876045757044294e-06,
      "loss": 0.5497,
      "step": 5689
    },
    {
      "epoch": 0.6976459048553212,
      "grad_norm": 2.0335256096793084,
      "learning_rate": 3.8756225742053984e-06,
      "loss": 0.5461,
      "step": 5690
    },
    {
      "epoch": 0.6977685139774399,
      "grad_norm": 2.2079810164615736,
      "learning_rate": 3.875199334825978e-06,
      "loss": 0.5045,
      "step": 5691
    },
    {
      "epoch": 0.6978911230995586,
      "grad_norm": 2.147488178832539,
      "learning_rate": 3.874776038923432e-06,
      "loss": 0.4908,
      "step": 5692
    },
    {
      "epoch": 0.6980137322216773,
      "grad_norm": 2.1263970338971636,
      "learning_rate": 3.874352686515156e-06,
      "loss": 0.5227,
      "step": 5693
    },
    {
      "epoch": 0.698136341343796,
      "grad_norm": 2.1825573591787726,
      "learning_rate": 3.873929277618552e-06,
      "loss": 0.5689,
      "step": 5694
    },
    {
      "epoch": 0.6982589504659147,
      "grad_norm": 1.8667176764616031,
      "learning_rate": 3.873505812251021e-06,
      "loss": 0.4823,
      "step": 5695
    },
    {
      "epoch": 0.6983815595880334,
      "grad_norm": 1.9576950776096973,
      "learning_rate": 3.873082290429971e-06,
      "loss": 0.5229,
      "step": 5696
    },
    {
      "epoch": 0.6985041687101521,
      "grad_norm": 2.1960557189300065,
      "learning_rate": 3.872658712172808e-06,
      "loss": 0.5105,
      "step": 5697
    },
    {
      "epoch": 0.6986267778322707,
      "grad_norm": 2.088615679692621,
      "learning_rate": 3.8722350774969416e-06,
      "loss": 0.574,
      "step": 5698
    },
    {
      "epoch": 0.6987493869543894,
      "grad_norm": 2.0861358450044043,
      "learning_rate": 3.871811386419784e-06,
      "loss": 0.5302,
      "step": 5699
    },
    {
      "epoch": 0.6988719960765081,
      "grad_norm": 1.7987466195511042,
      "learning_rate": 3.87138763895875e-06,
      "loss": 0.4946,
      "step": 5700
    },
    {
      "epoch": 0.6989946051986268,
      "grad_norm": 1.9250497382068168,
      "learning_rate": 3.8709638351312555e-06,
      "loss": 0.5356,
      "step": 5701
    },
    {
      "epoch": 0.6991172143207455,
      "grad_norm": 1.9218563861773215,
      "learning_rate": 3.870539974954721e-06,
      "loss": 0.5236,
      "step": 5702
    },
    {
      "epoch": 0.6992398234428642,
      "grad_norm": 2.0156726126999915,
      "learning_rate": 3.870116058446567e-06,
      "loss": 0.527,
      "step": 5703
    },
    {
      "epoch": 0.6993624325649829,
      "grad_norm": 1.8592611224085829,
      "learning_rate": 3.869692085624218e-06,
      "loss": 0.5027,
      "step": 5704
    },
    {
      "epoch": 0.6994850416871016,
      "grad_norm": 1.9658484733081176,
      "learning_rate": 3.869268056505098e-06,
      "loss": 0.5919,
      "step": 5705
    },
    {
      "epoch": 0.6996076508092202,
      "grad_norm": 1.819969079006855,
      "learning_rate": 3.8688439711066386e-06,
      "loss": 0.4947,
      "step": 5706
    },
    {
      "epoch": 0.6997302599313389,
      "grad_norm": 2.0569567345464326,
      "learning_rate": 3.868419829446267e-06,
      "loss": 0.5304,
      "step": 5707
    },
    {
      "epoch": 0.6998528690534576,
      "grad_norm": 1.798505106404957,
      "learning_rate": 3.867995631541418e-06,
      "loss": 0.5067,
      "step": 5708
    },
    {
      "epoch": 0.6999754781755763,
      "grad_norm": 1.972610843327109,
      "learning_rate": 3.867571377409528e-06,
      "loss": 0.5261,
      "step": 5709
    },
    {
      "epoch": 0.700098087297695,
      "grad_norm": 1.791791624686915,
      "learning_rate": 3.867147067068032e-06,
      "loss": 0.5229,
      "step": 5710
    },
    {
      "epoch": 0.7002206964198137,
      "grad_norm": 1.8586467135615554,
      "learning_rate": 3.8667227005343714e-06,
      "loss": 0.5012,
      "step": 5711
    },
    {
      "epoch": 0.7003433055419324,
      "grad_norm": 2.1383271780919175,
      "learning_rate": 3.8662982778259886e-06,
      "loss": 0.5251,
      "step": 5712
    },
    {
      "epoch": 0.7004659146640511,
      "grad_norm": 1.6523675022409272,
      "learning_rate": 3.8658737989603276e-06,
      "loss": 0.5321,
      "step": 5713
    },
    {
      "epoch": 0.7005885237861696,
      "grad_norm": 1.9221282972968012,
      "learning_rate": 3.865449263954836e-06,
      "loss": 0.5481,
      "step": 5714
    },
    {
      "epoch": 0.7007111329082883,
      "grad_norm": 2.0544416978680946,
      "learning_rate": 3.865024672826961e-06,
      "loss": 0.4955,
      "step": 5715
    },
    {
      "epoch": 0.700833742030407,
      "grad_norm": 1.9266916317878267,
      "learning_rate": 3.864600025594156e-06,
      "loss": 0.5322,
      "step": 5716
    },
    {
      "epoch": 0.7009563511525257,
      "grad_norm": 1.905780858919143,
      "learning_rate": 3.864175322273874e-06,
      "loss": 0.5124,
      "step": 5717
    },
    {
      "epoch": 0.7010789602746444,
      "grad_norm": 2.0101540896507264,
      "learning_rate": 3.863750562883572e-06,
      "loss": 0.5926,
      "step": 5718
    },
    {
      "epoch": 0.7012015693967631,
      "grad_norm": 1.9509927161475915,
      "learning_rate": 3.8633257474407056e-06,
      "loss": 0.5811,
      "step": 5719
    },
    {
      "epoch": 0.7013241785188818,
      "grad_norm": 2.0153529002293213,
      "learning_rate": 3.862900875962739e-06,
      "loss": 0.543,
      "step": 5720
    },
    {
      "epoch": 0.7014467876410004,
      "grad_norm": 2.2346071907657556,
      "learning_rate": 3.862475948467134e-06,
      "loss": 0.5139,
      "step": 5721
    },
    {
      "epoch": 0.7015693967631191,
      "grad_norm": 1.947816912495238,
      "learning_rate": 3.862050964971354e-06,
      "loss": 0.5462,
      "step": 5722
    },
    {
      "epoch": 0.7016920058852378,
      "grad_norm": 1.9696736386509681,
      "learning_rate": 3.861625925492869e-06,
      "loss": 0.5422,
      "step": 5723
    },
    {
      "epoch": 0.7018146150073565,
      "grad_norm": 1.8591548155428252,
      "learning_rate": 3.861200830049147e-06,
      "loss": 0.4764,
      "step": 5724
    },
    {
      "epoch": 0.7019372241294752,
      "grad_norm": 1.9702072985106642,
      "learning_rate": 3.860775678657662e-06,
      "loss": 0.5604,
      "step": 5725
    },
    {
      "epoch": 0.7020598332515939,
      "grad_norm": 1.9390858625600684,
      "learning_rate": 3.8603504713358875e-06,
      "loss": 0.5258,
      "step": 5726
    },
    {
      "epoch": 0.7021824423737126,
      "grad_norm": 1.7897230063747385,
      "learning_rate": 3.859925208101299e-06,
      "loss": 0.5202,
      "step": 5727
    },
    {
      "epoch": 0.7023050514958313,
      "grad_norm": 1.9909927700784789,
      "learning_rate": 3.859499888971379e-06,
      "loss": 0.5029,
      "step": 5728
    },
    {
      "epoch": 0.7024276606179499,
      "grad_norm": 1.9942562415134995,
      "learning_rate": 3.859074513963605e-06,
      "loss": 0.5341,
      "step": 5729
    },
    {
      "epoch": 0.7025502697400686,
      "grad_norm": 1.961171441189679,
      "learning_rate": 3.858649083095463e-06,
      "loss": 0.5456,
      "step": 5730
    },
    {
      "epoch": 0.7026728788621873,
      "grad_norm": 2.063213212959208,
      "learning_rate": 3.858223596384438e-06,
      "loss": 0.5512,
      "step": 5731
    },
    {
      "epoch": 0.702795487984306,
      "grad_norm": 1.935742181657084,
      "learning_rate": 3.85779805384802e-06,
      "loss": 0.5271,
      "step": 5732
    },
    {
      "epoch": 0.7029180971064247,
      "grad_norm": 1.8731729478760946,
      "learning_rate": 3.857372455503698e-06,
      "loss": 0.5565,
      "step": 5733
    },
    {
      "epoch": 0.7030407062285434,
      "grad_norm": 1.8421032728048723,
      "learning_rate": 3.8569468013689634e-06,
      "loss": 0.5138,
      "step": 5734
    },
    {
      "epoch": 0.7031633153506621,
      "grad_norm": 1.787485049001866,
      "learning_rate": 3.856521091461313e-06,
      "loss": 0.5172,
      "step": 5735
    },
    {
      "epoch": 0.7032859244727808,
      "grad_norm": 2.24461393999837,
      "learning_rate": 3.856095325798246e-06,
      "loss": 0.5101,
      "step": 5736
    },
    {
      "epoch": 0.7034085335948994,
      "grad_norm": 1.8752844972442326,
      "learning_rate": 3.85566950439726e-06,
      "loss": 0.522,
      "step": 5737
    },
    {
      "epoch": 0.7035311427170181,
      "grad_norm": 2.030423461679757,
      "learning_rate": 3.8552436272758565e-06,
      "loss": 0.5627,
      "step": 5738
    },
    {
      "epoch": 0.7036537518391368,
      "grad_norm": 2.014186953404684,
      "learning_rate": 3.8548176944515405e-06,
      "loss": 0.4699,
      "step": 5739
    },
    {
      "epoch": 0.7037763609612555,
      "grad_norm": 1.727141233796873,
      "learning_rate": 3.854391705941819e-06,
      "loss": 0.5386,
      "step": 5740
    },
    {
      "epoch": 0.7038989700833742,
      "grad_norm": 1.9531730588400196,
      "learning_rate": 3.853965661764201e-06,
      "loss": 0.5328,
      "step": 5741
    },
    {
      "epoch": 0.7040215792054929,
      "grad_norm": 1.840219887385319,
      "learning_rate": 3.853539561936197e-06,
      "loss": 0.5065,
      "step": 5742
    },
    {
      "epoch": 0.7041441883276116,
      "grad_norm": 2.0463812257192813,
      "learning_rate": 3.85311340647532e-06,
      "loss": 0.5297,
      "step": 5743
    },
    {
      "epoch": 0.7042667974497303,
      "grad_norm": 1.707031756759173,
      "learning_rate": 3.852687195399089e-06,
      "loss": 0.4892,
      "step": 5744
    },
    {
      "epoch": 0.7043894065718489,
      "grad_norm": 1.7222374900987423,
      "learning_rate": 3.852260928725017e-06,
      "loss": 0.5061,
      "step": 5745
    },
    {
      "epoch": 0.7045120156939676,
      "grad_norm": 1.98895395036775,
      "learning_rate": 3.851834606470628e-06,
      "loss": 0.5209,
      "step": 5746
    },
    {
      "epoch": 0.7046346248160863,
      "grad_norm": 1.790608743819349,
      "learning_rate": 3.851408228653443e-06,
      "loss": 0.5218,
      "step": 5747
    },
    {
      "epoch": 0.704757233938205,
      "grad_norm": 2.0425201829464195,
      "learning_rate": 3.850981795290988e-06,
      "loss": 0.6209,
      "step": 5748
    },
    {
      "epoch": 0.7048798430603237,
      "grad_norm": 1.8975956953551636,
      "learning_rate": 3.850555306400788e-06,
      "loss": 0.5182,
      "step": 5749
    },
    {
      "epoch": 0.7050024521824424,
      "grad_norm": 1.996019440532595,
      "learning_rate": 3.850128762000376e-06,
      "loss": 0.549,
      "step": 5750
    },
    {
      "epoch": 0.7051250613045611,
      "grad_norm": 1.8831614687073772,
      "learning_rate": 3.84970216210728e-06,
      "loss": 0.5316,
      "step": 5751
    },
    {
      "epoch": 0.7052476704266798,
      "grad_norm": 2.1954086595066906,
      "learning_rate": 3.849275506739037e-06,
      "loss": 0.5572,
      "step": 5752
    },
    {
      "epoch": 0.7053702795487984,
      "grad_norm": 1.8754058539063287,
      "learning_rate": 3.848848795913181e-06,
      "loss": 0.5758,
      "step": 5753
    },
    {
      "epoch": 0.7054928886709171,
      "grad_norm": 1.8494388440929515,
      "learning_rate": 3.848422029647252e-06,
      "loss": 0.4871,
      "step": 5754
    },
    {
      "epoch": 0.7056154977930358,
      "grad_norm": 2.063396722676329,
      "learning_rate": 3.847995207958789e-06,
      "loss": 0.5659,
      "step": 5755
    },
    {
      "epoch": 0.7057381069151545,
      "grad_norm": 1.9917479277281003,
      "learning_rate": 3.8475683308653385e-06,
      "loss": 0.5001,
      "step": 5756
    },
    {
      "epoch": 0.7058607160372732,
      "grad_norm": 1.8894865578929612,
      "learning_rate": 3.847141398384443e-06,
      "loss": 0.5258,
      "step": 5757
    },
    {
      "epoch": 0.7059833251593919,
      "grad_norm": 2.0370200317686042,
      "learning_rate": 3.846714410533651e-06,
      "loss": 0.4693,
      "step": 5758
    },
    {
      "epoch": 0.7061059342815106,
      "grad_norm": 1.7701203684665343,
      "learning_rate": 3.846287367330513e-06,
      "loss": 0.5041,
      "step": 5759
    },
    {
      "epoch": 0.7062285434036293,
      "grad_norm": 1.9477932676685574,
      "learning_rate": 3.84586026879258e-06,
      "loss": 0.5324,
      "step": 5760
    },
    {
      "epoch": 0.7063511525257479,
      "grad_norm": 1.9551429109909473,
      "learning_rate": 3.845433114937408e-06,
      "loss": 0.5621,
      "step": 5761
    },
    {
      "epoch": 0.7064737616478666,
      "grad_norm": 1.7270485253209575,
      "learning_rate": 3.845005905782553e-06,
      "loss": 0.5071,
      "step": 5762
    },
    {
      "epoch": 0.7065963707699853,
      "grad_norm": 1.7872261028746441,
      "learning_rate": 3.844578641345574e-06,
      "loss": 0.5799,
      "step": 5763
    },
    {
      "epoch": 0.706718979892104,
      "grad_norm": 2.1648817382091736,
      "learning_rate": 3.844151321644033e-06,
      "loss": 0.5461,
      "step": 5764
    },
    {
      "epoch": 0.7068415890142227,
      "grad_norm": 1.9520920589317845,
      "learning_rate": 3.8437239466954915e-06,
      "loss": 0.5042,
      "step": 5765
    },
    {
      "epoch": 0.7069641981363414,
      "grad_norm": 1.8570818222843224,
      "learning_rate": 3.843296516517518e-06,
      "loss": 0.5075,
      "step": 5766
    },
    {
      "epoch": 0.7070868072584601,
      "grad_norm": 2.067450252726702,
      "learning_rate": 3.842869031127679e-06,
      "loss": 0.5672,
      "step": 5767
    },
    {
      "epoch": 0.7072094163805788,
      "grad_norm": 1.8466437202799904,
      "learning_rate": 3.842441490543546e-06,
      "loss": 0.5155,
      "step": 5768
    },
    {
      "epoch": 0.7073320255026974,
      "grad_norm": 2.0173057050443446,
      "learning_rate": 3.842013894782692e-06,
      "loss": 0.5449,
      "step": 5769
    },
    {
      "epoch": 0.7074546346248161,
      "grad_norm": 1.9415624394426938,
      "learning_rate": 3.8415862438626895e-06,
      "loss": 0.5807,
      "step": 5770
    },
    {
      "epoch": 0.7075772437469348,
      "grad_norm": 2.0260272400699213,
      "learning_rate": 3.841158537801118e-06,
      "loss": 0.5253,
      "step": 5771
    },
    {
      "epoch": 0.7076998528690535,
      "grad_norm": 1.777354480339012,
      "learning_rate": 3.840730776615557e-06,
      "loss": 0.4928,
      "step": 5772
    },
    {
      "epoch": 0.7078224619911722,
      "grad_norm": 1.9686467728671182,
      "learning_rate": 3.840302960323586e-06,
      "loss": 0.5537,
      "step": 5773
    },
    {
      "epoch": 0.7079450711132909,
      "grad_norm": 1.9544646526770801,
      "learning_rate": 3.839875088942791e-06,
      "loss": 0.5058,
      "step": 5774
    },
    {
      "epoch": 0.7080676802354096,
      "grad_norm": 1.9845155817662394,
      "learning_rate": 3.839447162490758e-06,
      "loss": 0.5635,
      "step": 5775
    },
    {
      "epoch": 0.7081902893575281,
      "grad_norm": 1.8644903325731372,
      "learning_rate": 3.839019180985075e-06,
      "loss": 0.5334,
      "step": 5776
    },
    {
      "epoch": 0.7083128984796468,
      "grad_norm": 2.0312587609050143,
      "learning_rate": 3.8385911444433345e-06,
      "loss": 0.5067,
      "step": 5777
    },
    {
      "epoch": 0.7084355076017655,
      "grad_norm": 1.9053127585703131,
      "learning_rate": 3.838163052883127e-06,
      "loss": 0.5185,
      "step": 5778
    },
    {
      "epoch": 0.7085581167238842,
      "grad_norm": 1.774678696661979,
      "learning_rate": 3.8377349063220494e-06,
      "loss": 0.5402,
      "step": 5779
    },
    {
      "epoch": 0.708680725846003,
      "grad_norm": 2.0658716460232895,
      "learning_rate": 3.837306704777698e-06,
      "loss": 0.5482,
      "step": 5780
    },
    {
      "epoch": 0.7088033349681216,
      "grad_norm": 1.8324400332065618,
      "learning_rate": 3.8368784482676745e-06,
      "loss": 0.4946,
      "step": 5781
    },
    {
      "epoch": 0.7089259440902403,
      "grad_norm": 2.0423952898627813,
      "learning_rate": 3.836450136809579e-06,
      "loss": 0.5373,
      "step": 5782
    },
    {
      "epoch": 0.709048553212359,
      "grad_norm": 1.9742574649426232,
      "learning_rate": 3.836021770421018e-06,
      "loss": 0.5825,
      "step": 5783
    },
    {
      "epoch": 0.7091711623344776,
      "grad_norm": 1.9936863554330402,
      "learning_rate": 3.8355933491195965e-06,
      "loss": 0.5213,
      "step": 5784
    },
    {
      "epoch": 0.7092937714565963,
      "grad_norm": 2.1844308470720297,
      "learning_rate": 3.8351648729229235e-06,
      "loss": 0.5622,
      "step": 5785
    },
    {
      "epoch": 0.709416380578715,
      "grad_norm": 2.1686231304414965,
      "learning_rate": 3.8347363418486104e-06,
      "loss": 0.5961,
      "step": 5786
    },
    {
      "epoch": 0.7095389897008337,
      "grad_norm": 2.1604069529854857,
      "learning_rate": 3.834307755914271e-06,
      "loss": 0.6585,
      "step": 5787
    },
    {
      "epoch": 0.7096615988229524,
      "grad_norm": 1.7639563843838377,
      "learning_rate": 3.833879115137521e-06,
      "loss": 0.5252,
      "step": 5788
    },
    {
      "epoch": 0.7097842079450711,
      "grad_norm": 2.099648366601743,
      "learning_rate": 3.8334504195359775e-06,
      "loss": 0.5149,
      "step": 5789
    },
    {
      "epoch": 0.7099068170671898,
      "grad_norm": 1.9427651472162568,
      "learning_rate": 3.833021669127262e-06,
      "loss": 0.5086,
      "step": 5790
    },
    {
      "epoch": 0.7100294261893085,
      "grad_norm": 2.1325624855020697,
      "learning_rate": 3.832592863928995e-06,
      "loss": 0.5312,
      "step": 5791
    },
    {
      "epoch": 0.7101520353114271,
      "grad_norm": 1.9411393046190768,
      "learning_rate": 3.832164003958803e-06,
      "loss": 0.5067,
      "step": 5792
    },
    {
      "epoch": 0.7102746444335458,
      "grad_norm": 1.9322875772738237,
      "learning_rate": 3.83173508923431e-06,
      "loss": 0.5462,
      "step": 5793
    },
    {
      "epoch": 0.7103972535556645,
      "grad_norm": 1.949044130895668,
      "learning_rate": 3.8313061197731496e-06,
      "loss": 0.5063,
      "step": 5794
    },
    {
      "epoch": 0.7105198626777832,
      "grad_norm": 1.8928979720739503,
      "learning_rate": 3.830877095592949e-06,
      "loss": 0.5274,
      "step": 5795
    },
    {
      "epoch": 0.7106424717999019,
      "grad_norm": 2.169127084600202,
      "learning_rate": 3.8304480167113445e-06,
      "loss": 0.5455,
      "step": 5796
    },
    {
      "epoch": 0.7107650809220206,
      "grad_norm": 1.9728267071408065,
      "learning_rate": 3.8300188831459715e-06,
      "loss": 0.5455,
      "step": 5797
    },
    {
      "epoch": 0.7108876900441393,
      "grad_norm": 2.0886476038033717,
      "learning_rate": 3.829589694914467e-06,
      "loss": 0.5215,
      "step": 5798
    },
    {
      "epoch": 0.711010299166258,
      "grad_norm": 2.3280248642302195,
      "learning_rate": 3.829160452034472e-06,
      "loss": 0.6146,
      "step": 5799
    },
    {
      "epoch": 0.7111329082883766,
      "grad_norm": 2.172335743094789,
      "learning_rate": 3.82873115452363e-06,
      "loss": 0.5989,
      "step": 5800
    },
    {
      "epoch": 0.7112555174104953,
      "grad_norm": 2.0494594208293853,
      "learning_rate": 3.828301802399585e-06,
      "loss": 0.5304,
      "step": 5801
    },
    {
      "epoch": 0.711378126532614,
      "grad_norm": 1.8596511991762024,
      "learning_rate": 3.827872395679985e-06,
      "loss": 0.5156,
      "step": 5802
    },
    {
      "epoch": 0.7115007356547327,
      "grad_norm": 1.7510906461737274,
      "learning_rate": 3.827442934382477e-06,
      "loss": 0.5129,
      "step": 5803
    },
    {
      "epoch": 0.7116233447768514,
      "grad_norm": 2.122980538703863,
      "learning_rate": 3.827013418524717e-06,
      "loss": 0.5474,
      "step": 5804
    },
    {
      "epoch": 0.7117459538989701,
      "grad_norm": 2.1188098328834606,
      "learning_rate": 3.8265838481243535e-06,
      "loss": 0.5574,
      "step": 5805
    },
    {
      "epoch": 0.7118685630210888,
      "grad_norm": 2.1090520291186645,
      "learning_rate": 3.826154223199046e-06,
      "loss": 0.5015,
      "step": 5806
    },
    {
      "epoch": 0.7119911721432075,
      "grad_norm": 1.906965021480834,
      "learning_rate": 3.825724543766453e-06,
      "loss": 0.475,
      "step": 5807
    },
    {
      "epoch": 0.7121137812653261,
      "grad_norm": 1.8469541233678648,
      "learning_rate": 3.825294809844234e-06,
      "loss": 0.4635,
      "step": 5808
    },
    {
      "epoch": 0.7122363903874448,
      "grad_norm": 2.0997342834574444,
      "learning_rate": 3.824865021450052e-06,
      "loss": 0.5137,
      "step": 5809
    },
    {
      "epoch": 0.7123589995095635,
      "grad_norm": 2.0500239544100727,
      "learning_rate": 3.824435178601572e-06,
      "loss": 0.5414,
      "step": 5810
    },
    {
      "epoch": 0.7124816086316822,
      "grad_norm": 2.0867751308641473,
      "learning_rate": 3.824005281316462e-06,
      "loss": 0.5299,
      "step": 5811
    },
    {
      "epoch": 0.7126042177538009,
      "grad_norm": 2.0385951389544106,
      "learning_rate": 3.82357532961239e-06,
      "loss": 0.5645,
      "step": 5812
    },
    {
      "epoch": 0.7127268268759196,
      "grad_norm": 1.9799009479579626,
      "learning_rate": 3.823145323507029e-06,
      "loss": 0.5642,
      "step": 5813
    },
    {
      "epoch": 0.7128494359980383,
      "grad_norm": 2.2441597799990194,
      "learning_rate": 3.8227152630180535e-06,
      "loss": 0.5025,
      "step": 5814
    },
    {
      "epoch": 0.712972045120157,
      "grad_norm": 1.990348248970015,
      "learning_rate": 3.822285148163138e-06,
      "loss": 0.5493,
      "step": 5815
    },
    {
      "epoch": 0.7130946542422756,
      "grad_norm": 1.8878463848623488,
      "learning_rate": 3.8218549789599625e-06,
      "loss": 0.5669,
      "step": 5816
    },
    {
      "epoch": 0.7132172633643943,
      "grad_norm": 2.0864143064783494,
      "learning_rate": 3.821424755426207e-06,
      "loss": 0.4992,
      "step": 5817
    },
    {
      "epoch": 0.713339872486513,
      "grad_norm": 1.997543121519728,
      "learning_rate": 3.820994477579555e-06,
      "loss": 0.5295,
      "step": 5818
    },
    {
      "epoch": 0.7134624816086317,
      "grad_norm": 2.380359534301074,
      "learning_rate": 3.820564145437691e-06,
      "loss": 0.5611,
      "step": 5819
    },
    {
      "epoch": 0.7135850907307504,
      "grad_norm": 2.1031375374380996,
      "learning_rate": 3.820133759018303e-06,
      "loss": 0.5812,
      "step": 5820
    },
    {
      "epoch": 0.7137076998528691,
      "grad_norm": 1.7972941023857743,
      "learning_rate": 3.81970331833908e-06,
      "loss": 0.5077,
      "step": 5821
    },
    {
      "epoch": 0.7138303089749878,
      "grad_norm": 2.1654435688169382,
      "learning_rate": 3.8192728234177145e-06,
      "loss": 0.5122,
      "step": 5822
    },
    {
      "epoch": 0.7139529180971064,
      "grad_norm": 1.8867636690519805,
      "learning_rate": 3.818842274271901e-06,
      "loss": 0.542,
      "step": 5823
    },
    {
      "epoch": 0.7140755272192251,
      "grad_norm": 1.7812955158240147,
      "learning_rate": 3.818411670919334e-06,
      "loss": 0.6027,
      "step": 5824
    },
    {
      "epoch": 0.7141981363413438,
      "grad_norm": 1.9817016235562048,
      "learning_rate": 3.817981013377715e-06,
      "loss": 0.5053,
      "step": 5825
    },
    {
      "epoch": 0.7143207454634625,
      "grad_norm": 2.0297557159530366,
      "learning_rate": 3.817550301664741e-06,
      "loss": 0.4921,
      "step": 5826
    },
    {
      "epoch": 0.7144433545855812,
      "grad_norm": 1.8585097512144266,
      "learning_rate": 3.817119535798118e-06,
      "loss": 0.5529,
      "step": 5827
    },
    {
      "epoch": 0.7145659637076999,
      "grad_norm": 2.0978784054457256,
      "learning_rate": 3.81668871579555e-06,
      "loss": 0.5844,
      "step": 5828
    },
    {
      "epoch": 0.7146885728298186,
      "grad_norm": 1.9402409025494078,
      "learning_rate": 3.816257841674746e-06,
      "loss": 0.5198,
      "step": 5829
    },
    {
      "epoch": 0.7148111819519373,
      "grad_norm": 2.124959233804407,
      "learning_rate": 3.815826913453413e-06,
      "loss": 0.5259,
      "step": 5830
    },
    {
      "epoch": 0.7149337910740559,
      "grad_norm": 1.8801547211527885,
      "learning_rate": 3.815395931149265e-06,
      "loss": 0.5086,
      "step": 5831
    },
    {
      "epoch": 0.7150564001961746,
      "grad_norm": 1.9229303829804945,
      "learning_rate": 3.8149648947800155e-06,
      "loss": 0.544,
      "step": 5832
    },
    {
      "epoch": 0.7151790093182933,
      "grad_norm": 1.873265601422589,
      "learning_rate": 3.814533804363381e-06,
      "loss": 0.51,
      "step": 5833
    },
    {
      "epoch": 0.715301618440412,
      "grad_norm": 1.964340644599978,
      "learning_rate": 3.8141026599170792e-06,
      "loss": 0.5616,
      "step": 5834
    },
    {
      "epoch": 0.7154242275625307,
      "grad_norm": 1.9962716994377898,
      "learning_rate": 3.813671461458833e-06,
      "loss": 0.5026,
      "step": 5835
    },
    {
      "epoch": 0.7155468366846494,
      "grad_norm": 1.9964299409538202,
      "learning_rate": 3.8132402090063623e-06,
      "loss": 0.5169,
      "step": 5836
    },
    {
      "epoch": 0.7156694458067681,
      "grad_norm": 1.9151418612924467,
      "learning_rate": 3.8128089025773953e-06,
      "loss": 0.5419,
      "step": 5837
    },
    {
      "epoch": 0.7157920549288868,
      "grad_norm": 2.053564814565748,
      "learning_rate": 3.812377542189658e-06,
      "loss": 0.5346,
      "step": 5838
    },
    {
      "epoch": 0.7159146640510053,
      "grad_norm": 2.0669231486085615,
      "learning_rate": 3.81194612786088e-06,
      "loss": 0.5082,
      "step": 5839
    },
    {
      "epoch": 0.716037273173124,
      "grad_norm": 2.144019238427319,
      "learning_rate": 3.8115146596087937e-06,
      "loss": 0.5542,
      "step": 5840
    },
    {
      "epoch": 0.7161598822952427,
      "grad_norm": 2.103732482400015,
      "learning_rate": 3.811083137451133e-06,
      "loss": 0.5064,
      "step": 5841
    },
    {
      "epoch": 0.7162824914173614,
      "grad_norm": 2.0189522366593105,
      "learning_rate": 3.810651561405635e-06,
      "loss": 0.5347,
      "step": 5842
    },
    {
      "epoch": 0.7164051005394801,
      "grad_norm": 1.8674532430175017,
      "learning_rate": 3.810219931490037e-06,
      "loss": 0.5639,
      "step": 5843
    },
    {
      "epoch": 0.7165277096615988,
      "grad_norm": 1.8266515912131376,
      "learning_rate": 3.80978824772208e-06,
      "loss": 0.5244,
      "step": 5844
    },
    {
      "epoch": 0.7166503187837175,
      "grad_norm": 2.1670031380225234,
      "learning_rate": 3.8093565101195074e-06,
      "loss": 0.5356,
      "step": 5845
    },
    {
      "epoch": 0.7167729279058362,
      "grad_norm": 1.954051603407911,
      "learning_rate": 3.8089247187000644e-06,
      "loss": 0.5423,
      "step": 5846
    },
    {
      "epoch": 0.7168955370279548,
      "grad_norm": 1.8875372668344017,
      "learning_rate": 3.8084928734814975e-06,
      "loss": 0.5596,
      "step": 5847
    },
    {
      "epoch": 0.7170181461500735,
      "grad_norm": 2.0356966867314594,
      "learning_rate": 3.808060974481558e-06,
      "loss": 0.5691,
      "step": 5848
    },
    {
      "epoch": 0.7171407552721922,
      "grad_norm": 1.6739170719709153,
      "learning_rate": 3.807629021717996e-06,
      "loss": 0.517,
      "step": 5849
    },
    {
      "epoch": 0.7172633643943109,
      "grad_norm": 2.0108725600729347,
      "learning_rate": 3.807197015208567e-06,
      "loss": 0.5386,
      "step": 5850
    },
    {
      "epoch": 0.7173859735164296,
      "grad_norm": 2.1900534802769545,
      "learning_rate": 3.806764954971025e-06,
      "loss": 0.5196,
      "step": 5851
    },
    {
      "epoch": 0.7175085826385483,
      "grad_norm": 2.0722984771246495,
      "learning_rate": 3.806332841023131e-06,
      "loss": 0.5313,
      "step": 5852
    },
    {
      "epoch": 0.717631191760667,
      "grad_norm": 1.9815831651808644,
      "learning_rate": 3.8059006733826442e-06,
      "loss": 0.56,
      "step": 5853
    },
    {
      "epoch": 0.7177538008827857,
      "grad_norm": 1.909366377443261,
      "learning_rate": 3.805468452067328e-06,
      "loss": 0.4894,
      "step": 5854
    },
    {
      "epoch": 0.7178764100049043,
      "grad_norm": 1.9391185955236958,
      "learning_rate": 3.805036177094947e-06,
      "loss": 0.5356,
      "step": 5855
    },
    {
      "epoch": 0.717999019127023,
      "grad_norm": 1.9097946112425215,
      "learning_rate": 3.804603848483269e-06,
      "loss": 0.5626,
      "step": 5856
    },
    {
      "epoch": 0.7181216282491417,
      "grad_norm": 2.076176599528011,
      "learning_rate": 3.8041714662500636e-06,
      "loss": 0.5159,
      "step": 5857
    },
    {
      "epoch": 0.7182442373712604,
      "grad_norm": 1.7754006493801353,
      "learning_rate": 3.803739030413102e-06,
      "loss": 0.5248,
      "step": 5858
    },
    {
      "epoch": 0.7183668464933791,
      "grad_norm": 1.874949808329648,
      "learning_rate": 3.803306540990157e-06,
      "loss": 0.522,
      "step": 5859
    },
    {
      "epoch": 0.7184894556154978,
      "grad_norm": 1.7771838244384905,
      "learning_rate": 3.8028739979990072e-06,
      "loss": 0.4395,
      "step": 5860
    },
    {
      "epoch": 0.7186120647376165,
      "grad_norm": 2.0072279723918935,
      "learning_rate": 3.8024414014574287e-06,
      "loss": 0.5776,
      "step": 5861
    },
    {
      "epoch": 0.7187346738597352,
      "grad_norm": 2.012049256963096,
      "learning_rate": 3.8020087513832026e-06,
      "loss": 0.5146,
      "step": 5862
    },
    {
      "epoch": 0.7188572829818538,
      "grad_norm": 1.9457098161254536,
      "learning_rate": 3.801576047794113e-06,
      "loss": 0.5788,
      "step": 5863
    },
    {
      "epoch": 0.7189798921039725,
      "grad_norm": 2.279433414822429,
      "learning_rate": 3.801143290707942e-06,
      "loss": 0.5492,
      "step": 5864
    },
    {
      "epoch": 0.7191025012260912,
      "grad_norm": 1.9168213557984204,
      "learning_rate": 3.80071048014248e-06,
      "loss": 0.5185,
      "step": 5865
    },
    {
      "epoch": 0.7192251103482099,
      "grad_norm": 2.0345313308425332,
      "learning_rate": 3.8002776161155132e-06,
      "loss": 0.535,
      "step": 5866
    },
    {
      "epoch": 0.7193477194703286,
      "grad_norm": 2.1001033536284686,
      "learning_rate": 3.799844698644835e-06,
      "loss": 0.5049,
      "step": 5867
    },
    {
      "epoch": 0.7194703285924473,
      "grad_norm": 2.1098029400179765,
      "learning_rate": 3.7994117277482383e-06,
      "loss": 0.5287,
      "step": 5868
    },
    {
      "epoch": 0.719592937714566,
      "grad_norm": 1.9792353922956931,
      "learning_rate": 3.7989787034435186e-06,
      "loss": 0.5672,
      "step": 5869
    },
    {
      "epoch": 0.7197155468366846,
      "grad_norm": 2.051602616922466,
      "learning_rate": 3.7985456257484755e-06,
      "loss": 0.5106,
      "step": 5870
    },
    {
      "epoch": 0.7198381559588033,
      "grad_norm": 2.006558763037176,
      "learning_rate": 3.7981124946809077e-06,
      "loss": 0.5449,
      "step": 5871
    },
    {
      "epoch": 0.719960765080922,
      "grad_norm": 2.0574182448016054,
      "learning_rate": 3.7976793102586184e-06,
      "loss": 0.5814,
      "step": 5872
    },
    {
      "epoch": 0.7200833742030407,
      "grad_norm": 2.133854734746584,
      "learning_rate": 3.7972460724994133e-06,
      "loss": 0.5002,
      "step": 5873
    },
    {
      "epoch": 0.7202059833251594,
      "grad_norm": 2.101879825149043,
      "learning_rate": 3.7968127814210964e-06,
      "loss": 0.4944,
      "step": 5874
    },
    {
      "epoch": 0.7203285924472781,
      "grad_norm": 1.9787160416187095,
      "learning_rate": 3.7963794370414784e-06,
      "loss": 0.5076,
      "step": 5875
    },
    {
      "epoch": 0.7204512015693968,
      "grad_norm": 2.0346335583798942,
      "learning_rate": 3.795946039378371e-06,
      "loss": 0.5774,
      "step": 5876
    },
    {
      "epoch": 0.7205738106915155,
      "grad_norm": 1.9931972080814533,
      "learning_rate": 3.7955125884495876e-06,
      "loss": 0.6654,
      "step": 5877
    },
    {
      "epoch": 0.7206964198136341,
      "grad_norm": 1.8337050020692185,
      "learning_rate": 3.795079084272942e-06,
      "loss": 0.5637,
      "step": 5878
    },
    {
      "epoch": 0.7208190289357528,
      "grad_norm": 1.9926044146370125,
      "learning_rate": 3.794645526866254e-06,
      "loss": 0.523,
      "step": 5879
    },
    {
      "epoch": 0.7209416380578715,
      "grad_norm": 2.0003223610961065,
      "learning_rate": 3.7942119162473434e-06,
      "loss": 0.53,
      "step": 5880
    },
    {
      "epoch": 0.7210642471799902,
      "grad_norm": 2.231804816210005,
      "learning_rate": 3.793778252434031e-06,
      "loss": 0.578,
      "step": 5881
    },
    {
      "epoch": 0.7211868563021089,
      "grad_norm": 2.008649935747151,
      "learning_rate": 3.793344535444142e-06,
      "loss": 0.4962,
      "step": 5882
    },
    {
      "epoch": 0.7213094654242276,
      "grad_norm": 2.097015945988796,
      "learning_rate": 3.7929107652955037e-06,
      "loss": 0.4978,
      "step": 5883
    },
    {
      "epoch": 0.7214320745463463,
      "grad_norm": 1.8702927308828747,
      "learning_rate": 3.7924769420059433e-06,
      "loss": 0.5424,
      "step": 5884
    },
    {
      "epoch": 0.721554683668465,
      "grad_norm": 1.8300274800324956,
      "learning_rate": 3.792043065593293e-06,
      "loss": 0.5766,
      "step": 5885
    },
    {
      "epoch": 0.7216772927905836,
      "grad_norm": 2.0179125115906014,
      "learning_rate": 3.791609136075384e-06,
      "loss": 0.5776,
      "step": 5886
    },
    {
      "epoch": 0.7217999019127023,
      "grad_norm": 1.8005941746088112,
      "learning_rate": 3.7911751534700544e-06,
      "loss": 0.4716,
      "step": 5887
    },
    {
      "epoch": 0.721922511034821,
      "grad_norm": 1.9318549613678333,
      "learning_rate": 3.79074111779514e-06,
      "loss": 0.5268,
      "step": 5888
    },
    {
      "epoch": 0.7220451201569397,
      "grad_norm": 1.9903639917837501,
      "learning_rate": 3.7903070290684796e-06,
      "loss": 0.5082,
      "step": 5889
    },
    {
      "epoch": 0.7221677292790584,
      "grad_norm": 1.9079613280470675,
      "learning_rate": 3.7898728873079167e-06,
      "loss": 0.4984,
      "step": 5890
    },
    {
      "epoch": 0.7222903384011771,
      "grad_norm": 1.870457930927345,
      "learning_rate": 3.789438692531294e-06,
      "loss": 0.5691,
      "step": 5891
    },
    {
      "epoch": 0.7224129475232958,
      "grad_norm": 2.0138816773569452,
      "learning_rate": 3.789004444756459e-06,
      "loss": 0.5822,
      "step": 5892
    },
    {
      "epoch": 0.7225355566454145,
      "grad_norm": 1.8488854333374518,
      "learning_rate": 3.788570144001259e-06,
      "loss": 0.4726,
      "step": 5893
    },
    {
      "epoch": 0.7226581657675331,
      "grad_norm": 2.220879320330452,
      "learning_rate": 3.788135790283545e-06,
      "loss": 0.566,
      "step": 5894
    },
    {
      "epoch": 0.7227807748896518,
      "grad_norm": 1.885376480874175,
      "learning_rate": 3.787701383621169e-06,
      "loss": 0.5106,
      "step": 5895
    },
    {
      "epoch": 0.7229033840117705,
      "grad_norm": 2.1009925683258657,
      "learning_rate": 3.787266924031987e-06,
      "loss": 0.5492,
      "step": 5896
    },
    {
      "epoch": 0.7230259931338892,
      "grad_norm": 1.9833108379652375,
      "learning_rate": 3.786832411533855e-06,
      "loss": 0.5713,
      "step": 5897
    },
    {
      "epoch": 0.7231486022560079,
      "grad_norm": 1.9787815831797606,
      "learning_rate": 3.7863978461446337e-06,
      "loss": 0.5169,
      "step": 5898
    },
    {
      "epoch": 0.7232712113781266,
      "grad_norm": 2.030325258474345,
      "learning_rate": 3.7859632278821822e-06,
      "loss": 0.5485,
      "step": 5899
    },
    {
      "epoch": 0.7233938205002453,
      "grad_norm": 2.1337990454551954,
      "learning_rate": 3.785528556764367e-06,
      "loss": 0.5588,
      "step": 5900
    },
    {
      "epoch": 0.723516429622364,
      "grad_norm": 1.9123654170624416,
      "learning_rate": 3.785093832809051e-06,
      "loss": 0.5279,
      "step": 5901
    },
    {
      "epoch": 0.7236390387444825,
      "grad_norm": 1.85980012286131,
      "learning_rate": 3.784659056034104e-06,
      "loss": 0.6069,
      "step": 5902
    },
    {
      "epoch": 0.7237616478666012,
      "grad_norm": 1.9134483215150329,
      "learning_rate": 3.7842242264573954e-06,
      "loss": 0.5428,
      "step": 5903
    },
    {
      "epoch": 0.7238842569887199,
      "grad_norm": 1.9594299797174146,
      "learning_rate": 3.783789344096797e-06,
      "loss": 0.5332,
      "step": 5904
    },
    {
      "epoch": 0.7240068661108386,
      "grad_norm": 2.039989628936439,
      "learning_rate": 3.7833544089701846e-06,
      "loss": 0.4881,
      "step": 5905
    },
    {
      "epoch": 0.7241294752329573,
      "grad_norm": 1.9880454165381929,
      "learning_rate": 3.7829194210954334e-06,
      "loss": 0.5606,
      "step": 5906
    },
    {
      "epoch": 0.724252084355076,
      "grad_norm": 1.8580816179527133,
      "learning_rate": 3.782484380490423e-06,
      "loss": 0.4541,
      "step": 5907
    },
    {
      "epoch": 0.7243746934771947,
      "grad_norm": 1.7458521597927559,
      "learning_rate": 3.782049287173034e-06,
      "loss": 0.5201,
      "step": 5908
    },
    {
      "epoch": 0.7244973025993134,
      "grad_norm": 1.7487186512903805,
      "learning_rate": 3.7816141411611496e-06,
      "loss": 0.49,
      "step": 5909
    },
    {
      "epoch": 0.724619911721432,
      "grad_norm": 2.103288257932898,
      "learning_rate": 3.7811789424726554e-06,
      "loss": 0.5534,
      "step": 5910
    },
    {
      "epoch": 0.7247425208435507,
      "grad_norm": 2.144354345119364,
      "learning_rate": 3.7807436911254382e-06,
      "loss": 0.5899,
      "step": 5911
    },
    {
      "epoch": 0.7248651299656694,
      "grad_norm": 1.98885927883733,
      "learning_rate": 3.7803083871373876e-06,
      "loss": 0.5404,
      "step": 5912
    },
    {
      "epoch": 0.7249877390877881,
      "grad_norm": 2.0354001920042353,
      "learning_rate": 3.7798730305263965e-06,
      "loss": 0.6513,
      "step": 5913
    },
    {
      "epoch": 0.7251103482099068,
      "grad_norm": 2.0712212688101914,
      "learning_rate": 3.779437621310357e-06,
      "loss": 0.548,
      "step": 5914
    },
    {
      "epoch": 0.7252329573320255,
      "grad_norm": 2.1388900517562712,
      "learning_rate": 3.779002159507167e-06,
      "loss": 0.5733,
      "step": 5915
    },
    {
      "epoch": 0.7253555664541442,
      "grad_norm": 1.725191020737827,
      "learning_rate": 3.7785666451347237e-06,
      "loss": 0.5153,
      "step": 5916
    },
    {
      "epoch": 0.7254781755762629,
      "grad_norm": 2.0055300242494645,
      "learning_rate": 3.7781310782109283e-06,
      "loss": 0.5217,
      "step": 5917
    },
    {
      "epoch": 0.7256007846983815,
      "grad_norm": 1.9398189652620297,
      "learning_rate": 3.777695458753682e-06,
      "loss": 0.5024,
      "step": 5918
    },
    {
      "epoch": 0.7257233938205002,
      "grad_norm": 2.0264327152944603,
      "learning_rate": 3.77725978678089e-06,
      "loss": 0.5569,
      "step": 5919
    },
    {
      "epoch": 0.7258460029426189,
      "grad_norm": 1.7629213750304178,
      "learning_rate": 3.7768240623104607e-06,
      "loss": 0.5128,
      "step": 5920
    },
    {
      "epoch": 0.7259686120647376,
      "grad_norm": 2.016498583372481,
      "learning_rate": 3.7763882853603017e-06,
      "loss": 0.5401,
      "step": 5921
    },
    {
      "epoch": 0.7260912211868563,
      "grad_norm": 1.6128765425788727,
      "learning_rate": 3.775952455948324e-06,
      "loss": 0.49,
      "step": 5922
    },
    {
      "epoch": 0.726213830308975,
      "grad_norm": 1.982586925298325,
      "learning_rate": 3.7755165740924425e-06,
      "loss": 0.5225,
      "step": 5923
    },
    {
      "epoch": 0.7263364394310937,
      "grad_norm": 1.985726704487548,
      "learning_rate": 3.7750806398105705e-06,
      "loss": 0.5798,
      "step": 5924
    },
    {
      "epoch": 0.7264590485532123,
      "grad_norm": 2.0018798067280117,
      "learning_rate": 3.7746446531206276e-06,
      "loss": 0.5212,
      "step": 5925
    },
    {
      "epoch": 0.726581657675331,
      "grad_norm": 1.6810389350857047,
      "learning_rate": 3.7742086140405326e-06,
      "loss": 0.5194,
      "step": 5926
    },
    {
      "epoch": 0.7267042667974497,
      "grad_norm": 1.6947093482717623,
      "learning_rate": 3.773772522588208e-06,
      "loss": 0.5143,
      "step": 5927
    },
    {
      "epoch": 0.7268268759195684,
      "grad_norm": 2.1431533955875244,
      "learning_rate": 3.7733363787815768e-06,
      "loss": 0.5445,
      "step": 5928
    },
    {
      "epoch": 0.7269494850416871,
      "grad_norm": 2.280900797137026,
      "learning_rate": 3.772900182638567e-06,
      "loss": 0.553,
      "step": 5929
    },
    {
      "epoch": 0.7270720941638058,
      "grad_norm": 2.182142356099618,
      "learning_rate": 3.7724639341771062e-06,
      "loss": 0.5408,
      "step": 5930
    },
    {
      "epoch": 0.7271947032859245,
      "grad_norm": 2.182001733812262,
      "learning_rate": 3.772027633415125e-06,
      "loss": 0.5288,
      "step": 5931
    },
    {
      "epoch": 0.7273173124080432,
      "grad_norm": 2.040268264792275,
      "learning_rate": 3.7715912803705555e-06,
      "loss": 0.5304,
      "step": 5932
    },
    {
      "epoch": 0.7274399215301618,
      "grad_norm": 1.9776180116213888,
      "learning_rate": 3.771154875061334e-06,
      "loss": 0.4959,
      "step": 5933
    },
    {
      "epoch": 0.7275625306522805,
      "grad_norm": 1.8968069573756703,
      "learning_rate": 3.7707184175053957e-06,
      "loss": 0.567,
      "step": 5934
    },
    {
      "epoch": 0.7276851397743992,
      "grad_norm": 1.9412225629856867,
      "learning_rate": 3.7702819077206813e-06,
      "loss": 0.592,
      "step": 5935
    },
    {
      "epoch": 0.7278077488965179,
      "grad_norm": 1.9727266042284857,
      "learning_rate": 3.7698453457251326e-06,
      "loss": 0.5128,
      "step": 5936
    },
    {
      "epoch": 0.7279303580186366,
      "grad_norm": 1.741744808098077,
      "learning_rate": 3.7694087315366905e-06,
      "loss": 0.4982,
      "step": 5937
    },
    {
      "epoch": 0.7280529671407553,
      "grad_norm": 2.117195463613279,
      "learning_rate": 3.7689720651733034e-06,
      "loss": 0.6058,
      "step": 5938
    },
    {
      "epoch": 0.728175576262874,
      "grad_norm": 1.9903634756356905,
      "learning_rate": 3.7685353466529177e-06,
      "loss": 0.5351,
      "step": 5939
    },
    {
      "epoch": 0.7282981853849927,
      "grad_norm": 1.755869227717694,
      "learning_rate": 3.768098575993483e-06,
      "loss": 0.4714,
      "step": 5940
    },
    {
      "epoch": 0.7284207945071113,
      "grad_norm": 1.8489065731029908,
      "learning_rate": 3.767661753212952e-06,
      "loss": 0.5092,
      "step": 5941
    },
    {
      "epoch": 0.72854340362923,
      "grad_norm": 1.9222710717470906,
      "learning_rate": 3.7672248783292793e-06,
      "loss": 0.4946,
      "step": 5942
    },
    {
      "epoch": 0.7286660127513487,
      "grad_norm": 1.9018369060941978,
      "learning_rate": 3.76678795136042e-06,
      "loss": 0.5218,
      "step": 5943
    },
    {
      "epoch": 0.7287886218734674,
      "grad_norm": 1.7611214810320184,
      "learning_rate": 3.7663509723243337e-06,
      "loss": 0.5224,
      "step": 5944
    },
    {
      "epoch": 0.7289112309955861,
      "grad_norm": 1.9533132696365354,
      "learning_rate": 3.76591394123898e-06,
      "loss": 0.5192,
      "step": 5945
    },
    {
      "epoch": 0.7290338401177048,
      "grad_norm": 1.9328528963355336,
      "learning_rate": 3.7654768581223233e-06,
      "loss": 0.5959,
      "step": 5946
    },
    {
      "epoch": 0.7291564492398235,
      "grad_norm": 2.1692920321398916,
      "learning_rate": 3.7650397229923256e-06,
      "loss": 0.5796,
      "step": 5947
    },
    {
      "epoch": 0.7292790583619422,
      "grad_norm": 2.1705316609340612,
      "learning_rate": 3.7646025358669568e-06,
      "loss": 0.6133,
      "step": 5948
    },
    {
      "epoch": 0.7294016674840608,
      "grad_norm": 1.9351055149157244,
      "learning_rate": 3.764165296764185e-06,
      "loss": 0.6066,
      "step": 5949
    },
    {
      "epoch": 0.7295242766061795,
      "grad_norm": 1.8258474213985032,
      "learning_rate": 3.7637280057019814e-06,
      "loss": 0.5398,
      "step": 5950
    },
    {
      "epoch": 0.7296468857282982,
      "grad_norm": 2.1943296900375735,
      "learning_rate": 3.763290662698319e-06,
      "loss": 0.5631,
      "step": 5951
    },
    {
      "epoch": 0.7297694948504169,
      "grad_norm": 2.1219737509464807,
      "learning_rate": 3.762853267771175e-06,
      "loss": 0.5042,
      "step": 5952
    },
    {
      "epoch": 0.7298921039725356,
      "grad_norm": 1.9753758685037939,
      "learning_rate": 3.7624158209385248e-06,
      "loss": 0.5093,
      "step": 5953
    },
    {
      "epoch": 0.7300147130946543,
      "grad_norm": 2.2344901650737827,
      "learning_rate": 3.7619783222183503e-06,
      "loss": 0.5402,
      "step": 5954
    },
    {
      "epoch": 0.730137322216773,
      "grad_norm": 1.9079512713225202,
      "learning_rate": 3.7615407716286315e-06,
      "loss": 0.5292,
      "step": 5955
    },
    {
      "epoch": 0.7302599313388917,
      "grad_norm": 1.8050653092028763,
      "learning_rate": 3.761103169187355e-06,
      "loss": 0.4997,
      "step": 5956
    },
    {
      "epoch": 0.7303825404610103,
      "grad_norm": 1.9391139019563852,
      "learning_rate": 3.7606655149125047e-06,
      "loss": 0.542,
      "step": 5957
    },
    {
      "epoch": 0.730505149583129,
      "grad_norm": 2.0132093117217527,
      "learning_rate": 3.76022780882207e-06,
      "loss": 0.5207,
      "step": 5958
    },
    {
      "epoch": 0.7306277587052477,
      "grad_norm": 1.9080820684226922,
      "learning_rate": 3.759790050934042e-06,
      "loss": 0.4869,
      "step": 5959
    },
    {
      "epoch": 0.7307503678273664,
      "grad_norm": 2.131454702386608,
      "learning_rate": 3.759352241266411e-06,
      "loss": 0.5582,
      "step": 5960
    },
    {
      "epoch": 0.730872976949485,
      "grad_norm": 2.085292636555748,
      "learning_rate": 3.758914379837175e-06,
      "loss": 0.5112,
      "step": 5961
    },
    {
      "epoch": 0.7309955860716038,
      "grad_norm": 1.8664436780974132,
      "learning_rate": 3.758476466664328e-06,
      "loss": 0.5575,
      "step": 5962
    },
    {
      "epoch": 0.7311181951937225,
      "grad_norm": 2.11314121239142,
      "learning_rate": 3.7580385017658716e-06,
      "loss": 0.5127,
      "step": 5963
    },
    {
      "epoch": 0.7312408043158412,
      "grad_norm": 1.9516698649645807,
      "learning_rate": 3.7576004851598052e-06,
      "loss": 0.4981,
      "step": 5964
    },
    {
      "epoch": 0.7313634134379597,
      "grad_norm": 2.081326365218519,
      "learning_rate": 3.757162416864132e-06,
      "loss": 0.6052,
      "step": 5965
    },
    {
      "epoch": 0.7314860225600784,
      "grad_norm": 1.9038304580724548,
      "learning_rate": 3.756724296896858e-06,
      "loss": 0.5249,
      "step": 5966
    },
    {
      "epoch": 0.7316086316821971,
      "grad_norm": 1.8112465953843329,
      "learning_rate": 3.7562861252759908e-06,
      "loss": 0.4993,
      "step": 5967
    },
    {
      "epoch": 0.7317312408043158,
      "grad_norm": 1.9564864820111985,
      "learning_rate": 3.7558479020195394e-06,
      "loss": 0.5503,
      "step": 5968
    },
    {
      "epoch": 0.7318538499264345,
      "grad_norm": 1.8071022357664033,
      "learning_rate": 3.7554096271455164e-06,
      "loss": 0.5192,
      "step": 5969
    },
    {
      "epoch": 0.7319764590485532,
      "grad_norm": 2.1307691948300174,
      "learning_rate": 3.7549713006719344e-06,
      "loss": 0.6101,
      "step": 5970
    },
    {
      "epoch": 0.7320990681706719,
      "grad_norm": 2.0576696878520897,
      "learning_rate": 3.7545329226168115e-06,
      "loss": 0.539,
      "step": 5971
    },
    {
      "epoch": 0.7322216772927905,
      "grad_norm": 1.999600934338883,
      "learning_rate": 3.7540944929981632e-06,
      "loss": 0.567,
      "step": 5972
    },
    {
      "epoch": 0.7323442864149092,
      "grad_norm": 1.9073609668808558,
      "learning_rate": 3.7536560118340114e-06,
      "loss": 0.5385,
      "step": 5973
    },
    {
      "epoch": 0.7324668955370279,
      "grad_norm": 1.9468084596416813,
      "learning_rate": 3.7532174791423785e-06,
      "loss": 0.4863,
      "step": 5974
    },
    {
      "epoch": 0.7325895046591466,
      "grad_norm": 1.818818722878052,
      "learning_rate": 3.752778894941288e-06,
      "loss": 0.508,
      "step": 5975
    },
    {
      "epoch": 0.7327121137812653,
      "grad_norm": 2.0035614208443957,
      "learning_rate": 3.7523402592487677e-06,
      "loss": 0.5715,
      "step": 5976
    },
    {
      "epoch": 0.732834722903384,
      "grad_norm": 1.930815349763467,
      "learning_rate": 3.7519015720828456e-06,
      "loss": 0.5358,
      "step": 5977
    },
    {
      "epoch": 0.7329573320255027,
      "grad_norm": 1.8844870031128549,
      "learning_rate": 3.7514628334615522e-06,
      "loss": 0.5304,
      "step": 5978
    },
    {
      "epoch": 0.7330799411476214,
      "grad_norm": 2.1232706446792915,
      "learning_rate": 3.7510240434029203e-06,
      "loss": 0.6241,
      "step": 5979
    },
    {
      "epoch": 0.73320255026974,
      "grad_norm": 1.8380110170548105,
      "learning_rate": 3.750585201924986e-06,
      "loss": 0.5121,
      "step": 5980
    },
    {
      "epoch": 0.7333251593918587,
      "grad_norm": 1.8553261082227015,
      "learning_rate": 3.750146309045787e-06,
      "loss": 0.5203,
      "step": 5981
    },
    {
      "epoch": 0.7334477685139774,
      "grad_norm": 1.7712386143346692,
      "learning_rate": 3.7497073647833593e-06,
      "loss": 0.5195,
      "step": 5982
    },
    {
      "epoch": 0.7335703776360961,
      "grad_norm": 1.9030692029347704,
      "learning_rate": 3.7492683691557475e-06,
      "loss": 0.575,
      "step": 5983
    },
    {
      "epoch": 0.7336929867582148,
      "grad_norm": 1.8184589592220297,
      "learning_rate": 3.7488293221809947e-06,
      "loss": 0.5138,
      "step": 5984
    },
    {
      "epoch": 0.7338155958803335,
      "grad_norm": 1.9318221449198303,
      "learning_rate": 3.7483902238771447e-06,
      "loss": 0.5109,
      "step": 5985
    },
    {
      "epoch": 0.7339382050024522,
      "grad_norm": 1.9899279166908226,
      "learning_rate": 3.747951074262248e-06,
      "loss": 0.5458,
      "step": 5986
    },
    {
      "epoch": 0.7340608141245709,
      "grad_norm": 2.055628928950374,
      "learning_rate": 3.7475118733543515e-06,
      "loss": 0.5823,
      "step": 5987
    },
    {
      "epoch": 0.7341834232466895,
      "grad_norm": 1.9215897958355999,
      "learning_rate": 3.7470726211715087e-06,
      "loss": 0.522,
      "step": 5988
    },
    {
      "epoch": 0.7343060323688082,
      "grad_norm": 2.1337382487174295,
      "learning_rate": 3.7466333177317733e-06,
      "loss": 0.5226,
      "step": 5989
    },
    {
      "epoch": 0.7344286414909269,
      "grad_norm": 1.8396951877973604,
      "learning_rate": 3.7461939630532018e-06,
      "loss": 0.5541,
      "step": 5990
    },
    {
      "epoch": 0.7345512506130456,
      "grad_norm": 1.9923611010673188,
      "learning_rate": 3.745754557153852e-06,
      "loss": 0.5551,
      "step": 5991
    },
    {
      "epoch": 0.7346738597351643,
      "grad_norm": 1.9589621693646873,
      "learning_rate": 3.7453151000517847e-06,
      "loss": 0.5679,
      "step": 5992
    },
    {
      "epoch": 0.734796468857283,
      "grad_norm": 2.0995021381552186,
      "learning_rate": 3.744875591765062e-06,
      "loss": 0.5165,
      "step": 5993
    },
    {
      "epoch": 0.7349190779794017,
      "grad_norm": 1.9214757680878447,
      "learning_rate": 3.744436032311749e-06,
      "loss": 0.5844,
      "step": 5994
    },
    {
      "epoch": 0.7350416871015204,
      "grad_norm": 1.9338975380076273,
      "learning_rate": 3.743996421709911e-06,
      "loss": 0.5443,
      "step": 5995
    },
    {
      "epoch": 0.735164296223639,
      "grad_norm": 1.8173045954175444,
      "learning_rate": 3.743556759977619e-06,
      "loss": 0.5311,
      "step": 5996
    },
    {
      "epoch": 0.7352869053457577,
      "grad_norm": 1.834324722561088,
      "learning_rate": 3.743117047132942e-06,
      "loss": 0.5185,
      "step": 5997
    },
    {
      "epoch": 0.7354095144678764,
      "grad_norm": 1.8180365142503594,
      "learning_rate": 3.742677283193954e-06,
      "loss": 0.4694,
      "step": 5998
    },
    {
      "epoch": 0.7355321235899951,
      "grad_norm": 1.7120244759902552,
      "learning_rate": 3.742237468178729e-06,
      "loss": 0.5512,
      "step": 5999
    },
    {
      "epoch": 0.7356547327121138,
      "grad_norm": 1.7721853906280576,
      "learning_rate": 3.741797602105345e-06,
      "loss": 0.4739,
      "step": 6000
    },
    {
      "epoch": 0.7357773418342325,
      "grad_norm": 1.9134581082802213,
      "learning_rate": 3.7413576849918808e-06,
      "loss": 0.5176,
      "step": 6001
    },
    {
      "epoch": 0.7358999509563512,
      "grad_norm": 1.7705098199389984,
      "learning_rate": 3.740917716856419e-06,
      "loss": 0.5452,
      "step": 6002
    },
    {
      "epoch": 0.7360225600784699,
      "grad_norm": 2.0862544786777852,
      "learning_rate": 3.7404776977170417e-06,
      "loss": 0.5731,
      "step": 6003
    },
    {
      "epoch": 0.7361451692005885,
      "grad_norm": 1.929745491103589,
      "learning_rate": 3.7400376275918355e-06,
      "loss": 0.5121,
      "step": 6004
    },
    {
      "epoch": 0.7362677783227072,
      "grad_norm": 1.7157599371404866,
      "learning_rate": 3.739597506498886e-06,
      "loss": 0.4816,
      "step": 6005
    },
    {
      "epoch": 0.7363903874448259,
      "grad_norm": 1.9126023187107988,
      "learning_rate": 3.7391573344562854e-06,
      "loss": 0.5084,
      "step": 6006
    },
    {
      "epoch": 0.7365129965669446,
      "grad_norm": 1.837093785639805,
      "learning_rate": 3.7387171114821246e-06,
      "loss": 0.48,
      "step": 6007
    },
    {
      "epoch": 0.7366356056890633,
      "grad_norm": 1.808605254967348,
      "learning_rate": 3.7382768375944977e-06,
      "loss": 0.5126,
      "step": 6008
    },
    {
      "epoch": 0.736758214811182,
      "grad_norm": 1.9699976787530593,
      "learning_rate": 3.7378365128115007e-06,
      "loss": 0.5547,
      "step": 6009
    },
    {
      "epoch": 0.7368808239333007,
      "grad_norm": 1.918058366768165,
      "learning_rate": 3.737396137151231e-06,
      "loss": 0.471,
      "step": 6010
    },
    {
      "epoch": 0.7370034330554194,
      "grad_norm": 1.9780333370678809,
      "learning_rate": 3.73695571063179e-06,
      "loss": 0.519,
      "step": 6011
    },
    {
      "epoch": 0.737126042177538,
      "grad_norm": 1.9603011387317804,
      "learning_rate": 3.736515233271279e-06,
      "loss": 0.573,
      "step": 6012
    },
    {
      "epoch": 0.7372486512996567,
      "grad_norm": 1.924210730248945,
      "learning_rate": 3.736074705087803e-06,
      "loss": 0.5668,
      "step": 6013
    },
    {
      "epoch": 0.7373712604217754,
      "grad_norm": 1.9539502977994079,
      "learning_rate": 3.735634126099468e-06,
      "loss": 0.5717,
      "step": 6014
    },
    {
      "epoch": 0.7374938695438941,
      "grad_norm": 2.218837164742909,
      "learning_rate": 3.7351934963243835e-06,
      "loss": 0.5922,
      "step": 6015
    },
    {
      "epoch": 0.7376164786660128,
      "grad_norm": 1.9410805184162756,
      "learning_rate": 3.734752815780659e-06,
      "loss": 0.5437,
      "step": 6016
    },
    {
      "epoch": 0.7377390877881315,
      "grad_norm": 1.9130079314458044,
      "learning_rate": 3.7343120844864084e-06,
      "loss": 0.6002,
      "step": 6017
    },
    {
      "epoch": 0.7378616969102502,
      "grad_norm": 2.170709080765857,
      "learning_rate": 3.7338713024597456e-06,
      "loss": 0.5235,
      "step": 6018
    },
    {
      "epoch": 0.7379843060323688,
      "grad_norm": 2.0585767710878167,
      "learning_rate": 3.7334304697187883e-06,
      "loss": 0.5976,
      "step": 6019
    },
    {
      "epoch": 0.7381069151544875,
      "grad_norm": 1.8651875137414218,
      "learning_rate": 3.732989586281654e-06,
      "loss": 0.5027,
      "step": 6020
    },
    {
      "epoch": 0.7382295242766062,
      "grad_norm": 2.0166102206897643,
      "learning_rate": 3.7325486521664663e-06,
      "loss": 0.584,
      "step": 6021
    },
    {
      "epoch": 0.7383521333987249,
      "grad_norm": 1.8251801881191738,
      "learning_rate": 3.732107667391346e-06,
      "loss": 0.5453,
      "step": 6022
    },
    {
      "epoch": 0.7384747425208436,
      "grad_norm": 1.94524789246339,
      "learning_rate": 3.7316666319744195e-06,
      "loss": 0.501,
      "step": 6023
    },
    {
      "epoch": 0.7385973516429623,
      "grad_norm": 1.7943648549864895,
      "learning_rate": 3.7312255459338138e-06,
      "loss": 0.5202,
      "step": 6024
    },
    {
      "epoch": 0.738719960765081,
      "grad_norm": 2.009698659880582,
      "learning_rate": 3.7307844092876588e-06,
      "loss": 0.5124,
      "step": 6025
    },
    {
      "epoch": 0.7388425698871997,
      "grad_norm": 2.2305127360886243,
      "learning_rate": 3.730343222054085e-06,
      "loss": 0.588,
      "step": 6026
    },
    {
      "epoch": 0.7389651790093182,
      "grad_norm": 1.8506268820263303,
      "learning_rate": 3.7299019842512275e-06,
      "loss": 0.4765,
      "step": 6027
    },
    {
      "epoch": 0.7390877881314369,
      "grad_norm": 2.075625777739261,
      "learning_rate": 3.7294606958972204e-06,
      "loss": 0.5782,
      "step": 6028
    },
    {
      "epoch": 0.7392103972535556,
      "grad_norm": 2.032768895849627,
      "learning_rate": 3.729019357010203e-06,
      "loss": 0.5181,
      "step": 6029
    },
    {
      "epoch": 0.7393330063756743,
      "grad_norm": 2.178698536711506,
      "learning_rate": 3.7285779676083124e-06,
      "loss": 0.5147,
      "step": 6030
    },
    {
      "epoch": 0.739455615497793,
      "grad_norm": 1.8726267484289376,
      "learning_rate": 3.7281365277096937e-06,
      "loss": 0.5341,
      "step": 6031
    },
    {
      "epoch": 0.7395782246199117,
      "grad_norm": 1.8624526550900316,
      "learning_rate": 3.7276950373324892e-06,
      "loss": 0.4929,
      "step": 6032
    },
    {
      "epoch": 0.7397008337420304,
      "grad_norm": 1.7548380570532867,
      "learning_rate": 3.727253496494845e-06,
      "loss": 0.5475,
      "step": 6033
    },
    {
      "epoch": 0.7398234428641491,
      "grad_norm": 1.8772326681274607,
      "learning_rate": 3.72681190521491e-06,
      "loss": 0.5786,
      "step": 6034
    },
    {
      "epoch": 0.7399460519862677,
      "grad_norm": 1.7828322089461908,
      "learning_rate": 3.7263702635108327e-06,
      "loss": 0.5175,
      "step": 6035
    },
    {
      "epoch": 0.7400686611083864,
      "grad_norm": 1.896771071691838,
      "learning_rate": 3.7259285714007683e-06,
      "loss": 0.5266,
      "step": 6036
    },
    {
      "epoch": 0.7401912702305051,
      "grad_norm": 2.0971338806753583,
      "learning_rate": 3.725486828902868e-06,
      "loss": 0.5815,
      "step": 6037
    },
    {
      "epoch": 0.7403138793526238,
      "grad_norm": 1.8024375232597867,
      "learning_rate": 3.7250450360352897e-06,
      "loss": 0.5302,
      "step": 6038
    },
    {
      "epoch": 0.7404364884747425,
      "grad_norm": 2.0382449931542626,
      "learning_rate": 3.7246031928161912e-06,
      "loss": 0.6233,
      "step": 6039
    },
    {
      "epoch": 0.7405590975968612,
      "grad_norm": 2.005661507027918,
      "learning_rate": 3.7241612992637336e-06,
      "loss": 0.5236,
      "step": 6040
    },
    {
      "epoch": 0.7406817067189799,
      "grad_norm": 1.9875493104383044,
      "learning_rate": 3.72371935539608e-06,
      "loss": 0.5395,
      "step": 6041
    },
    {
      "epoch": 0.7408043158410986,
      "grad_norm": 1.7383060737465161,
      "learning_rate": 3.7232773612313945e-06,
      "loss": 0.5,
      "step": 6042
    },
    {
      "epoch": 0.7409269249632172,
      "grad_norm": 1.8173031637896895,
      "learning_rate": 3.7228353167878427e-06,
      "loss": 0.5252,
      "step": 6043
    },
    {
      "epoch": 0.7410495340853359,
      "grad_norm": 1.825774624983154,
      "learning_rate": 3.7223932220835957e-06,
      "loss": 0.5102,
      "step": 6044
    },
    {
      "epoch": 0.7411721432074546,
      "grad_norm": 1.9960173365381617,
      "learning_rate": 3.721951077136823e-06,
      "loss": 0.5274,
      "step": 6045
    },
    {
      "epoch": 0.7412947523295733,
      "grad_norm": 1.9525536775795436,
      "learning_rate": 3.7215088819656974e-06,
      "loss": 0.5717,
      "step": 6046
    },
    {
      "epoch": 0.741417361451692,
      "grad_norm": 2.1563501744771076,
      "learning_rate": 3.721066636588394e-06,
      "loss": 0.5655,
      "step": 6047
    },
    {
      "epoch": 0.7415399705738107,
      "grad_norm": 1.7714511000317559,
      "learning_rate": 3.7206243410230906e-06,
      "loss": 0.4985,
      "step": 6048
    },
    {
      "epoch": 0.7416625796959294,
      "grad_norm": 1.8921239285095275,
      "learning_rate": 3.7201819952879658e-06,
      "loss": 0.5273,
      "step": 6049
    },
    {
      "epoch": 0.7417851888180481,
      "grad_norm": 1.8613916191855644,
      "learning_rate": 3.719739599401201e-06,
      "loss": 0.5216,
      "step": 6050
    },
    {
      "epoch": 0.7419077979401667,
      "grad_norm": 1.9291944459752917,
      "learning_rate": 3.7192971533809783e-06,
      "loss": 0.5577,
      "step": 6051
    },
    {
      "epoch": 0.7420304070622854,
      "grad_norm": 1.7925345736834557,
      "learning_rate": 3.7188546572454852e-06,
      "loss": 0.5255,
      "step": 6052
    },
    {
      "epoch": 0.7421530161844041,
      "grad_norm": 1.81765195538681,
      "learning_rate": 3.7184121110129073e-06,
      "loss": 0.5212,
      "step": 6053
    },
    {
      "epoch": 0.7422756253065228,
      "grad_norm": 1.885266268489014,
      "learning_rate": 3.7179695147014343e-06,
      "loss": 0.4991,
      "step": 6054
    },
    {
      "epoch": 0.7423982344286415,
      "grad_norm": 1.9554673120528157,
      "learning_rate": 3.7175268683292587e-06,
      "loss": 0.547,
      "step": 6055
    },
    {
      "epoch": 0.7425208435507602,
      "grad_norm": 1.7622423531118125,
      "learning_rate": 3.7170841719145726e-06,
      "loss": 0.4953,
      "step": 6056
    },
    {
      "epoch": 0.7426434526728789,
      "grad_norm": 1.8543358530266612,
      "learning_rate": 3.7166414254755734e-06,
      "loss": 0.4955,
      "step": 6057
    },
    {
      "epoch": 0.7427660617949976,
      "grad_norm": 1.9747075825538738,
      "learning_rate": 3.7161986290304564e-06,
      "loss": 0.532,
      "step": 6058
    },
    {
      "epoch": 0.7428886709171162,
      "grad_norm": 2.077520942412606,
      "learning_rate": 3.7157557825974237e-06,
      "loss": 0.5863,
      "step": 6059
    },
    {
      "epoch": 0.7430112800392349,
      "grad_norm": 2.0209239049790915,
      "learning_rate": 3.715312886194676e-06,
      "loss": 0.5821,
      "step": 6060
    },
    {
      "epoch": 0.7431338891613536,
      "grad_norm": 1.8586980970616749,
      "learning_rate": 3.7148699398404165e-06,
      "loss": 0.5115,
      "step": 6061
    },
    {
      "epoch": 0.7432564982834723,
      "grad_norm": 1.8639665688053366,
      "learning_rate": 3.7144269435528524e-06,
      "loss": 0.5376,
      "step": 6062
    },
    {
      "epoch": 0.743379107405591,
      "grad_norm": 1.9535579734581558,
      "learning_rate": 3.71398389735019e-06,
      "loss": 0.5317,
      "step": 6063
    },
    {
      "epoch": 0.7435017165277097,
      "grad_norm": 1.7945754479386795,
      "learning_rate": 3.7135408012506415e-06,
      "loss": 0.5719,
      "step": 6064
    },
    {
      "epoch": 0.7436243256498284,
      "grad_norm": 1.9056143257113074,
      "learning_rate": 3.713097655272416e-06,
      "loss": 0.4846,
      "step": 6065
    },
    {
      "epoch": 0.7437469347719471,
      "grad_norm": 2.0323811627256902,
      "learning_rate": 3.7126544594337305e-06,
      "loss": 0.5295,
      "step": 6066
    },
    {
      "epoch": 0.7438695438940657,
      "grad_norm": 2.0585726571951986,
      "learning_rate": 3.7122112137528e-06,
      "loss": 0.52,
      "step": 6067
    },
    {
      "epoch": 0.7439921530161844,
      "grad_norm": 2.119281348514736,
      "learning_rate": 3.7117679182478415e-06,
      "loss": 0.5731,
      "step": 6068
    },
    {
      "epoch": 0.7441147621383031,
      "grad_norm": 1.855793654716065,
      "learning_rate": 3.7113245729370778e-06,
      "loss": 0.5654,
      "step": 6069
    },
    {
      "epoch": 0.7442373712604218,
      "grad_norm": 1.7318970406204484,
      "learning_rate": 3.7108811778387288e-06,
      "loss": 0.4588,
      "step": 6070
    },
    {
      "epoch": 0.7443599803825405,
      "grad_norm": 1.976784443108357,
      "learning_rate": 3.7104377329710197e-06,
      "loss": 0.564,
      "step": 6071
    },
    {
      "epoch": 0.7444825895046592,
      "grad_norm": 2.0793944508254576,
      "learning_rate": 3.7099942383521765e-06,
      "loss": 0.54,
      "step": 6072
    },
    {
      "epoch": 0.7446051986267779,
      "grad_norm": 1.7897860628330213,
      "learning_rate": 3.7095506940004283e-06,
      "loss": 0.5462,
      "step": 6073
    },
    {
      "epoch": 0.7447278077488965,
      "grad_norm": 1.9352176121651332,
      "learning_rate": 3.7091070999340052e-06,
      "loss": 0.5402,
      "step": 6074
    },
    {
      "epoch": 0.7448504168710152,
      "grad_norm": 1.8640252665482486,
      "learning_rate": 3.7086634561711398e-06,
      "loss": 0.4594,
      "step": 6075
    },
    {
      "epoch": 0.7449730259931339,
      "grad_norm": 1.8978813651861364,
      "learning_rate": 3.7082197627300664e-06,
      "loss": 0.578,
      "step": 6076
    },
    {
      "epoch": 0.7450956351152526,
      "grad_norm": 1.893935410932653,
      "learning_rate": 3.7077760196290226e-06,
      "loss": 0.5586,
      "step": 6077
    },
    {
      "epoch": 0.7452182442373713,
      "grad_norm": 1.9945681728406988,
      "learning_rate": 3.707332226886245e-06,
      "loss": 0.566,
      "step": 6078
    },
    {
      "epoch": 0.74534085335949,
      "grad_norm": 1.9354695820889118,
      "learning_rate": 3.706888384519976e-06,
      "loss": 0.5359,
      "step": 6079
    },
    {
      "epoch": 0.7454634624816087,
      "grad_norm": 1.838091666366261,
      "learning_rate": 3.7064444925484583e-06,
      "loss": 0.4928,
      "step": 6080
    },
    {
      "epoch": 0.7455860716037274,
      "grad_norm": 1.7604513204546632,
      "learning_rate": 3.706000550989935e-06,
      "loss": 0.4779,
      "step": 6081
    },
    {
      "epoch": 0.745708680725846,
      "grad_norm": 1.9940352429103407,
      "learning_rate": 3.7055565598626554e-06,
      "loss": 0.4999,
      "step": 6082
    },
    {
      "epoch": 0.7458312898479647,
      "grad_norm": 1.9735036694787984,
      "learning_rate": 3.7051125191848655e-06,
      "loss": 0.4882,
      "step": 6083
    },
    {
      "epoch": 0.7459538989700834,
      "grad_norm": 1.81600005716902,
      "learning_rate": 3.7046684289748186e-06,
      "loss": 0.5474,
      "step": 6084
    },
    {
      "epoch": 0.746076508092202,
      "grad_norm": 2.3447804642909706,
      "learning_rate": 3.7042242892507658e-06,
      "loss": 0.6159,
      "step": 6085
    },
    {
      "epoch": 0.7461991172143208,
      "grad_norm": 2.0216391018836957,
      "learning_rate": 3.703780100030963e-06,
      "loss": 0.5404,
      "step": 6086
    },
    {
      "epoch": 0.7463217263364395,
      "grad_norm": 1.959913549547357,
      "learning_rate": 3.703335861333667e-06,
      "loss": 0.5196,
      "step": 6087
    },
    {
      "epoch": 0.7464443354585582,
      "grad_norm": 1.9330460543138814,
      "learning_rate": 3.702891573177137e-06,
      "loss": 0.5397,
      "step": 6088
    },
    {
      "epoch": 0.7465669445806769,
      "grad_norm": 2.0129064331567457,
      "learning_rate": 3.702447235579634e-06,
      "loss": 0.5311,
      "step": 6089
    },
    {
      "epoch": 0.7466895537027954,
      "grad_norm": 2.081123145972767,
      "learning_rate": 3.7020028485594204e-06,
      "loss": 0.5443,
      "step": 6090
    },
    {
      "epoch": 0.7468121628249141,
      "grad_norm": 1.940538991171522,
      "learning_rate": 3.7015584121347614e-06,
      "loss": 0.5661,
      "step": 6091
    },
    {
      "epoch": 0.7469347719470328,
      "grad_norm": 2.132953156708216,
      "learning_rate": 3.701113926323926e-06,
      "loss": 0.5516,
      "step": 6092
    },
    {
      "epoch": 0.7470573810691515,
      "grad_norm": 1.7011864848163283,
      "learning_rate": 3.7006693911451798e-06,
      "loss": 0.4972,
      "step": 6093
    },
    {
      "epoch": 0.7471799901912702,
      "grad_norm": 1.9255227202005143,
      "learning_rate": 3.7002248066167977e-06,
      "loss": 0.5482,
      "step": 6094
    },
    {
      "epoch": 0.7473025993133889,
      "grad_norm": 1.9290820533471928,
      "learning_rate": 3.6997801727570503e-06,
      "loss": 0.5161,
      "step": 6095
    },
    {
      "epoch": 0.7474252084355076,
      "grad_norm": 2.0498546293219304,
      "learning_rate": 3.6993354895842136e-06,
      "loss": 0.5269,
      "step": 6096
    },
    {
      "epoch": 0.7475478175576263,
      "grad_norm": 2.191798470672335,
      "learning_rate": 3.6988907571165654e-06,
      "loss": 0.5716,
      "step": 6097
    },
    {
      "epoch": 0.7476704266797449,
      "grad_norm": 1.8444356945048752,
      "learning_rate": 3.6984459753723845e-06,
      "loss": 0.4945,
      "step": 6098
    },
    {
      "epoch": 0.7477930358018636,
      "grad_norm": 1.89116685359157,
      "learning_rate": 3.6980011443699526e-06,
      "loss": 0.5148,
      "step": 6099
    },
    {
      "epoch": 0.7479156449239823,
      "grad_norm": 1.879332130339262,
      "learning_rate": 3.697556264127553e-06,
      "loss": 0.5322,
      "step": 6100
    },
    {
      "epoch": 0.748038254046101,
      "grad_norm": 1.812725017186104,
      "learning_rate": 3.6971113346634703e-06,
      "loss": 0.5015,
      "step": 6101
    },
    {
      "epoch": 0.7481608631682197,
      "grad_norm": 1.9823724525746675,
      "learning_rate": 3.6966663559959927e-06,
      "loss": 0.5915,
      "step": 6102
    },
    {
      "epoch": 0.7482834722903384,
      "grad_norm": 2.232204725352426,
      "learning_rate": 3.6962213281434088e-06,
      "loss": 0.5737,
      "step": 6103
    },
    {
      "epoch": 0.7484060814124571,
      "grad_norm": 1.6659484133233482,
      "learning_rate": 3.695776251124011e-06,
      "loss": 0.5054,
      "step": 6104
    },
    {
      "epoch": 0.7485286905345758,
      "grad_norm": 2.1290842712987468,
      "learning_rate": 3.6953311249560925e-06,
      "loss": 0.5613,
      "step": 6105
    },
    {
      "epoch": 0.7486512996566944,
      "grad_norm": 2.0832893883665813,
      "learning_rate": 3.6948859496579482e-06,
      "loss": 0.5528,
      "step": 6106
    },
    {
      "epoch": 0.7487739087788131,
      "grad_norm": 1.898064365126827,
      "learning_rate": 3.6944407252478765e-06,
      "loss": 0.5382,
      "step": 6107
    },
    {
      "epoch": 0.7488965179009318,
      "grad_norm": 1.8081445755059034,
      "learning_rate": 3.6939954517441754e-06,
      "loss": 0.497,
      "step": 6108
    },
    {
      "epoch": 0.7490191270230505,
      "grad_norm": 1.9997390493323022,
      "learning_rate": 3.693550129165149e-06,
      "loss": 0.5782,
      "step": 6109
    },
    {
      "epoch": 0.7491417361451692,
      "grad_norm": 1.9257685460247125,
      "learning_rate": 3.693104757529098e-06,
      "loss": 0.5501,
      "step": 6110
    },
    {
      "epoch": 0.7492643452672879,
      "grad_norm": 1.9840983432460206,
      "learning_rate": 3.6926593368543294e-06,
      "loss": 0.5396,
      "step": 6111
    },
    {
      "epoch": 0.7493869543894066,
      "grad_norm": 2.1076894036710927,
      "learning_rate": 3.6922138671591507e-06,
      "loss": 0.5424,
      "step": 6112
    },
    {
      "epoch": 0.7495095635115253,
      "grad_norm": 2.008909136873654,
      "learning_rate": 3.6917683484618717e-06,
      "loss": 0.491,
      "step": 6113
    },
    {
      "epoch": 0.7496321726336439,
      "grad_norm": 2.1402962810639345,
      "learning_rate": 3.691322780780803e-06,
      "loss": 0.5369,
      "step": 6114
    },
    {
      "epoch": 0.7497547817557626,
      "grad_norm": 1.9268454720130581,
      "learning_rate": 3.69087716413426e-06,
      "loss": 0.5429,
      "step": 6115
    },
    {
      "epoch": 0.7498773908778813,
      "grad_norm": 2.04738394379976,
      "learning_rate": 3.6904314985405565e-06,
      "loss": 0.5309,
      "step": 6116
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0050750807118187,
      "learning_rate": 3.689985784018012e-06,
      "loss": 0.5327,
      "step": 6117
    },
    {
      "epoch": 0.7501226091221187,
      "grad_norm": 1.890601520093768,
      "learning_rate": 3.689540020584944e-06,
      "loss": 0.5397,
      "step": 6118
    },
    {
      "epoch": 0.7502452182442374,
      "grad_norm": 1.9299301599432375,
      "learning_rate": 3.6890942082596754e-06,
      "loss": 0.5662,
      "step": 6119
    },
    {
      "epoch": 0.7503678273663561,
      "grad_norm": 2.24777811279465,
      "learning_rate": 3.6886483470605293e-06,
      "loss": 0.6165,
      "step": 6120
    },
    {
      "epoch": 0.7504904364884747,
      "grad_norm": 2.2231479595249617,
      "learning_rate": 3.6882024370058326e-06,
      "loss": 0.5883,
      "step": 6121
    },
    {
      "epoch": 0.7506130456105934,
      "grad_norm": 1.9602394897437914,
      "learning_rate": 3.687756478113912e-06,
      "loss": 0.5546,
      "step": 6122
    },
    {
      "epoch": 0.7507356547327121,
      "grad_norm": 2.0570661430901063,
      "learning_rate": 3.687310470403097e-06,
      "loss": 0.5114,
      "step": 6123
    },
    {
      "epoch": 0.7508582638548308,
      "grad_norm": 1.8996928485962636,
      "learning_rate": 3.6868644138917205e-06,
      "loss": 0.5078,
      "step": 6124
    },
    {
      "epoch": 0.7509808729769495,
      "grad_norm": 1.981229415019754,
      "learning_rate": 3.686418308598115e-06,
      "loss": 0.5281,
      "step": 6125
    },
    {
      "epoch": 0.7511034820990682,
      "grad_norm": 2.0168707227188274,
      "learning_rate": 3.6859721545406158e-06,
      "loss": 0.5452,
      "step": 6126
    },
    {
      "epoch": 0.7512260912211869,
      "grad_norm": 1.9360492363090094,
      "learning_rate": 3.6855259517375623e-06,
      "loss": 0.5726,
      "step": 6127
    },
    {
      "epoch": 0.7513487003433056,
      "grad_norm": 1.8606041591200975,
      "learning_rate": 3.6850797002072937e-06,
      "loss": 0.5353,
      "step": 6128
    },
    {
      "epoch": 0.7514713094654242,
      "grad_norm": 2.0374497226244612,
      "learning_rate": 3.6846333999681506e-06,
      "loss": 0.5622,
      "step": 6129
    },
    {
      "epoch": 0.7515939185875429,
      "grad_norm": 1.7967463762686173,
      "learning_rate": 3.684187051038478e-06,
      "loss": 0.5297,
      "step": 6130
    },
    {
      "epoch": 0.7517165277096616,
      "grad_norm": 1.8282855349776796,
      "learning_rate": 3.683740653436621e-06,
      "loss": 0.4884,
      "step": 6131
    },
    {
      "epoch": 0.7518391368317803,
      "grad_norm": 2.098899068404357,
      "learning_rate": 3.683294207180928e-06,
      "loss": 0.5424,
      "step": 6132
    },
    {
      "epoch": 0.751961745953899,
      "grad_norm": 1.81886843771156,
      "learning_rate": 3.6828477122897475e-06,
      "loss": 0.5334,
      "step": 6133
    },
    {
      "epoch": 0.7520843550760177,
      "grad_norm": 1.8686055208040386,
      "learning_rate": 3.682401168781432e-06,
      "loss": 0.4703,
      "step": 6134
    },
    {
      "epoch": 0.7522069641981364,
      "grad_norm": 2.068077108347071,
      "learning_rate": 3.6819545766743357e-06,
      "loss": 0.5468,
      "step": 6135
    },
    {
      "epoch": 0.7523295733202551,
      "grad_norm": 1.9045579784087256,
      "learning_rate": 3.681507935986813e-06,
      "loss": 0.4956,
      "step": 6136
    },
    {
      "epoch": 0.7524521824423737,
      "grad_norm": 1.9962420759132256,
      "learning_rate": 3.681061246737223e-06,
      "loss": 0.5655,
      "step": 6137
    },
    {
      "epoch": 0.7525747915644924,
      "grad_norm": 1.948443176335578,
      "learning_rate": 3.680614508943926e-06,
      "loss": 0.5814,
      "step": 6138
    },
    {
      "epoch": 0.7526974006866111,
      "grad_norm": 1.9117580781185983,
      "learning_rate": 3.680167722625281e-06,
      "loss": 0.5594,
      "step": 6139
    },
    {
      "epoch": 0.7528200098087298,
      "grad_norm": 1.7864522447019522,
      "learning_rate": 3.679720887799654e-06,
      "loss": 0.4782,
      "step": 6140
    },
    {
      "epoch": 0.7529426189308485,
      "grad_norm": 1.895151328280089,
      "learning_rate": 3.6792740044854085e-06,
      "loss": 0.5361,
      "step": 6141
    },
    {
      "epoch": 0.7530652280529672,
      "grad_norm": 1.9376196919521216,
      "learning_rate": 3.6788270727009157e-06,
      "loss": 0.5222,
      "step": 6142
    },
    {
      "epoch": 0.7531878371750859,
      "grad_norm": 2.0200016851855906,
      "learning_rate": 3.6783800924645425e-06,
      "loss": 0.549,
      "step": 6143
    },
    {
      "epoch": 0.7533104462972046,
      "grad_norm": 1.7703231565022977,
      "learning_rate": 3.677933063794661e-06,
      "loss": 0.4894,
      "step": 6144
    },
    {
      "epoch": 0.7534330554193231,
      "grad_norm": 1.6625850088459095,
      "learning_rate": 3.677485986709645e-06,
      "loss": 0.483,
      "step": 6145
    },
    {
      "epoch": 0.7535556645414418,
      "grad_norm": 1.957037065567535,
      "learning_rate": 3.6770388612278713e-06,
      "loss": 0.5172,
      "step": 6146
    },
    {
      "epoch": 0.7536782736635605,
      "grad_norm": 2.1439362870971355,
      "learning_rate": 3.676591687367716e-06,
      "loss": 0.525,
      "step": 6147
    },
    {
      "epoch": 0.7538008827856792,
      "grad_norm": 2.1948732966763878,
      "learning_rate": 3.6761444651475596e-06,
      "loss": 0.6414,
      "step": 6148
    },
    {
      "epoch": 0.753923491907798,
      "grad_norm": 2.000516691519739,
      "learning_rate": 3.6756971945857834e-06,
      "loss": 0.5595,
      "step": 6149
    },
    {
      "epoch": 0.7540461010299166,
      "grad_norm": 2.0574408236428305,
      "learning_rate": 3.675249875700771e-06,
      "loss": 0.5293,
      "step": 6150
    },
    {
      "epoch": 0.7541687101520353,
      "grad_norm": 1.8079466766809253,
      "learning_rate": 3.674802508510908e-06,
      "loss": 0.5614,
      "step": 6151
    },
    {
      "epoch": 0.754291319274154,
      "grad_norm": 2.1084548078194727,
      "learning_rate": 3.674355093034582e-06,
      "loss": 0.5654,
      "step": 6152
    },
    {
      "epoch": 0.7544139283962726,
      "grad_norm": 2.192172423412498,
      "learning_rate": 3.673907629290183e-06,
      "loss": 0.5971,
      "step": 6153
    },
    {
      "epoch": 0.7545365375183913,
      "grad_norm": 1.9919636608069518,
      "learning_rate": 3.673460117296102e-06,
      "loss": 0.5303,
      "step": 6154
    },
    {
      "epoch": 0.75465914664051,
      "grad_norm": 1.7604506898044507,
      "learning_rate": 3.673012557070733e-06,
      "loss": 0.5184,
      "step": 6155
    },
    {
      "epoch": 0.7547817557626287,
      "grad_norm": 2.1220475864978345,
      "learning_rate": 3.672564948632471e-06,
      "loss": 0.5318,
      "step": 6156
    },
    {
      "epoch": 0.7549043648847474,
      "grad_norm": 1.933564494647746,
      "learning_rate": 3.6721172919997137e-06,
      "loss": 0.5266,
      "step": 6157
    },
    {
      "epoch": 0.7550269740068661,
      "grad_norm": 1.8650757523844348,
      "learning_rate": 3.671669587190861e-06,
      "loss": 0.4675,
      "step": 6158
    },
    {
      "epoch": 0.7551495831289848,
      "grad_norm": 2.102372304032312,
      "learning_rate": 3.6712218342243137e-06,
      "loss": 0.5029,
      "step": 6159
    },
    {
      "epoch": 0.7552721922511035,
      "grad_norm": 1.9955234837516294,
      "learning_rate": 3.6707740331184756e-06,
      "loss": 0.5582,
      "step": 6160
    },
    {
      "epoch": 0.7553948013732221,
      "grad_norm": 1.8520689947165747,
      "learning_rate": 3.670326183891752e-06,
      "loss": 0.5073,
      "step": 6161
    },
    {
      "epoch": 0.7555174104953408,
      "grad_norm": 1.9816216692313944,
      "learning_rate": 3.6698782865625506e-06,
      "loss": 0.569,
      "step": 6162
    },
    {
      "epoch": 0.7556400196174595,
      "grad_norm": 1.9551788650759447,
      "learning_rate": 3.6694303411492805e-06,
      "loss": 0.5087,
      "step": 6163
    },
    {
      "epoch": 0.7557626287395782,
      "grad_norm": 2.1527014863386347,
      "learning_rate": 3.6689823476703524e-06,
      "loss": 0.5424,
      "step": 6164
    },
    {
      "epoch": 0.7558852378616969,
      "grad_norm": 2.1140018453854448,
      "learning_rate": 3.668534306144181e-06,
      "loss": 0.5478,
      "step": 6165
    },
    {
      "epoch": 0.7560078469838156,
      "grad_norm": 2.0969326006564266,
      "learning_rate": 3.6680862165891805e-06,
      "loss": 0.5322,
      "step": 6166
    },
    {
      "epoch": 0.7561304561059343,
      "grad_norm": 1.9818297349358567,
      "learning_rate": 3.6676380790237686e-06,
      "loss": 0.5861,
      "step": 6167
    },
    {
      "epoch": 0.7562530652280529,
      "grad_norm": 1.955073220737561,
      "learning_rate": 3.6671898934663648e-06,
      "loss": 0.5627,
      "step": 6168
    },
    {
      "epoch": 0.7563756743501716,
      "grad_norm": 1.9797842685326215,
      "learning_rate": 3.6667416599353895e-06,
      "loss": 0.5556,
      "step": 6169
    },
    {
      "epoch": 0.7564982834722903,
      "grad_norm": 1.939901023328283,
      "learning_rate": 3.6662933784492666e-06,
      "loss": 0.5483,
      "step": 6170
    },
    {
      "epoch": 0.756620892594409,
      "grad_norm": 1.8963723837399018,
      "learning_rate": 3.6658450490264215e-06,
      "loss": 0.5135,
      "step": 6171
    },
    {
      "epoch": 0.7567435017165277,
      "grad_norm": 1.9497868441213018,
      "learning_rate": 3.66539667168528e-06,
      "loss": 0.5352,
      "step": 6172
    },
    {
      "epoch": 0.7568661108386464,
      "grad_norm": 1.8604223956459576,
      "learning_rate": 3.664948246444273e-06,
      "loss": 0.5124,
      "step": 6173
    },
    {
      "epoch": 0.7569887199607651,
      "grad_norm": 1.8094623828512084,
      "learning_rate": 3.6644997733218303e-06,
      "loss": 0.5115,
      "step": 6174
    },
    {
      "epoch": 0.7571113290828838,
      "grad_norm": 1.789373516107101,
      "learning_rate": 3.664051252336386e-06,
      "loss": 0.5066,
      "step": 6175
    },
    {
      "epoch": 0.7572339382050024,
      "grad_norm": 1.9891868684062834,
      "learning_rate": 3.663602683506374e-06,
      "loss": 0.5566,
      "step": 6176
    },
    {
      "epoch": 0.7573565473271211,
      "grad_norm": 1.9112258121460717,
      "learning_rate": 3.663154066850231e-06,
      "loss": 0.5277,
      "step": 6177
    },
    {
      "epoch": 0.7574791564492398,
      "grad_norm": 1.8743261143646817,
      "learning_rate": 3.662705402386398e-06,
      "loss": 0.52,
      "step": 6178
    },
    {
      "epoch": 0.7576017655713585,
      "grad_norm": 1.8476208534864786,
      "learning_rate": 3.6622566901333135e-06,
      "loss": 0.5266,
      "step": 6179
    },
    {
      "epoch": 0.7577243746934772,
      "grad_norm": 1.9617925167634604,
      "learning_rate": 3.661807930109422e-06,
      "loss": 0.5291,
      "step": 6180
    },
    {
      "epoch": 0.7578469838155959,
      "grad_norm": 2.2659658556019635,
      "learning_rate": 3.6613591223331672e-06,
      "loss": 0.5784,
      "step": 6181
    },
    {
      "epoch": 0.7579695929377146,
      "grad_norm": 2.0154298277007556,
      "learning_rate": 3.660910266822997e-06,
      "loss": 0.5289,
      "step": 6182
    },
    {
      "epoch": 0.7580922020598333,
      "grad_norm": 1.807768355129504,
      "learning_rate": 3.660461363597359e-06,
      "loss": 0.521,
      "step": 6183
    },
    {
      "epoch": 0.7582148111819519,
      "grad_norm": 1.8428102822496935,
      "learning_rate": 3.660012412674705e-06,
      "loss": 0.5434,
      "step": 6184
    },
    {
      "epoch": 0.7583374203040706,
      "grad_norm": 1.8652397598783514,
      "learning_rate": 3.659563414073487e-06,
      "loss": 0.5157,
      "step": 6185
    },
    {
      "epoch": 0.7584600294261893,
      "grad_norm": 2.0559991982738723,
      "learning_rate": 3.6591143678121606e-06,
      "loss": 0.5921,
      "step": 6186
    },
    {
      "epoch": 0.758582638548308,
      "grad_norm": 2.0672588641914422,
      "learning_rate": 3.65866527390918e-06,
      "loss": 0.5681,
      "step": 6187
    },
    {
      "epoch": 0.7587052476704267,
      "grad_norm": 2.047402726143858,
      "learning_rate": 3.6582161323830067e-06,
      "loss": 0.5591,
      "step": 6188
    },
    {
      "epoch": 0.7588278567925454,
      "grad_norm": 2.0823653453612803,
      "learning_rate": 3.657766943252099e-06,
      "loss": 0.5265,
      "step": 6189
    },
    {
      "epoch": 0.7589504659146641,
      "grad_norm": 1.7431435879679598,
      "learning_rate": 3.657317706534922e-06,
      "loss": 0.5203,
      "step": 6190
    },
    {
      "epoch": 0.7590730750367828,
      "grad_norm": 2.023580304684117,
      "learning_rate": 3.6568684222499367e-06,
      "loss": 0.5436,
      "step": 6191
    },
    {
      "epoch": 0.7591956841589014,
      "grad_norm": 2.0255068996572896,
      "learning_rate": 3.656419090415612e-06,
      "loss": 0.5483,
      "step": 6192
    },
    {
      "epoch": 0.7593182932810201,
      "grad_norm": 1.6804731889328428,
      "learning_rate": 3.6559697110504143e-06,
      "loss": 0.4912,
      "step": 6193
    },
    {
      "epoch": 0.7594409024031388,
      "grad_norm": 1.843095865800465,
      "learning_rate": 3.655520284172816e-06,
      "loss": 0.5186,
      "step": 6194
    },
    {
      "epoch": 0.7595635115252575,
      "grad_norm": 2.1940131657357904,
      "learning_rate": 3.6550708098012883e-06,
      "loss": 0.4955,
      "step": 6195
    },
    {
      "epoch": 0.7596861206473762,
      "grad_norm": 1.8530597382897762,
      "learning_rate": 3.6546212879543054e-06,
      "loss": 0.5053,
      "step": 6196
    },
    {
      "epoch": 0.7598087297694949,
      "grad_norm": 1.6848339495522928,
      "learning_rate": 3.6541717186503428e-06,
      "loss": 0.5176,
      "step": 6197
    },
    {
      "epoch": 0.7599313388916136,
      "grad_norm": 1.9327524243098373,
      "learning_rate": 3.65372210190788e-06,
      "loss": 0.5197,
      "step": 6198
    },
    {
      "epoch": 0.7600539480137323,
      "grad_norm": 1.9600957434598956,
      "learning_rate": 3.653272437745396e-06,
      "loss": 0.5637,
      "step": 6199
    },
    {
      "epoch": 0.7601765571358509,
      "grad_norm": 2.0868501695863277,
      "learning_rate": 3.652822726181373e-06,
      "loss": 0.603,
      "step": 6200
    },
    {
      "epoch": 0.7602991662579696,
      "grad_norm": 1.7835897279752784,
      "learning_rate": 3.6523729672342955e-06,
      "loss": 0.4577,
      "step": 6201
    },
    {
      "epoch": 0.7604217753800883,
      "grad_norm": 1.9946410765253162,
      "learning_rate": 3.651923160922648e-06,
      "loss": 0.5366,
      "step": 6202
    },
    {
      "epoch": 0.760544384502207,
      "grad_norm": 1.9097701456870493,
      "learning_rate": 3.6514733072649205e-06,
      "loss": 0.5334,
      "step": 6203
    },
    {
      "epoch": 0.7606669936243257,
      "grad_norm": 2.2152515362862943,
      "learning_rate": 3.651023406279601e-06,
      "loss": 0.518,
      "step": 6204
    },
    {
      "epoch": 0.7607896027464444,
      "grad_norm": 1.8895390881882739,
      "learning_rate": 3.6505734579851815e-06,
      "loss": 0.4868,
      "step": 6205
    },
    {
      "epoch": 0.7609122118685631,
      "grad_norm": 2.008291527669083,
      "learning_rate": 3.6501234624001563e-06,
      "loss": 0.5379,
      "step": 6206
    },
    {
      "epoch": 0.7610348209906818,
      "grad_norm": 1.9514653418679522,
      "learning_rate": 3.6496734195430208e-06,
      "loss": 0.5707,
      "step": 6207
    },
    {
      "epoch": 0.7611574301128003,
      "grad_norm": 1.9732712391382816,
      "learning_rate": 3.6492233294322722e-06,
      "loss": 0.5947,
      "step": 6208
    },
    {
      "epoch": 0.761280039234919,
      "grad_norm": 1.9972266645605203,
      "learning_rate": 3.64877319208641e-06,
      "loss": 0.5399,
      "step": 6209
    },
    {
      "epoch": 0.7614026483570377,
      "grad_norm": 2.056380873816175,
      "learning_rate": 3.6483230075239356e-06,
      "loss": 0.5397,
      "step": 6210
    },
    {
      "epoch": 0.7615252574791564,
      "grad_norm": 1.9436897465625158,
      "learning_rate": 3.6478727757633536e-06,
      "loss": 0.5472,
      "step": 6211
    },
    {
      "epoch": 0.7616478666012751,
      "grad_norm": 1.7284513886218769,
      "learning_rate": 3.6474224968231677e-06,
      "loss": 0.4654,
      "step": 6212
    },
    {
      "epoch": 0.7617704757233938,
      "grad_norm": 1.9404649027808767,
      "learning_rate": 3.6469721707218864e-06,
      "loss": 0.5903,
      "step": 6213
    },
    {
      "epoch": 0.7618930848455125,
      "grad_norm": 1.9901234782723414,
      "learning_rate": 3.6465217974780175e-06,
      "loss": 0.5541,
      "step": 6214
    },
    {
      "epoch": 0.7620156939676312,
      "grad_norm": 1.9031400618141439,
      "learning_rate": 3.646071377110073e-06,
      "loss": 0.537,
      "step": 6215
    },
    {
      "epoch": 0.7621383030897498,
      "grad_norm": 1.8353486427016947,
      "learning_rate": 3.645620909636567e-06,
      "loss": 0.5435,
      "step": 6216
    },
    {
      "epoch": 0.7622609122118685,
      "grad_norm": 2.128274452953456,
      "learning_rate": 3.6451703950760124e-06,
      "loss": 0.5202,
      "step": 6217
    },
    {
      "epoch": 0.7623835213339872,
      "grad_norm": 1.7391276970888405,
      "learning_rate": 3.6447198334469275e-06,
      "loss": 0.5401,
      "step": 6218
    },
    {
      "epoch": 0.7625061304561059,
      "grad_norm": 2.0529391994142863,
      "learning_rate": 3.644269224767831e-06,
      "loss": 0.5571,
      "step": 6219
    },
    {
      "epoch": 0.7626287395782246,
      "grad_norm": 2.01372705490937,
      "learning_rate": 3.643818569057244e-06,
      "loss": 0.5502,
      "step": 6220
    },
    {
      "epoch": 0.7627513487003433,
      "grad_norm": 1.8207481717824276,
      "learning_rate": 3.643367866333688e-06,
      "loss": 0.597,
      "step": 6221
    },
    {
      "epoch": 0.762873957822462,
      "grad_norm": 1.9396027098423825,
      "learning_rate": 3.6429171166156885e-06,
      "loss": 0.5609,
      "step": 6222
    },
    {
      "epoch": 0.7629965669445806,
      "grad_norm": 1.8518903382092444,
      "learning_rate": 3.6424663199217723e-06,
      "loss": 0.5322,
      "step": 6223
    },
    {
      "epoch": 0.7631191760666993,
      "grad_norm": 1.9815142738980283,
      "learning_rate": 3.6420154762704685e-06,
      "loss": 0.5316,
      "step": 6224
    },
    {
      "epoch": 0.763241785188818,
      "grad_norm": 1.8626585079371634,
      "learning_rate": 3.641564585680306e-06,
      "loss": 0.4791,
      "step": 6225
    },
    {
      "epoch": 0.7633643943109367,
      "grad_norm": 1.8388064577491228,
      "learning_rate": 3.641113648169819e-06,
      "loss": 0.5685,
      "step": 6226
    },
    {
      "epoch": 0.7634870034330554,
      "grad_norm": 1.7934577833789036,
      "learning_rate": 3.640662663757539e-06,
      "loss": 0.5189,
      "step": 6227
    },
    {
      "epoch": 0.7636096125551741,
      "grad_norm": 1.7251657833227234,
      "learning_rate": 3.6402116324620062e-06,
      "loss": 0.4924,
      "step": 6228
    },
    {
      "epoch": 0.7637322216772928,
      "grad_norm": 2.0073631945691806,
      "learning_rate": 3.639760554301756e-06,
      "loss": 0.5407,
      "step": 6229
    },
    {
      "epoch": 0.7638548307994115,
      "grad_norm": 1.9262123274721459,
      "learning_rate": 3.639309429295329e-06,
      "loss": 0.5424,
      "step": 6230
    },
    {
      "epoch": 0.7639774399215301,
      "grad_norm": 1.8535728737785067,
      "learning_rate": 3.638858257461268e-06,
      "loss": 0.5555,
      "step": 6231
    },
    {
      "epoch": 0.7641000490436488,
      "grad_norm": 2.0835153422543407,
      "learning_rate": 3.6384070388181158e-06,
      "loss": 0.5183,
      "step": 6232
    },
    {
      "epoch": 0.7642226581657675,
      "grad_norm": 1.7861313538347474,
      "learning_rate": 3.637955773384419e-06,
      "loss": 0.553,
      "step": 6233
    },
    {
      "epoch": 0.7643452672878862,
      "grad_norm": 2.2341406867598304,
      "learning_rate": 3.637504461178726e-06,
      "loss": 0.5637,
      "step": 6234
    },
    {
      "epoch": 0.7644678764100049,
      "grad_norm": 1.8518195833150775,
      "learning_rate": 3.637053102219585e-06,
      "loss": 0.495,
      "step": 6235
    },
    {
      "epoch": 0.7645904855321236,
      "grad_norm": 2.2603316546997707,
      "learning_rate": 3.636601696525549e-06,
      "loss": 0.5526,
      "step": 6236
    },
    {
      "epoch": 0.7647130946542423,
      "grad_norm": 1.9857671165944164,
      "learning_rate": 3.636150244115171e-06,
      "loss": 0.5375,
      "step": 6237
    },
    {
      "epoch": 0.764835703776361,
      "grad_norm": 1.6524105613786126,
      "learning_rate": 3.6356987450070064e-06,
      "loss": 0.4721,
      "step": 6238
    },
    {
      "epoch": 0.7649583128984796,
      "grad_norm": 1.930521641084536,
      "learning_rate": 3.635247199219613e-06,
      "loss": 0.5148,
      "step": 6239
    },
    {
      "epoch": 0.7650809220205983,
      "grad_norm": 2.128863238007472,
      "learning_rate": 3.6347956067715495e-06,
      "loss": 0.593,
      "step": 6240
    },
    {
      "epoch": 0.765203531142717,
      "grad_norm": 2.189054042127364,
      "learning_rate": 3.634343967681378e-06,
      "loss": 0.5911,
      "step": 6241
    },
    {
      "epoch": 0.7653261402648357,
      "grad_norm": 1.9639880373200465,
      "learning_rate": 3.633892281967661e-06,
      "loss": 0.5334,
      "step": 6242
    },
    {
      "epoch": 0.7654487493869544,
      "grad_norm": 1.7615074126713983,
      "learning_rate": 3.633440549648964e-06,
      "loss": 0.5288,
      "step": 6243
    },
    {
      "epoch": 0.7655713585090731,
      "grad_norm": 1.9502999852426628,
      "learning_rate": 3.6329887707438537e-06,
      "loss": 0.5163,
      "step": 6244
    },
    {
      "epoch": 0.7656939676311918,
      "grad_norm": 2.0304179297575384,
      "learning_rate": 3.6325369452708994e-06,
      "loss": 0.5489,
      "step": 6245
    },
    {
      "epoch": 0.7658165767533105,
      "grad_norm": 2.078777097885784,
      "learning_rate": 3.6320850732486714e-06,
      "loss": 0.5154,
      "step": 6246
    },
    {
      "epoch": 0.7659391858754291,
      "grad_norm": 2.130686774323411,
      "learning_rate": 3.631633154695742e-06,
      "loss": 0.5736,
      "step": 6247
    },
    {
      "epoch": 0.7660617949975478,
      "grad_norm": 2.096388071148548,
      "learning_rate": 3.631181189630687e-06,
      "loss": 0.4968,
      "step": 6248
    },
    {
      "epoch": 0.7661844041196665,
      "grad_norm": 1.8913115857472256,
      "learning_rate": 3.6307291780720833e-06,
      "loss": 0.5736,
      "step": 6249
    },
    {
      "epoch": 0.7663070132417852,
      "grad_norm": 2.015718546685122,
      "learning_rate": 3.630277120038508e-06,
      "loss": 0.482,
      "step": 6250
    },
    {
      "epoch": 0.7664296223639039,
      "grad_norm": 2.1132721824254777,
      "learning_rate": 3.629825015548542e-06,
      "loss": 0.5028,
      "step": 6251
    },
    {
      "epoch": 0.7665522314860226,
      "grad_norm": 1.9151379833151472,
      "learning_rate": 3.6293728646207677e-06,
      "loss": 0.5272,
      "step": 6252
    },
    {
      "epoch": 0.7666748406081413,
      "grad_norm": 1.8743211669436126,
      "learning_rate": 3.6289206672737693e-06,
      "loss": 0.5832,
      "step": 6253
    },
    {
      "epoch": 0.76679744973026,
      "grad_norm": 1.8747176850779872,
      "learning_rate": 3.6284684235261332e-06,
      "loss": 0.5781,
      "step": 6254
    },
    {
      "epoch": 0.7669200588523786,
      "grad_norm": 1.8190023318849393,
      "learning_rate": 3.6280161333964464e-06,
      "loss": 0.4843,
      "step": 6255
    },
    {
      "epoch": 0.7670426679744973,
      "grad_norm": 1.8858261480332745,
      "learning_rate": 3.6275637969033003e-06,
      "loss": 0.533,
      "step": 6256
    },
    {
      "epoch": 0.767165277096616,
      "grad_norm": 1.808727328843087,
      "learning_rate": 3.627111414065285e-06,
      "loss": 0.5619,
      "step": 6257
    },
    {
      "epoch": 0.7672878862187347,
      "grad_norm": 1.9185759411035703,
      "learning_rate": 3.626658984900996e-06,
      "loss": 0.5722,
      "step": 6258
    },
    {
      "epoch": 0.7674104953408534,
      "grad_norm": 1.739101836336635,
      "learning_rate": 3.6262065094290284e-06,
      "loss": 0.5148,
      "step": 6259
    },
    {
      "epoch": 0.7675331044629721,
      "grad_norm": 1.872778409770702,
      "learning_rate": 3.625753987667978e-06,
      "loss": 0.5546,
      "step": 6260
    },
    {
      "epoch": 0.7676557135850908,
      "grad_norm": 2.010365743476591,
      "learning_rate": 3.625301419636447e-06,
      "loss": 0.5565,
      "step": 6261
    },
    {
      "epoch": 0.7677783227072095,
      "grad_norm": 1.8731236786726002,
      "learning_rate": 3.624848805353035e-06,
      "loss": 0.5532,
      "step": 6262
    },
    {
      "epoch": 0.7679009318293281,
      "grad_norm": 1.9775795316653455,
      "learning_rate": 3.6243961448363463e-06,
      "loss": 0.5109,
      "step": 6263
    },
    {
      "epoch": 0.7680235409514468,
      "grad_norm": 1.9064199710591112,
      "learning_rate": 3.623943438104985e-06,
      "loss": 0.5077,
      "step": 6264
    },
    {
      "epoch": 0.7681461500735655,
      "grad_norm": 1.8377491352629025,
      "learning_rate": 3.6234906851775585e-06,
      "loss": 0.5255,
      "step": 6265
    },
    {
      "epoch": 0.7682687591956842,
      "grad_norm": 1.9455002768482976,
      "learning_rate": 3.6230378860726757e-06,
      "loss": 0.5519,
      "step": 6266
    },
    {
      "epoch": 0.7683913683178029,
      "grad_norm": 1.811209768968352,
      "learning_rate": 3.622585040808948e-06,
      "loss": 0.507,
      "step": 6267
    },
    {
      "epoch": 0.7685139774399216,
      "grad_norm": 1.8538679495797987,
      "learning_rate": 3.6221321494049877e-06,
      "loss": 0.5148,
      "step": 6268
    },
    {
      "epoch": 0.7686365865620403,
      "grad_norm": 1.8610310405731763,
      "learning_rate": 3.621679211879409e-06,
      "loss": 0.4664,
      "step": 6269
    },
    {
      "epoch": 0.7687591956841588,
      "grad_norm": 1.881051818362239,
      "learning_rate": 3.6212262282508293e-06,
      "loss": 0.4977,
      "step": 6270
    },
    {
      "epoch": 0.7688818048062775,
      "grad_norm": 1.8707858198503278,
      "learning_rate": 3.620773198537867e-06,
      "loss": 0.5346,
      "step": 6271
    },
    {
      "epoch": 0.7690044139283962,
      "grad_norm": 1.7309553657655028,
      "learning_rate": 3.6203201227591417e-06,
      "loss": 0.5241,
      "step": 6272
    },
    {
      "epoch": 0.769127023050515,
      "grad_norm": 2.1607843484397047,
      "learning_rate": 3.6198670009332753e-06,
      "loss": 0.5501,
      "step": 6273
    },
    {
      "epoch": 0.7692496321726336,
      "grad_norm": 2.250816033582778,
      "learning_rate": 3.619413833078894e-06,
      "loss": 0.5055,
      "step": 6274
    },
    {
      "epoch": 0.7693722412947523,
      "grad_norm": 2.2056886472420314,
      "learning_rate": 3.6189606192146208e-06,
      "loss": 0.6172,
      "step": 6275
    },
    {
      "epoch": 0.769494850416871,
      "grad_norm": 1.952299791828032,
      "learning_rate": 3.6185073593590868e-06,
      "loss": 0.4763,
      "step": 6276
    },
    {
      "epoch": 0.7696174595389897,
      "grad_norm": 1.8392588526667286,
      "learning_rate": 3.6180540535309196e-06,
      "loss": 0.5246,
      "step": 6277
    },
    {
      "epoch": 0.7697400686611083,
      "grad_norm": 2.0533277306200777,
      "learning_rate": 3.6176007017487515e-06,
      "loss": 0.5374,
      "step": 6278
    },
    {
      "epoch": 0.769862677783227,
      "grad_norm": 1.9613715889037873,
      "learning_rate": 3.6171473040312154e-06,
      "loss": 0.5729,
      "step": 6279
    },
    {
      "epoch": 0.7699852869053457,
      "grad_norm": 2.1372416967580605,
      "learning_rate": 3.616693860396948e-06,
      "loss": 0.548,
      "step": 6280
    },
    {
      "epoch": 0.7701078960274644,
      "grad_norm": 2.1830665874217936,
      "learning_rate": 3.6162403708645855e-06,
      "loss": 0.5365,
      "step": 6281
    },
    {
      "epoch": 0.7702305051495831,
      "grad_norm": 2.0137919533646222,
      "learning_rate": 3.6157868354527683e-06,
      "loss": 0.5842,
      "step": 6282
    },
    {
      "epoch": 0.7703531142717018,
      "grad_norm": 1.9793185916349043,
      "learning_rate": 3.6153332541801356e-06,
      "loss": 0.5328,
      "step": 6283
    },
    {
      "epoch": 0.7704757233938205,
      "grad_norm": 1.889024740484318,
      "learning_rate": 3.6148796270653323e-06,
      "loss": 0.5327,
      "step": 6284
    },
    {
      "epoch": 0.7705983325159392,
      "grad_norm": 2.2221420235270353,
      "learning_rate": 3.614425954127002e-06,
      "loss": 0.5576,
      "step": 6285
    },
    {
      "epoch": 0.7707209416380578,
      "grad_norm": 1.662258173646039,
      "learning_rate": 3.6139722353837934e-06,
      "loss": 0.5189,
      "step": 6286
    },
    {
      "epoch": 0.7708435507601765,
      "grad_norm": 1.955004568853521,
      "learning_rate": 3.613518470854353e-06,
      "loss": 0.5327,
      "step": 6287
    },
    {
      "epoch": 0.7709661598822952,
      "grad_norm": 2.032935200207934,
      "learning_rate": 3.6130646605573323e-06,
      "loss": 0.5926,
      "step": 6288
    },
    {
      "epoch": 0.7710887690044139,
      "grad_norm": 2.155553081087325,
      "learning_rate": 3.612610804511383e-06,
      "loss": 0.535,
      "step": 6289
    },
    {
      "epoch": 0.7712113781265326,
      "grad_norm": 1.7247986329431333,
      "learning_rate": 3.612156902735161e-06,
      "loss": 0.5076,
      "step": 6290
    },
    {
      "epoch": 0.7713339872486513,
      "grad_norm": 1.998212687447707,
      "learning_rate": 3.6117029552473204e-06,
      "loss": 0.5479,
      "step": 6291
    },
    {
      "epoch": 0.77145659637077,
      "grad_norm": 1.8939388459115496,
      "learning_rate": 3.6112489620665207e-06,
      "loss": 0.4653,
      "step": 6292
    },
    {
      "epoch": 0.7715792054928887,
      "grad_norm": 1.9526523599093324,
      "learning_rate": 3.610794923211421e-06,
      "loss": 0.547,
      "step": 6293
    },
    {
      "epoch": 0.7717018146150073,
      "grad_norm": 2.063061194077457,
      "learning_rate": 3.610340838700683e-06,
      "loss": 0.5539,
      "step": 6294
    },
    {
      "epoch": 0.771824423737126,
      "grad_norm": 2.1358476342381265,
      "learning_rate": 3.609886708552972e-06,
      "loss": 0.529,
      "step": 6295
    },
    {
      "epoch": 0.7719470328592447,
      "grad_norm": 1.913746878197024,
      "learning_rate": 3.6094325327869516e-06,
      "loss": 0.4638,
      "step": 6296
    },
    {
      "epoch": 0.7720696419813634,
      "grad_norm": 2.0728600907541885,
      "learning_rate": 3.6089783114212908e-06,
      "loss": 0.5561,
      "step": 6297
    },
    {
      "epoch": 0.7721922511034821,
      "grad_norm": 1.7993544033690263,
      "learning_rate": 3.608524044474657e-06,
      "loss": 0.5366,
      "step": 6298
    },
    {
      "epoch": 0.7723148602256008,
      "grad_norm": 1.8481820858254867,
      "learning_rate": 3.6080697319657232e-06,
      "loss": 0.5531,
      "step": 6299
    },
    {
      "epoch": 0.7724374693477195,
      "grad_norm": 1.847429777239105,
      "learning_rate": 3.607615373913162e-06,
      "loss": 0.5271,
      "step": 6300
    },
    {
      "epoch": 0.7725600784698382,
      "grad_norm": 1.8745506105084018,
      "learning_rate": 3.6071609703356474e-06,
      "loss": 0.5158,
      "step": 6301
    },
    {
      "epoch": 0.7726826875919568,
      "grad_norm": 1.7910803717409216,
      "learning_rate": 3.606706521251857e-06,
      "loss": 0.4974,
      "step": 6302
    },
    {
      "epoch": 0.7728052967140755,
      "grad_norm": 1.9253288157648518,
      "learning_rate": 3.606252026680469e-06,
      "loss": 0.5289,
      "step": 6303
    },
    {
      "epoch": 0.7729279058361942,
      "grad_norm": 1.9312636404553087,
      "learning_rate": 3.6057974866401645e-06,
      "loss": 0.5067,
      "step": 6304
    },
    {
      "epoch": 0.7730505149583129,
      "grad_norm": 1.9544677590918573,
      "learning_rate": 3.605342901149626e-06,
      "loss": 0.5249,
      "step": 6305
    },
    {
      "epoch": 0.7731731240804316,
      "grad_norm": 1.8694829722615687,
      "learning_rate": 3.604888270227537e-06,
      "loss": 0.5633,
      "step": 6306
    },
    {
      "epoch": 0.7732957332025503,
      "grad_norm": 1.8168773813009376,
      "learning_rate": 3.604433593892585e-06,
      "loss": 0.548,
      "step": 6307
    },
    {
      "epoch": 0.773418342324669,
      "grad_norm": 1.8781661432080425,
      "learning_rate": 3.6039788721634555e-06,
      "loss": 0.519,
      "step": 6308
    },
    {
      "epoch": 0.7735409514467877,
      "grad_norm": 1.933982519014718,
      "learning_rate": 3.603524105058841e-06,
      "loss": 0.517,
      "step": 6309
    },
    {
      "epoch": 0.7736635605689063,
      "grad_norm": 1.916442452181566,
      "learning_rate": 3.603069292597432e-06,
      "loss": 0.5078,
      "step": 6310
    },
    {
      "epoch": 0.773786169691025,
      "grad_norm": 1.8593302798028712,
      "learning_rate": 3.602614434797922e-06,
      "loss": 0.5663,
      "step": 6311
    },
    {
      "epoch": 0.7739087788131437,
      "grad_norm": 1.898331959705898,
      "learning_rate": 3.602159531679007e-06,
      "loss": 0.5822,
      "step": 6312
    },
    {
      "epoch": 0.7740313879352624,
      "grad_norm": 1.812280435426101,
      "learning_rate": 3.6017045832593835e-06,
      "loss": 0.5192,
      "step": 6313
    },
    {
      "epoch": 0.7741539970573811,
      "grad_norm": 2.074459426273879,
      "learning_rate": 3.6012495895577514e-06,
      "loss": 0.582,
      "step": 6314
    },
    {
      "epoch": 0.7742766061794998,
      "grad_norm": 1.8593435274974135,
      "learning_rate": 3.6007945505928115e-06,
      "loss": 0.5244,
      "step": 6315
    },
    {
      "epoch": 0.7743992153016185,
      "grad_norm": 2.0670635995097175,
      "learning_rate": 3.600339466383267e-06,
      "loss": 0.5182,
      "step": 6316
    },
    {
      "epoch": 0.7745218244237371,
      "grad_norm": 1.8067492038962025,
      "learning_rate": 3.5998843369478222e-06,
      "loss": 0.5399,
      "step": 6317
    },
    {
      "epoch": 0.7746444335458558,
      "grad_norm": 1.8847195520774231,
      "learning_rate": 3.599429162305184e-06,
      "loss": 0.5032,
      "step": 6318
    },
    {
      "epoch": 0.7747670426679745,
      "grad_norm": 1.7947367858994199,
      "learning_rate": 3.598973942474061e-06,
      "loss": 0.5222,
      "step": 6319
    },
    {
      "epoch": 0.7748896517900932,
      "grad_norm": 1.9460562547901639,
      "learning_rate": 3.598518677473164e-06,
      "loss": 0.5187,
      "step": 6320
    },
    {
      "epoch": 0.7750122609122119,
      "grad_norm": 1.893019461007935,
      "learning_rate": 3.5980633673212035e-06,
      "loss": 0.5146,
      "step": 6321
    },
    {
      "epoch": 0.7751348700343306,
      "grad_norm": 2.015182352693417,
      "learning_rate": 3.597608012036895e-06,
      "loss": 0.5484,
      "step": 6322
    },
    {
      "epoch": 0.7752574791564493,
      "grad_norm": 1.9480480629371677,
      "learning_rate": 3.597152611638954e-06,
      "loss": 0.5657,
      "step": 6323
    },
    {
      "epoch": 0.775380088278568,
      "grad_norm": 1.955281628550237,
      "learning_rate": 3.5966971661460987e-06,
      "loss": 0.5105,
      "step": 6324
    },
    {
      "epoch": 0.7755026974006866,
      "grad_norm": 1.7979660461208713,
      "learning_rate": 3.596241675577048e-06,
      "loss": 0.5156,
      "step": 6325
    },
    {
      "epoch": 0.7756253065228053,
      "grad_norm": 1.9807434293709376,
      "learning_rate": 3.5957861399505236e-06,
      "loss": 0.5339,
      "step": 6326
    },
    {
      "epoch": 0.775747915644924,
      "grad_norm": 1.9234361881011302,
      "learning_rate": 3.595330559285249e-06,
      "loss": 0.5236,
      "step": 6327
    },
    {
      "epoch": 0.7758705247670427,
      "grad_norm": 2.2042976166483705,
      "learning_rate": 3.5948749335999493e-06,
      "loss": 0.5415,
      "step": 6328
    },
    {
      "epoch": 0.7759931338891614,
      "grad_norm": 1.8362716215366996,
      "learning_rate": 3.594419262913351e-06,
      "loss": 0.5018,
      "step": 6329
    },
    {
      "epoch": 0.7761157430112801,
      "grad_norm": 1.7184888049643927,
      "learning_rate": 3.5939635472441846e-06,
      "loss": 0.5144,
      "step": 6330
    },
    {
      "epoch": 0.7762383521333988,
      "grad_norm": 1.8925398224498706,
      "learning_rate": 3.593507786611179e-06,
      "loss": 0.5787,
      "step": 6331
    },
    {
      "epoch": 0.7763609612555175,
      "grad_norm": 1.9051002603413305,
      "learning_rate": 3.593051981033068e-06,
      "loss": 0.511,
      "step": 6332
    },
    {
      "epoch": 0.776483570377636,
      "grad_norm": 1.90070321547783,
      "learning_rate": 3.5925961305285844e-06,
      "loss": 0.5379,
      "step": 6333
    },
    {
      "epoch": 0.7766061794997547,
      "grad_norm": 2.021385571861769,
      "learning_rate": 3.592140235116467e-06,
      "loss": 0.5194,
      "step": 6334
    },
    {
      "epoch": 0.7767287886218734,
      "grad_norm": 1.8913159351549778,
      "learning_rate": 3.591684294815451e-06,
      "loss": 0.5319,
      "step": 6335
    },
    {
      "epoch": 0.7768513977439921,
      "grad_norm": 1.913426088236604,
      "learning_rate": 3.5912283096442784e-06,
      "loss": 0.4923,
      "step": 6336
    },
    {
      "epoch": 0.7769740068661108,
      "grad_norm": 1.9988460772260994,
      "learning_rate": 3.5907722796216903e-06,
      "loss": 0.5092,
      "step": 6337
    },
    {
      "epoch": 0.7770966159882295,
      "grad_norm": 1.9378334941752031,
      "learning_rate": 3.5903162047664306e-06,
      "loss": 0.5224,
      "step": 6338
    },
    {
      "epoch": 0.7772192251103482,
      "grad_norm": 1.9268023395889893,
      "learning_rate": 3.589860085097245e-06,
      "loss": 0.5636,
      "step": 6339
    },
    {
      "epoch": 0.7773418342324669,
      "grad_norm": 2.1523346564601478,
      "learning_rate": 3.5894039206328803e-06,
      "loss": 0.5334,
      "step": 6340
    },
    {
      "epoch": 0.7774644433545855,
      "grad_norm": 2.2217110284862343,
      "learning_rate": 3.5889477113920852e-06,
      "loss": 0.5735,
      "step": 6341
    },
    {
      "epoch": 0.7775870524767042,
      "grad_norm": 1.98279076064584,
      "learning_rate": 3.588491457393612e-06,
      "loss": 0.553,
      "step": 6342
    },
    {
      "epoch": 0.7777096615988229,
      "grad_norm": 2.0746428345344348,
      "learning_rate": 3.588035158656212e-06,
      "loss": 0.507,
      "step": 6343
    },
    {
      "epoch": 0.7778322707209416,
      "grad_norm": 2.0302619557435015,
      "learning_rate": 3.587578815198642e-06,
      "loss": 0.5653,
      "step": 6344
    },
    {
      "epoch": 0.7779548798430603,
      "grad_norm": 1.7615242669395248,
      "learning_rate": 3.5871224270396565e-06,
      "loss": 0.5298,
      "step": 6345
    },
    {
      "epoch": 0.778077488965179,
      "grad_norm": 2.0234455372786067,
      "learning_rate": 3.5866659941980147e-06,
      "loss": 0.5236,
      "step": 6346
    },
    {
      "epoch": 0.7782000980872977,
      "grad_norm": 2.018163590887351,
      "learning_rate": 3.5862095166924772e-06,
      "loss": 0.4928,
      "step": 6347
    },
    {
      "epoch": 0.7783227072094164,
      "grad_norm": 1.9375145260514672,
      "learning_rate": 3.585752994541805e-06,
      "loss": 0.5221,
      "step": 6348
    },
    {
      "epoch": 0.778445316331535,
      "grad_norm": 2.049189409363384,
      "learning_rate": 3.585296427764763e-06,
      "loss": 0.5676,
      "step": 6349
    },
    {
      "epoch": 0.7785679254536537,
      "grad_norm": 1.9172537951745947,
      "learning_rate": 3.584839816380116e-06,
      "loss": 0.5552,
      "step": 6350
    },
    {
      "epoch": 0.7786905345757724,
      "grad_norm": 2.02058497076636,
      "learning_rate": 3.5843831604066324e-06,
      "loss": 0.5039,
      "step": 6351
    },
    {
      "epoch": 0.7788131436978911,
      "grad_norm": 1.7374810301998436,
      "learning_rate": 3.583926459863081e-06,
      "loss": 0.5233,
      "step": 6352
    },
    {
      "epoch": 0.7789357528200098,
      "grad_norm": 1.93097441643881,
      "learning_rate": 3.5834697147682327e-06,
      "loss": 0.5344,
      "step": 6353
    },
    {
      "epoch": 0.7790583619421285,
      "grad_norm": 1.8911292116615726,
      "learning_rate": 3.583012925140862e-06,
      "loss": 0.5579,
      "step": 6354
    },
    {
      "epoch": 0.7791809710642472,
      "grad_norm": 2.017055758248978,
      "learning_rate": 3.5825560909997424e-06,
      "loss": 0.5721,
      "step": 6355
    },
    {
      "epoch": 0.7793035801863659,
      "grad_norm": 1.8373966111892632,
      "learning_rate": 3.58209921236365e-06,
      "loss": 0.5245,
      "step": 6356
    },
    {
      "epoch": 0.7794261893084845,
      "grad_norm": 1.9233392101021152,
      "learning_rate": 3.5816422892513656e-06,
      "loss": 0.5434,
      "step": 6357
    },
    {
      "epoch": 0.7795487984306032,
      "grad_norm": 1.9176799608146016,
      "learning_rate": 3.5811853216816677e-06,
      "loss": 0.5387,
      "step": 6358
    },
    {
      "epoch": 0.7796714075527219,
      "grad_norm": 2.209542294423631,
      "learning_rate": 3.580728309673339e-06,
      "loss": 0.5306,
      "step": 6359
    },
    {
      "epoch": 0.7797940166748406,
      "grad_norm": 1.8570211607252094,
      "learning_rate": 3.580271253245163e-06,
      "loss": 0.5209,
      "step": 6360
    },
    {
      "epoch": 0.7799166257969593,
      "grad_norm": 1.8586690906044445,
      "learning_rate": 3.5798141524159267e-06,
      "loss": 0.477,
      "step": 6361
    },
    {
      "epoch": 0.780039234919078,
      "grad_norm": 1.9771188391999523,
      "learning_rate": 3.579357007204417e-06,
      "loss": 0.5525,
      "step": 6362
    },
    {
      "epoch": 0.7801618440411967,
      "grad_norm": 1.9273167498223756,
      "learning_rate": 3.5788998176294232e-06,
      "loss": 0.5248,
      "step": 6363
    },
    {
      "epoch": 0.7802844531633154,
      "grad_norm": 2.0287591498469433,
      "learning_rate": 3.578442583709737e-06,
      "loss": 0.5605,
      "step": 6364
    },
    {
      "epoch": 0.780407062285434,
      "grad_norm": 2.0003042522868997,
      "learning_rate": 3.577985305464151e-06,
      "loss": 0.5197,
      "step": 6365
    },
    {
      "epoch": 0.7805296714075527,
      "grad_norm": 1.9539424179654497,
      "learning_rate": 3.5775279829114605e-06,
      "loss": 0.5686,
      "step": 6366
    },
    {
      "epoch": 0.7806522805296714,
      "grad_norm": 1.9886644693865518,
      "learning_rate": 3.5770706160704627e-06,
      "loss": 0.5592,
      "step": 6367
    },
    {
      "epoch": 0.7807748896517901,
      "grad_norm": 2.085030060119633,
      "learning_rate": 3.5766132049599562e-06,
      "loss": 0.5688,
      "step": 6368
    },
    {
      "epoch": 0.7808974987739088,
      "grad_norm": 2.010920256927002,
      "learning_rate": 3.5761557495987397e-06,
      "loss": 0.5236,
      "step": 6369
    },
    {
      "epoch": 0.7810201078960275,
      "grad_norm": 2.025614567733981,
      "learning_rate": 3.575698250005618e-06,
      "loss": 0.5361,
      "step": 6370
    },
    {
      "epoch": 0.7811427170181462,
      "grad_norm": 1.9173581397525945,
      "learning_rate": 3.5752407061993934e-06,
      "loss": 0.5842,
      "step": 6371
    },
    {
      "epoch": 0.7812653261402648,
      "grad_norm": 2.0524701352486083,
      "learning_rate": 3.5747831181988723e-06,
      "loss": 0.5264,
      "step": 6372
    },
    {
      "epoch": 0.7813879352623835,
      "grad_norm": 1.8158804265431745,
      "learning_rate": 3.574325486022862e-06,
      "loss": 0.5209,
      "step": 6373
    },
    {
      "epoch": 0.7815105443845022,
      "grad_norm": 2.0150119829129824,
      "learning_rate": 3.5738678096901724e-06,
      "loss": 0.5696,
      "step": 6374
    },
    {
      "epoch": 0.7816331535066209,
      "grad_norm": 2.1984279116970495,
      "learning_rate": 3.5734100892196145e-06,
      "loss": 0.5818,
      "step": 6375
    },
    {
      "epoch": 0.7817557626287396,
      "grad_norm": 1.91568764103318,
      "learning_rate": 3.5729523246300017e-06,
      "loss": 0.5151,
      "step": 6376
    },
    {
      "epoch": 0.7818783717508583,
      "grad_norm": 1.866997326042062,
      "learning_rate": 3.572494515940149e-06,
      "loss": 0.512,
      "step": 6377
    },
    {
      "epoch": 0.782000980872977,
      "grad_norm": 2.1142204737616743,
      "learning_rate": 3.5720366631688733e-06,
      "loss": 0.5584,
      "step": 6378
    },
    {
      "epoch": 0.7821235899950957,
      "grad_norm": 2.0373222361287042,
      "learning_rate": 3.5715787663349923e-06,
      "loss": 0.5144,
      "step": 6379
    },
    {
      "epoch": 0.7822461991172143,
      "grad_norm": 1.8626428702027926,
      "learning_rate": 3.571120825457327e-06,
      "loss": 0.5,
      "step": 6380
    },
    {
      "epoch": 0.782368808239333,
      "grad_norm": 1.9533133385277568,
      "learning_rate": 3.5706628405546993e-06,
      "loss": 0.5014,
      "step": 6381
    },
    {
      "epoch": 0.7824914173614517,
      "grad_norm": 1.856486830222354,
      "learning_rate": 3.570204811645934e-06,
      "loss": 0.5371,
      "step": 6382
    },
    {
      "epoch": 0.7826140264835704,
      "grad_norm": 1.8579177704640448,
      "learning_rate": 3.5697467387498565e-06,
      "loss": 0.4872,
      "step": 6383
    },
    {
      "epoch": 0.7827366356056891,
      "grad_norm": 2.2533084954202796,
      "learning_rate": 3.5692886218852934e-06,
      "loss": 0.6328,
      "step": 6384
    },
    {
      "epoch": 0.7828592447278078,
      "grad_norm": 1.6601475755900865,
      "learning_rate": 3.5688304610710757e-06,
      "loss": 0.5056,
      "step": 6385
    },
    {
      "epoch": 0.7829818538499265,
      "grad_norm": 2.016003525202509,
      "learning_rate": 3.5683722563260325e-06,
      "loss": 0.535,
      "step": 6386
    },
    {
      "epoch": 0.7831044629720452,
      "grad_norm": 1.9056891844340476,
      "learning_rate": 3.5679140076689996e-06,
      "loss": 0.5309,
      "step": 6387
    },
    {
      "epoch": 0.7832270720941638,
      "grad_norm": 2.0651416215404668,
      "learning_rate": 3.567455715118809e-06,
      "loss": 0.5137,
      "step": 6388
    },
    {
      "epoch": 0.7833496812162825,
      "grad_norm": 1.721418985363267,
      "learning_rate": 3.5669973786943e-06,
      "loss": 0.5101,
      "step": 6389
    },
    {
      "epoch": 0.7834722903384012,
      "grad_norm": 2.072747517050501,
      "learning_rate": 3.5665389984143095e-06,
      "loss": 0.5431,
      "step": 6390
    },
    {
      "epoch": 0.7835948994605199,
      "grad_norm": 1.9206429381896166,
      "learning_rate": 3.566080574297678e-06,
      "loss": 0.5324,
      "step": 6391
    },
    {
      "epoch": 0.7837175085826386,
      "grad_norm": 1.9145566974012418,
      "learning_rate": 3.565622106363247e-06,
      "loss": 0.4893,
      "step": 6392
    },
    {
      "epoch": 0.7838401177047573,
      "grad_norm": 1.9848058875754075,
      "learning_rate": 3.5651635946298614e-06,
      "loss": 0.5468,
      "step": 6393
    },
    {
      "epoch": 0.783962726826876,
      "grad_norm": 1.94060613916611,
      "learning_rate": 3.5647050391163655e-06,
      "loss": 0.546,
      "step": 6394
    },
    {
      "epoch": 0.7840853359489947,
      "grad_norm": 1.8853775293942154,
      "learning_rate": 3.5642464398416087e-06,
      "loss": 0.5392,
      "step": 6395
    },
    {
      "epoch": 0.7842079450711132,
      "grad_norm": 2.0769269013318796,
      "learning_rate": 3.563787796824439e-06,
      "loss": 0.5444,
      "step": 6396
    },
    {
      "epoch": 0.7843305541932319,
      "grad_norm": 1.9108060710904493,
      "learning_rate": 3.563329110083707e-06,
      "loss": 0.5742,
      "step": 6397
    },
    {
      "epoch": 0.7844531633153506,
      "grad_norm": 1.9951770506103148,
      "learning_rate": 3.5628703796382665e-06,
      "loss": 0.5231,
      "step": 6398
    },
    {
      "epoch": 0.7845757724374693,
      "grad_norm": 2.04775838696411,
      "learning_rate": 3.5624116055069713e-06,
      "loss": 0.5806,
      "step": 6399
    },
    {
      "epoch": 0.784698381559588,
      "grad_norm": 1.9650785515971072,
      "learning_rate": 3.561952787708679e-06,
      "loss": 0.5301,
      "step": 6400
    },
    {
      "epoch": 0.7848209906817067,
      "grad_norm": 1.894432960979396,
      "learning_rate": 3.561493926262246e-06,
      "loss": 0.5008,
      "step": 6401
    },
    {
      "epoch": 0.7849435998038254,
      "grad_norm": 1.9478558281928582,
      "learning_rate": 3.561035021186534e-06,
      "loss": 0.4868,
      "step": 6402
    },
    {
      "epoch": 0.7850662089259441,
      "grad_norm": 2.0414918575668715,
      "learning_rate": 3.5605760725004053e-06,
      "loss": 0.5735,
      "step": 6403
    },
    {
      "epoch": 0.7851888180480627,
      "grad_norm": 1.853171552586968,
      "learning_rate": 3.5601170802227203e-06,
      "loss": 0.4969,
      "step": 6404
    },
    {
      "epoch": 0.7853114271701814,
      "grad_norm": 1.8942048671410212,
      "learning_rate": 3.5596580443723483e-06,
      "loss": 0.4947,
      "step": 6405
    },
    {
      "epoch": 0.7854340362923001,
      "grad_norm": 2.021652198785041,
      "learning_rate": 3.5591989649681534e-06,
      "loss": 0.5039,
      "step": 6406
    },
    {
      "epoch": 0.7855566454144188,
      "grad_norm": 1.8999331496782437,
      "learning_rate": 3.558739842029007e-06,
      "loss": 0.5443,
      "step": 6407
    },
    {
      "epoch": 0.7856792545365375,
      "grad_norm": 1.9869915930192046,
      "learning_rate": 3.558280675573778e-06,
      "loss": 0.5227,
      "step": 6408
    },
    {
      "epoch": 0.7858018636586562,
      "grad_norm": 2.0349310954730906,
      "learning_rate": 3.55782146562134e-06,
      "loss": 0.6054,
      "step": 6409
    },
    {
      "epoch": 0.7859244727807749,
      "grad_norm": 1.9347743080300637,
      "learning_rate": 3.557362212190567e-06,
      "loss": 0.4713,
      "step": 6410
    },
    {
      "epoch": 0.7860470819028936,
      "grad_norm": 2.03779106590466,
      "learning_rate": 3.556902915300335e-06,
      "loss": 0.5774,
      "step": 6411
    },
    {
      "epoch": 0.7861696910250122,
      "grad_norm": 1.9865699883078338,
      "learning_rate": 3.556443574969522e-06,
      "loss": 0.5645,
      "step": 6412
    },
    {
      "epoch": 0.7862923001471309,
      "grad_norm": 1.8202372462547367,
      "learning_rate": 3.555984191217008e-06,
      "loss": 0.4763,
      "step": 6413
    },
    {
      "epoch": 0.7864149092692496,
      "grad_norm": 1.9741440658771827,
      "learning_rate": 3.5555247640616743e-06,
      "loss": 0.5151,
      "step": 6414
    },
    {
      "epoch": 0.7865375183913683,
      "grad_norm": 2.077096064977364,
      "learning_rate": 3.555065293522404e-06,
      "loss": 0.5055,
      "step": 6415
    },
    {
      "epoch": 0.786660127513487,
      "grad_norm": 1.9579350508021474,
      "learning_rate": 3.5546057796180826e-06,
      "loss": 0.5193,
      "step": 6416
    },
    {
      "epoch": 0.7867827366356057,
      "grad_norm": 2.0466772728694984,
      "learning_rate": 3.554146222367596e-06,
      "loss": 0.5336,
      "step": 6417
    },
    {
      "epoch": 0.7869053457577244,
      "grad_norm": 2.088574268249269,
      "learning_rate": 3.553686621789834e-06,
      "loss": 0.5308,
      "step": 6418
    },
    {
      "epoch": 0.787027954879843,
      "grad_norm": 1.8817781850190842,
      "learning_rate": 3.553226977903686e-06,
      "loss": 0.5286,
      "step": 6419
    },
    {
      "epoch": 0.7871505640019617,
      "grad_norm": 2.0261442542134853,
      "learning_rate": 3.5527672907280454e-06,
      "loss": 0.5097,
      "step": 6420
    },
    {
      "epoch": 0.7872731731240804,
      "grad_norm": 1.7327015505963164,
      "learning_rate": 3.5523075602818046e-06,
      "loss": 0.5251,
      "step": 6421
    },
    {
      "epoch": 0.7873957822461991,
      "grad_norm": 2.1574429687611585,
      "learning_rate": 3.55184778658386e-06,
      "loss": 0.5512,
      "step": 6422
    },
    {
      "epoch": 0.7875183913683178,
      "grad_norm": 1.9302033022837963,
      "learning_rate": 3.5513879696531097e-06,
      "loss": 0.556,
      "step": 6423
    },
    {
      "epoch": 0.7876410004904365,
      "grad_norm": 1.895981302392009,
      "learning_rate": 3.5509281095084527e-06,
      "loss": 0.5773,
      "step": 6424
    },
    {
      "epoch": 0.7877636096125552,
      "grad_norm": 1.9342221009177576,
      "learning_rate": 3.550468206168789e-06,
      "loss": 0.4982,
      "step": 6425
    },
    {
      "epoch": 0.7878862187346739,
      "grad_norm": 2.119498364948151,
      "learning_rate": 3.5500082596530238e-06,
      "loss": 0.577,
      "step": 6426
    },
    {
      "epoch": 0.7880088278567925,
      "grad_norm": 2.000546792839156,
      "learning_rate": 3.5495482699800588e-06,
      "loss": 0.5145,
      "step": 6427
    },
    {
      "epoch": 0.7881314369789112,
      "grad_norm": 1.9747640961882482,
      "learning_rate": 3.549088237168803e-06,
      "loss": 0.5453,
      "step": 6428
    },
    {
      "epoch": 0.7882540461010299,
      "grad_norm": 1.818930791407294,
      "learning_rate": 3.5486281612381617e-06,
      "loss": 0.5026,
      "step": 6429
    },
    {
      "epoch": 0.7883766552231486,
      "grad_norm": 2.110389435730676,
      "learning_rate": 3.548168042207048e-06,
      "loss": 0.531,
      "step": 6430
    },
    {
      "epoch": 0.7884992643452673,
      "grad_norm": 2.0016739959256933,
      "learning_rate": 3.547707880094372e-06,
      "loss": 0.5317,
      "step": 6431
    },
    {
      "epoch": 0.788621873467386,
      "grad_norm": 1.6928736918832685,
      "learning_rate": 3.5472476749190465e-06,
      "loss": 0.474,
      "step": 6432
    },
    {
      "epoch": 0.7887444825895047,
      "grad_norm": 1.8565070388584062,
      "learning_rate": 3.546787426699988e-06,
      "loss": 0.5084,
      "step": 6433
    },
    {
      "epoch": 0.7888670917116234,
      "grad_norm": 1.7750632564576605,
      "learning_rate": 3.5463271354561134e-06,
      "loss": 0.5423,
      "step": 6434
    },
    {
      "epoch": 0.788989700833742,
      "grad_norm": 2.088612938057596,
      "learning_rate": 3.5458668012063406e-06,
      "loss": 0.5045,
      "step": 6435
    },
    {
      "epoch": 0.7891123099558607,
      "grad_norm": 1.9279642609895769,
      "learning_rate": 3.545406423969591e-06,
      "loss": 0.5083,
      "step": 6436
    },
    {
      "epoch": 0.7892349190779794,
      "grad_norm": 1.7254679653175056,
      "learning_rate": 3.5449460037647863e-06,
      "loss": 0.4605,
      "step": 6437
    },
    {
      "epoch": 0.7893575282000981,
      "grad_norm": 1.962751299096428,
      "learning_rate": 3.5444855406108515e-06,
      "loss": 0.5473,
      "step": 6438
    },
    {
      "epoch": 0.7894801373222168,
      "grad_norm": 1.9663165312262636,
      "learning_rate": 3.544025034526711e-06,
      "loss": 0.5114,
      "step": 6439
    },
    {
      "epoch": 0.7896027464443355,
      "grad_norm": 1.7972461949135388,
      "learning_rate": 3.5435644855312947e-06,
      "loss": 0.47,
      "step": 6440
    },
    {
      "epoch": 0.7897253555664542,
      "grad_norm": 1.847123689237319,
      "learning_rate": 3.5431038936435295e-06,
      "loss": 0.5033,
      "step": 6441
    },
    {
      "epoch": 0.7898479646885729,
      "grad_norm": 1.9420814750457476,
      "learning_rate": 3.5426432588823476e-06,
      "loss": 0.5433,
      "step": 6442
    },
    {
      "epoch": 0.7899705738106915,
      "grad_norm": 2.000552262466982,
      "learning_rate": 3.5421825812666828e-06,
      "loss": 0.5121,
      "step": 6443
    },
    {
      "epoch": 0.7900931829328102,
      "grad_norm": 1.9302790042262332,
      "learning_rate": 3.541721860815468e-06,
      "loss": 0.5268,
      "step": 6444
    },
    {
      "epoch": 0.7902157920549289,
      "grad_norm": 1.944783019323873,
      "learning_rate": 3.5412610975476408e-06,
      "loss": 0.518,
      "step": 6445
    },
    {
      "epoch": 0.7903384011770476,
      "grad_norm": 2.0734377852803703,
      "learning_rate": 3.5408002914821387e-06,
      "loss": 0.5181,
      "step": 6446
    },
    {
      "epoch": 0.7904610102991663,
      "grad_norm": 2.1020177218808214,
      "learning_rate": 3.540339442637902e-06,
      "loss": 0.5025,
      "step": 6447
    },
    {
      "epoch": 0.790583619421285,
      "grad_norm": 2.213575301081679,
      "learning_rate": 3.539878551033873e-06,
      "loss": 0.5519,
      "step": 6448
    },
    {
      "epoch": 0.7907062285434037,
      "grad_norm": 2.1063290282127514,
      "learning_rate": 3.5394176166889943e-06,
      "loss": 0.5251,
      "step": 6449
    },
    {
      "epoch": 0.7908288376655224,
      "grad_norm": 1.9785261897233226,
      "learning_rate": 3.538956639622211e-06,
      "loss": 0.4808,
      "step": 6450
    },
    {
      "epoch": 0.790951446787641,
      "grad_norm": 1.925247765367662,
      "learning_rate": 3.5384956198524717e-06,
      "loss": 0.4987,
      "step": 6451
    },
    {
      "epoch": 0.7910740559097597,
      "grad_norm": 1.9679623161867776,
      "learning_rate": 3.538034557398723e-06,
      "loss": 0.5967,
      "step": 6452
    },
    {
      "epoch": 0.7911966650318784,
      "grad_norm": 1.8038352536118498,
      "learning_rate": 3.5375734522799165e-06,
      "loss": 0.4829,
      "step": 6453
    },
    {
      "epoch": 0.791319274153997,
      "grad_norm": 1.835694384156463,
      "learning_rate": 3.5371123045150042e-06,
      "loss": 0.5088,
      "step": 6454
    },
    {
      "epoch": 0.7914418832761158,
      "grad_norm": 1.7662896054423485,
      "learning_rate": 3.53665111412294e-06,
      "loss": 0.5238,
      "step": 6455
    },
    {
      "epoch": 0.7915644923982345,
      "grad_norm": 1.9364483287766274,
      "learning_rate": 3.53618988112268e-06,
      "loss": 0.5346,
      "step": 6456
    },
    {
      "epoch": 0.7916871015203532,
      "grad_norm": 1.8037256356289892,
      "learning_rate": 3.535728605533181e-06,
      "loss": 0.5154,
      "step": 6457
    },
    {
      "epoch": 0.7918097106424719,
      "grad_norm": 1.7598839080367152,
      "learning_rate": 3.535267287373403e-06,
      "loss": 0.5578,
      "step": 6458
    },
    {
      "epoch": 0.7919323197645904,
      "grad_norm": 1.9497929990643026,
      "learning_rate": 3.534805926662307e-06,
      "loss": 0.5385,
      "step": 6459
    },
    {
      "epoch": 0.7920549288867091,
      "grad_norm": 1.9932634947441494,
      "learning_rate": 3.534344523418855e-06,
      "loss": 0.5472,
      "step": 6460
    },
    {
      "epoch": 0.7921775380088278,
      "grad_norm": 1.8662359578310022,
      "learning_rate": 3.533883077662012e-06,
      "loss": 0.5534,
      "step": 6461
    },
    {
      "epoch": 0.7923001471309465,
      "grad_norm": 2.0329641325304184,
      "learning_rate": 3.533421589410744e-06,
      "loss": 0.5533,
      "step": 6462
    },
    {
      "epoch": 0.7924227562530652,
      "grad_norm": 1.7205937342331417,
      "learning_rate": 3.5329600586840194e-06,
      "loss": 0.4964,
      "step": 6463
    },
    {
      "epoch": 0.7925453653751839,
      "grad_norm": 2.058596601690749,
      "learning_rate": 3.532498485500808e-06,
      "loss": 0.549,
      "step": 6464
    },
    {
      "epoch": 0.7926679744973026,
      "grad_norm": 1.966649851345051,
      "learning_rate": 3.53203686988008e-06,
      "loss": 0.485,
      "step": 6465
    },
    {
      "epoch": 0.7927905836194212,
      "grad_norm": 1.8998545165481624,
      "learning_rate": 3.531575211840811e-06,
      "loss": 0.5424,
      "step": 6466
    },
    {
      "epoch": 0.7929131927415399,
      "grad_norm": 1.9003764360625996,
      "learning_rate": 3.531113511401973e-06,
      "loss": 0.5663,
      "step": 6467
    },
    {
      "epoch": 0.7930358018636586,
      "grad_norm": 1.8866679690134531,
      "learning_rate": 3.5306517685825455e-06,
      "loss": 0.5184,
      "step": 6468
    },
    {
      "epoch": 0.7931584109857773,
      "grad_norm": 1.810773428936294,
      "learning_rate": 3.5301899834015053e-06,
      "loss": 0.5778,
      "step": 6469
    },
    {
      "epoch": 0.793281020107896,
      "grad_norm": 2.141719559980839,
      "learning_rate": 3.5297281558778324e-06,
      "loss": 0.5176,
      "step": 6470
    },
    {
      "epoch": 0.7934036292300147,
      "grad_norm": 1.8542876933014827,
      "learning_rate": 3.5292662860305094e-06,
      "loss": 0.5459,
      "step": 6471
    },
    {
      "epoch": 0.7935262383521334,
      "grad_norm": 2.0349937864364076,
      "learning_rate": 3.5288043738785206e-06,
      "loss": 0.5088,
      "step": 6472
    },
    {
      "epoch": 0.7936488474742521,
      "grad_norm": 1.8527884989565628,
      "learning_rate": 3.52834241944085e-06,
      "loss": 0.4564,
      "step": 6473
    },
    {
      "epoch": 0.7937714565963707,
      "grad_norm": 1.9695981394442608,
      "learning_rate": 3.527880422736486e-06,
      "loss": 0.549,
      "step": 6474
    },
    {
      "epoch": 0.7938940657184894,
      "grad_norm": 2.0788044242876826,
      "learning_rate": 3.5274183837844162e-06,
      "loss": 0.5339,
      "step": 6475
    },
    {
      "epoch": 0.7940166748406081,
      "grad_norm": 1.917064425588753,
      "learning_rate": 3.5269563026036325e-06,
      "loss": 0.554,
      "step": 6476
    },
    {
      "epoch": 0.7941392839627268,
      "grad_norm": 1.848638175283189,
      "learning_rate": 3.5264941792131264e-06,
      "loss": 0.5147,
      "step": 6477
    },
    {
      "epoch": 0.7942618930848455,
      "grad_norm": 1.9610383446387205,
      "learning_rate": 3.5260320136318927e-06,
      "loss": 0.5494,
      "step": 6478
    },
    {
      "epoch": 0.7943845022069642,
      "grad_norm": 1.7315874324380367,
      "learning_rate": 3.525569805878926e-06,
      "loss": 0.5468,
      "step": 6479
    },
    {
      "epoch": 0.7945071113290829,
      "grad_norm": 2.0534114931851515,
      "learning_rate": 3.525107555973225e-06,
      "loss": 0.5821,
      "step": 6480
    },
    {
      "epoch": 0.7946297204512016,
      "grad_norm": 1.922174259964699,
      "learning_rate": 3.524645263933789e-06,
      "loss": 0.5124,
      "step": 6481
    },
    {
      "epoch": 0.7947523295733202,
      "grad_norm": 2.055777414383512,
      "learning_rate": 3.5241829297796183e-06,
      "loss": 0.5897,
      "step": 6482
    },
    {
      "epoch": 0.7948749386954389,
      "grad_norm": 1.9096330630652578,
      "learning_rate": 3.523720553529716e-06,
      "loss": 0.5426,
      "step": 6483
    },
    {
      "epoch": 0.7949975478175576,
      "grad_norm": 2.1434988248908944,
      "learning_rate": 3.523258135203087e-06,
      "loss": 0.5207,
      "step": 6484
    },
    {
      "epoch": 0.7951201569396763,
      "grad_norm": 1.963894284838346,
      "learning_rate": 3.5227956748187363e-06,
      "loss": 0.5289,
      "step": 6485
    },
    {
      "epoch": 0.795242766061795,
      "grad_norm": 1.8604778094464072,
      "learning_rate": 3.5223331723956734e-06,
      "loss": 0.509,
      "step": 6486
    },
    {
      "epoch": 0.7953653751839137,
      "grad_norm": 2.0313902654747147,
      "learning_rate": 3.521870627952907e-06,
      "loss": 0.5416,
      "step": 6487
    },
    {
      "epoch": 0.7954879843060324,
      "grad_norm": 2.081448884011137,
      "learning_rate": 3.5214080415094485e-06,
      "loss": 0.5503,
      "step": 6488
    },
    {
      "epoch": 0.7956105934281511,
      "grad_norm": 2.056111532572266,
      "learning_rate": 3.520945413084312e-06,
      "loss": 0.5025,
      "step": 6489
    },
    {
      "epoch": 0.7957332025502697,
      "grad_norm": 1.911168883337442,
      "learning_rate": 3.5204827426965104e-06,
      "loss": 0.4765,
      "step": 6490
    },
    {
      "epoch": 0.7958558116723884,
      "grad_norm": 2.054997480506129,
      "learning_rate": 3.520020030365062e-06,
      "loss": 0.6009,
      "step": 6491
    },
    {
      "epoch": 0.7959784207945071,
      "grad_norm": 2.0211858974883232,
      "learning_rate": 3.5195572761089854e-06,
      "loss": 0.4914,
      "step": 6492
    },
    {
      "epoch": 0.7961010299166258,
      "grad_norm": 2.059395031205948,
      "learning_rate": 3.5190944799472987e-06,
      "loss": 0.5518,
      "step": 6493
    },
    {
      "epoch": 0.7962236390387445,
      "grad_norm": 1.9537204040115372,
      "learning_rate": 3.518631641899025e-06,
      "loss": 0.5673,
      "step": 6494
    },
    {
      "epoch": 0.7963462481608632,
      "grad_norm": 1.7947523026957626,
      "learning_rate": 3.5181687619831873e-06,
      "loss": 0.487,
      "step": 6495
    },
    {
      "epoch": 0.7964688572829819,
      "grad_norm": 1.809328280552538,
      "learning_rate": 3.5177058402188113e-06,
      "loss": 0.5514,
      "step": 6496
    },
    {
      "epoch": 0.7965914664051006,
      "grad_norm": 1.8546506547865798,
      "learning_rate": 3.517242876624923e-06,
      "loss": 0.5184,
      "step": 6497
    },
    {
      "epoch": 0.7967140755272192,
      "grad_norm": 2.1842826089045144,
      "learning_rate": 3.516779871220552e-06,
      "loss": 0.5982,
      "step": 6498
    },
    {
      "epoch": 0.7968366846493379,
      "grad_norm": 1.8630591138766204,
      "learning_rate": 3.5163168240247284e-06,
      "loss": 0.5214,
      "step": 6499
    },
    {
      "epoch": 0.7969592937714566,
      "grad_norm": 1.9354840347652695,
      "learning_rate": 3.5158537350564836e-06,
      "loss": 0.5068,
      "step": 6500
    },
    {
      "epoch": 0.7970819028935753,
      "grad_norm": 1.8449544567136638,
      "learning_rate": 3.5153906043348525e-06,
      "loss": 0.4975,
      "step": 6501
    },
    {
      "epoch": 0.797204512015694,
      "grad_norm": 1.8698154499043411,
      "learning_rate": 3.5149274318788696e-06,
      "loss": 0.54,
      "step": 6502
    },
    {
      "epoch": 0.7973271211378127,
      "grad_norm": 1.9177390278676285,
      "learning_rate": 3.514464217707572e-06,
      "loss": 0.5205,
      "step": 6503
    },
    {
      "epoch": 0.7974497302599314,
      "grad_norm": 1.8929425995833926,
      "learning_rate": 3.5140009618399993e-06,
      "loss": 0.557,
      "step": 6504
    },
    {
      "epoch": 0.7975723393820501,
      "grad_norm": 2.0950713268550842,
      "learning_rate": 3.5135376642951924e-06,
      "loss": 0.5061,
      "step": 6505
    },
    {
      "epoch": 0.7976949485041687,
      "grad_norm": 1.8414664356033443,
      "learning_rate": 3.513074325092193e-06,
      "loss": 0.502,
      "step": 6506
    },
    {
      "epoch": 0.7978175576262874,
      "grad_norm": 1.8583776737220465,
      "learning_rate": 3.5126109442500458e-06,
      "loss": 0.5133,
      "step": 6507
    },
    {
      "epoch": 0.7979401667484061,
      "grad_norm": 1.8770634068938865,
      "learning_rate": 3.512147521787795e-06,
      "loss": 0.4952,
      "step": 6508
    },
    {
      "epoch": 0.7980627758705248,
      "grad_norm": 2.016317923893992,
      "learning_rate": 3.51168405772449e-06,
      "loss": 0.5902,
      "step": 6509
    },
    {
      "epoch": 0.7981853849926435,
      "grad_norm": 1.824707449321649,
      "learning_rate": 3.5112205520791785e-06,
      "loss": 0.5689,
      "step": 6510
    },
    {
      "epoch": 0.7983079941147622,
      "grad_norm": 1.8618926117301111,
      "learning_rate": 3.5107570048709127e-06,
      "loss": 0.5523,
      "step": 6511
    },
    {
      "epoch": 0.7984306032368809,
      "grad_norm": 1.854120925683788,
      "learning_rate": 3.5102934161187446e-06,
      "loss": 0.5133,
      "step": 6512
    },
    {
      "epoch": 0.7985532123589996,
      "grad_norm": 2.009233802200615,
      "learning_rate": 3.5098297858417278e-06,
      "loss": 0.5331,
      "step": 6513
    },
    {
      "epoch": 0.7986758214811182,
      "grad_norm": 2.016550277378371,
      "learning_rate": 3.5093661140589204e-06,
      "loss": 0.5537,
      "step": 6514
    },
    {
      "epoch": 0.7987984306032369,
      "grad_norm": 1.9689983249916068,
      "learning_rate": 3.508902400789377e-06,
      "loss": 0.528,
      "step": 6515
    },
    {
      "epoch": 0.7989210397253556,
      "grad_norm": 2.050243150911935,
      "learning_rate": 3.5084386460521602e-06,
      "loss": 0.5219,
      "step": 6516
    },
    {
      "epoch": 0.7990436488474743,
      "grad_norm": 1.9993203486797768,
      "learning_rate": 3.507974849866329e-06,
      "loss": 0.511,
      "step": 6517
    },
    {
      "epoch": 0.799166257969593,
      "grad_norm": 1.8335247595347308,
      "learning_rate": 3.5075110122509475e-06,
      "loss": 0.5095,
      "step": 6518
    },
    {
      "epoch": 0.7992888670917117,
      "grad_norm": 1.958188316747434,
      "learning_rate": 3.5070471332250793e-06,
      "loss": 0.527,
      "step": 6519
    },
    {
      "epoch": 0.7994114762138304,
      "grad_norm": 2.0544650673616873,
      "learning_rate": 3.5065832128077913e-06,
      "loss": 0.5559,
      "step": 6520
    },
    {
      "epoch": 0.7995340853359489,
      "grad_norm": 1.905309054462411,
      "learning_rate": 3.5061192510181513e-06,
      "loss": 0.5231,
      "step": 6521
    },
    {
      "epoch": 0.7996566944580676,
      "grad_norm": 1.8292114163510709,
      "learning_rate": 3.505655247875229e-06,
      "loss": 0.5178,
      "step": 6522
    },
    {
      "epoch": 0.7997793035801863,
      "grad_norm": 1.7427266161272676,
      "learning_rate": 3.505191203398095e-06,
      "loss": 0.5055,
      "step": 6523
    },
    {
      "epoch": 0.799901912702305,
      "grad_norm": 2.026230793340939,
      "learning_rate": 3.504727117605824e-06,
      "loss": 0.5634,
      "step": 6524
    },
    {
      "epoch": 0.8000245218244237,
      "grad_norm": 1.785394206402973,
      "learning_rate": 3.5042629905174887e-06,
      "loss": 0.5254,
      "step": 6525
    },
    {
      "epoch": 0.8001471309465424,
      "grad_norm": 1.9302470686228403,
      "learning_rate": 3.5037988221521678e-06,
      "loss": 0.5573,
      "step": 6526
    },
    {
      "epoch": 0.8002697400686611,
      "grad_norm": 2.0825571203632305,
      "learning_rate": 3.5033346125289375e-06,
      "loss": 0.6006,
      "step": 6527
    },
    {
      "epoch": 0.8003923491907798,
      "grad_norm": 2.1145691395201975,
      "learning_rate": 3.502870361666878e-06,
      "loss": 0.5492,
      "step": 6528
    },
    {
      "epoch": 0.8005149583128984,
      "grad_norm": 2.038334151921486,
      "learning_rate": 3.5024060695850715e-06,
      "loss": 0.5221,
      "step": 6529
    },
    {
      "epoch": 0.8006375674350171,
      "grad_norm": 2.0458889980553105,
      "learning_rate": 3.501941736302601e-06,
      "loss": 0.6439,
      "step": 6530
    },
    {
      "epoch": 0.8007601765571358,
      "grad_norm": 2.0276426669896357,
      "learning_rate": 3.501477361838551e-06,
      "loss": 0.5789,
      "step": 6531
    },
    {
      "epoch": 0.8008827856792545,
      "grad_norm": 1.854214516168351,
      "learning_rate": 3.501012946212009e-06,
      "loss": 0.4717,
      "step": 6532
    },
    {
      "epoch": 0.8010053948013732,
      "grad_norm": 1.8455658731926652,
      "learning_rate": 3.500548489442063e-06,
      "loss": 0.4999,
      "step": 6533
    },
    {
      "epoch": 0.8011280039234919,
      "grad_norm": 2.09450627119426,
      "learning_rate": 3.500083991547802e-06,
      "loss": 0.5663,
      "step": 6534
    },
    {
      "epoch": 0.8012506130456106,
      "grad_norm": 2.112376908691954,
      "learning_rate": 3.4996194525483185e-06,
      "loss": 0.5651,
      "step": 6535
    },
    {
      "epoch": 0.8013732221677293,
      "grad_norm": 1.9643295744155875,
      "learning_rate": 3.4991548724627054e-06,
      "loss": 0.508,
      "step": 6536
    },
    {
      "epoch": 0.8014958312898479,
      "grad_norm": 1.9519062226473465,
      "learning_rate": 3.4986902513100598e-06,
      "loss": 0.5371,
      "step": 6537
    },
    {
      "epoch": 0.8016184404119666,
      "grad_norm": 2.046544509687913,
      "learning_rate": 3.498225589109475e-06,
      "loss": 0.6008,
      "step": 6538
    },
    {
      "epoch": 0.8017410495340853,
      "grad_norm": 1.943565660769197,
      "learning_rate": 3.4977608858800525e-06,
      "loss": 0.515,
      "step": 6539
    },
    {
      "epoch": 0.801863658656204,
      "grad_norm": 1.8516908945648207,
      "learning_rate": 3.4972961416408912e-06,
      "loss": 0.5103,
      "step": 6540
    },
    {
      "epoch": 0.8019862677783227,
      "grad_norm": 1.9373632240100416,
      "learning_rate": 3.496831356411092e-06,
      "loss": 0.5031,
      "step": 6541
    },
    {
      "epoch": 0.8021088769004414,
      "grad_norm": 1.9594351437395041,
      "learning_rate": 3.49636653020976e-06,
      "loss": 0.5237,
      "step": 6542
    },
    {
      "epoch": 0.8022314860225601,
      "grad_norm": 1.9430542012379903,
      "learning_rate": 3.4959016630559995e-06,
      "loss": 0.5132,
      "step": 6543
    },
    {
      "epoch": 0.8023540951446788,
      "grad_norm": 1.914332725298793,
      "learning_rate": 3.4954367549689173e-06,
      "loss": 0.5323,
      "step": 6544
    },
    {
      "epoch": 0.8024767042667974,
      "grad_norm": 1.9724658109505937,
      "learning_rate": 3.4949718059676226e-06,
      "loss": 0.4991,
      "step": 6545
    },
    {
      "epoch": 0.8025993133889161,
      "grad_norm": 1.9592865233458339,
      "learning_rate": 3.494506816071225e-06,
      "loss": 0.5719,
      "step": 6546
    },
    {
      "epoch": 0.8027219225110348,
      "grad_norm": 1.940256519580203,
      "learning_rate": 3.494041785298837e-06,
      "loss": 0.5363,
      "step": 6547
    },
    {
      "epoch": 0.8028445316331535,
      "grad_norm": 2.0967887288548566,
      "learning_rate": 3.4935767136695707e-06,
      "loss": 0.4734,
      "step": 6548
    },
    {
      "epoch": 0.8029671407552722,
      "grad_norm": 1.7638859310812707,
      "learning_rate": 3.493111601202544e-06,
      "loss": 0.522,
      "step": 6549
    },
    {
      "epoch": 0.8030897498773909,
      "grad_norm": 2.07689506033863,
      "learning_rate": 3.4926464479168715e-06,
      "loss": 0.5234,
      "step": 6550
    },
    {
      "epoch": 0.8032123589995096,
      "grad_norm": 2.111453553897087,
      "learning_rate": 3.4921812538316723e-06,
      "loss": 0.5067,
      "step": 6551
    },
    {
      "epoch": 0.8033349681216283,
      "grad_norm": 1.8866502567742547,
      "learning_rate": 3.4917160189660676e-06,
      "loss": 0.5513,
      "step": 6552
    },
    {
      "epoch": 0.8034575772437469,
      "grad_norm": 2.169073373468326,
      "learning_rate": 3.4912507433391783e-06,
      "loss": 0.5401,
      "step": 6553
    },
    {
      "epoch": 0.8035801863658656,
      "grad_norm": 1.943843511888034,
      "learning_rate": 3.4907854269701292e-06,
      "loss": 0.5252,
      "step": 6554
    },
    {
      "epoch": 0.8037027954879843,
      "grad_norm": 1.9258765100499888,
      "learning_rate": 3.4903200698780446e-06,
      "loss": 0.5208,
      "step": 6555
    },
    {
      "epoch": 0.803825404610103,
      "grad_norm": 1.8314585865555386,
      "learning_rate": 3.4898546720820526e-06,
      "loss": 0.5161,
      "step": 6556
    },
    {
      "epoch": 0.8039480137322217,
      "grad_norm": 1.90883874528657,
      "learning_rate": 3.48938923360128e-06,
      "loss": 0.4985,
      "step": 6557
    },
    {
      "epoch": 0.8040706228543404,
      "grad_norm": 2.1359749713084666,
      "learning_rate": 3.4889237544548594e-06,
      "loss": 0.5388,
      "step": 6558
    },
    {
      "epoch": 0.8041932319764591,
      "grad_norm": 2.1346116643324593,
      "learning_rate": 3.488458234661921e-06,
      "loss": 0.5755,
      "step": 6559
    },
    {
      "epoch": 0.8043158410985778,
      "grad_norm": 1.8636712365027845,
      "learning_rate": 3.4879926742416e-06,
      "loss": 0.5324,
      "step": 6560
    },
    {
      "epoch": 0.8044384502206964,
      "grad_norm": 1.9360854886316181,
      "learning_rate": 3.48752707321303e-06,
      "loss": 0.5555,
      "step": 6561
    },
    {
      "epoch": 0.8045610593428151,
      "grad_norm": 1.996901798281271,
      "learning_rate": 3.48706143159535e-06,
      "loss": 0.5013,
      "step": 6562
    },
    {
      "epoch": 0.8046836684649338,
      "grad_norm": 1.8454220322722756,
      "learning_rate": 3.4865957494076964e-06,
      "loss": 0.5557,
      "step": 6563
    },
    {
      "epoch": 0.8048062775870525,
      "grad_norm": 1.857229378279264,
      "learning_rate": 3.486130026669212e-06,
      "loss": 0.593,
      "step": 6564
    },
    {
      "epoch": 0.8049288867091712,
      "grad_norm": 1.9333317867876143,
      "learning_rate": 3.4856642633990376e-06,
      "loss": 0.5212,
      "step": 6565
    },
    {
      "epoch": 0.8050514958312899,
      "grad_norm": 1.8661576439945677,
      "learning_rate": 3.485198459616317e-06,
      "loss": 0.5131,
      "step": 6566
    },
    {
      "epoch": 0.8051741049534086,
      "grad_norm": 1.9202049695594676,
      "learning_rate": 3.4847326153401952e-06,
      "loss": 0.5493,
      "step": 6567
    },
    {
      "epoch": 0.8052967140755272,
      "grad_norm": 1.8876757223093983,
      "learning_rate": 3.4842667305898196e-06,
      "loss": 0.4653,
      "step": 6568
    },
    {
      "epoch": 0.8054193231976459,
      "grad_norm": 1.8483394245015352,
      "learning_rate": 3.4838008053843394e-06,
      "loss": 0.4757,
      "step": 6569
    },
    {
      "epoch": 0.8055419323197646,
      "grad_norm": 2.0340961498076195,
      "learning_rate": 3.483334839742904e-06,
      "loss": 0.5461,
      "step": 6570
    },
    {
      "epoch": 0.8056645414418833,
      "grad_norm": 2.0029111993076354,
      "learning_rate": 3.4828688336846657e-06,
      "loss": 0.5291,
      "step": 6571
    },
    {
      "epoch": 0.805787150564002,
      "grad_norm": 2.024831415948298,
      "learning_rate": 3.4824027872287785e-06,
      "loss": 0.5115,
      "step": 6572
    },
    {
      "epoch": 0.8059097596861207,
      "grad_norm": 2.1450373683394632,
      "learning_rate": 3.481936700394397e-06,
      "loss": 0.5558,
      "step": 6573
    },
    {
      "epoch": 0.8060323688082394,
      "grad_norm": 1.9728455883505487,
      "learning_rate": 3.48147057320068e-06,
      "loss": 0.4788,
      "step": 6574
    },
    {
      "epoch": 0.8061549779303581,
      "grad_norm": 1.7157101163007158,
      "learning_rate": 3.4810044056667835e-06,
      "loss": 0.4678,
      "step": 6575
    },
    {
      "epoch": 0.8062775870524767,
      "grad_norm": 1.8833514875532609,
      "learning_rate": 3.48053819781187e-06,
      "loss": 0.4798,
      "step": 6576
    },
    {
      "epoch": 0.8064001961745954,
      "grad_norm": 1.9375112734811966,
      "learning_rate": 3.4800719496551005e-06,
      "loss": 0.5929,
      "step": 6577
    },
    {
      "epoch": 0.806522805296714,
      "grad_norm": 1.8393576639280993,
      "learning_rate": 3.4796056612156387e-06,
      "loss": 0.4695,
      "step": 6578
    },
    {
      "epoch": 0.8066454144188328,
      "grad_norm": 2.100621845291715,
      "learning_rate": 3.47913933251265e-06,
      "loss": 0.5889,
      "step": 6579
    },
    {
      "epoch": 0.8067680235409515,
      "grad_norm": 2.036653713812867,
      "learning_rate": 3.478672963565301e-06,
      "loss": 0.5236,
      "step": 6580
    },
    {
      "epoch": 0.8068906326630702,
      "grad_norm": 2.107603234490559,
      "learning_rate": 3.4782065543927614e-06,
      "loss": 0.5739,
      "step": 6581
    },
    {
      "epoch": 0.8070132417851889,
      "grad_norm": 1.9911049112416166,
      "learning_rate": 3.4777401050141995e-06,
      "loss": 0.5107,
      "step": 6582
    },
    {
      "epoch": 0.8071358509073076,
      "grad_norm": 2.1712851504547372,
      "learning_rate": 3.4772736154487892e-06,
      "loss": 0.5148,
      "step": 6583
    },
    {
      "epoch": 0.8072584600294261,
      "grad_norm": 2.139198436716717,
      "learning_rate": 3.476807085715703e-06,
      "loss": 0.5626,
      "step": 6584
    },
    {
      "epoch": 0.8073810691515448,
      "grad_norm": 1.9643199822180912,
      "learning_rate": 3.4763405158341167e-06,
      "loss": 0.4922,
      "step": 6585
    },
    {
      "epoch": 0.8075036782736635,
      "grad_norm": 1.708287114465528,
      "learning_rate": 3.4758739058232056e-06,
      "loss": 0.5306,
      "step": 6586
    },
    {
      "epoch": 0.8076262873957822,
      "grad_norm": 1.9184396935731896,
      "learning_rate": 3.47540725570215e-06,
      "loss": 0.5869,
      "step": 6587
    },
    {
      "epoch": 0.8077488965179009,
      "grad_norm": 1.9870907681455945,
      "learning_rate": 3.4749405654901297e-06,
      "loss": 0.548,
      "step": 6588
    },
    {
      "epoch": 0.8078715056400196,
      "grad_norm": 2.063707534209952,
      "learning_rate": 3.474473835206326e-06,
      "loss": 0.5458,
      "step": 6589
    },
    {
      "epoch": 0.8079941147621383,
      "grad_norm": 2.074019904114416,
      "learning_rate": 3.474007064869922e-06,
      "loss": 0.5492,
      "step": 6590
    },
    {
      "epoch": 0.808116723884257,
      "grad_norm": 2.0689165194499832,
      "learning_rate": 3.4735402545001035e-06,
      "loss": 0.5394,
      "step": 6591
    },
    {
      "epoch": 0.8082393330063756,
      "grad_norm": 1.9760908854932813,
      "learning_rate": 3.4730734041160574e-06,
      "loss": 0.5929,
      "step": 6592
    },
    {
      "epoch": 0.8083619421284943,
      "grad_norm": 2.0318410871497696,
      "learning_rate": 3.4726065137369714e-06,
      "loss": 0.4601,
      "step": 6593
    },
    {
      "epoch": 0.808484551250613,
      "grad_norm": 1.7498907360786038,
      "learning_rate": 3.4721395833820355e-06,
      "loss": 0.5365,
      "step": 6594
    },
    {
      "epoch": 0.8086071603727317,
      "grad_norm": 1.9517294741889348,
      "learning_rate": 3.4716726130704424e-06,
      "loss": 0.5152,
      "step": 6595
    },
    {
      "epoch": 0.8087297694948504,
      "grad_norm": 1.9700910553840902,
      "learning_rate": 3.4712056028213836e-06,
      "loss": 0.5605,
      "step": 6596
    },
    {
      "epoch": 0.8088523786169691,
      "grad_norm": 1.687064917179379,
      "learning_rate": 3.4707385526540566e-06,
      "loss": 0.5477,
      "step": 6597
    },
    {
      "epoch": 0.8089749877390878,
      "grad_norm": 1.8530803482901355,
      "learning_rate": 3.4702714625876554e-06,
      "loss": 0.515,
      "step": 6598
    },
    {
      "epoch": 0.8090975968612065,
      "grad_norm": 2.066810322993253,
      "learning_rate": 3.46980433264138e-06,
      "loss": 0.6153,
      "step": 6599
    },
    {
      "epoch": 0.8092202059833251,
      "grad_norm": 1.9240076859649176,
      "learning_rate": 3.46933716283443e-06,
      "loss": 0.5163,
      "step": 6600
    },
    {
      "epoch": 0.8093428151054438,
      "grad_norm": 1.735940166937867,
      "learning_rate": 3.4688699531860056e-06,
      "loss": 0.5198,
      "step": 6601
    },
    {
      "epoch": 0.8094654242275625,
      "grad_norm": 1.9420795778854802,
      "learning_rate": 3.4684027037153107e-06,
      "loss": 0.4987,
      "step": 6602
    },
    {
      "epoch": 0.8095880333496812,
      "grad_norm": 1.8316084494173208,
      "learning_rate": 3.4679354144415513e-06,
      "loss": 0.5279,
      "step": 6603
    },
    {
      "epoch": 0.8097106424717999,
      "grad_norm": 1.938690679952882,
      "learning_rate": 3.4674680853839317e-06,
      "loss": 0.5112,
      "step": 6604
    },
    {
      "epoch": 0.8098332515939186,
      "grad_norm": 1.8876819790363095,
      "learning_rate": 3.4670007165616617e-06,
      "loss": 0.4191,
      "step": 6605
    },
    {
      "epoch": 0.8099558607160373,
      "grad_norm": 1.8939773883022575,
      "learning_rate": 3.46653330799395e-06,
      "loss": 0.5224,
      "step": 6606
    },
    {
      "epoch": 0.810078469838156,
      "grad_norm": 1.8389448955785872,
      "learning_rate": 3.466065859700008e-06,
      "loss": 0.4707,
      "step": 6607
    },
    {
      "epoch": 0.8102010789602746,
      "grad_norm": 1.9840260501427336,
      "learning_rate": 3.46559837169905e-06,
      "loss": 0.4997,
      "step": 6608
    },
    {
      "epoch": 0.8103236880823933,
      "grad_norm": 1.9911041645584284,
      "learning_rate": 3.4651308440102882e-06,
      "loss": 0.5821,
      "step": 6609
    },
    {
      "epoch": 0.810446297204512,
      "grad_norm": 1.8663881042985484,
      "learning_rate": 3.464663276652941e-06,
      "loss": 0.5312,
      "step": 6610
    },
    {
      "epoch": 0.8105689063266307,
      "grad_norm": 1.8884431904756869,
      "learning_rate": 3.4641956696462244e-06,
      "loss": 0.5059,
      "step": 6611
    },
    {
      "epoch": 0.8106915154487494,
      "grad_norm": 2.008993779615881,
      "learning_rate": 3.4637280230093594e-06,
      "loss": 0.5006,
      "step": 6612
    },
    {
      "epoch": 0.8108141245708681,
      "grad_norm": 2.007480089466211,
      "learning_rate": 3.463260336761567e-06,
      "loss": 0.575,
      "step": 6613
    },
    {
      "epoch": 0.8109367336929868,
      "grad_norm": 1.9738376187103444,
      "learning_rate": 3.4627926109220684e-06,
      "loss": 0.4696,
      "step": 6614
    },
    {
      "epoch": 0.8110593428151054,
      "grad_norm": 2.0940441433640693,
      "learning_rate": 3.4623248455100893e-06,
      "loss": 0.5621,
      "step": 6615
    },
    {
      "epoch": 0.8111819519372241,
      "grad_norm": 1.832775789608674,
      "learning_rate": 3.461857040544856e-06,
      "loss": 0.4914,
      "step": 6616
    },
    {
      "epoch": 0.8113045610593428,
      "grad_norm": 1.7475863802475822,
      "learning_rate": 3.4613891960455947e-06,
      "loss": 0.4571,
      "step": 6617
    },
    {
      "epoch": 0.8114271701814615,
      "grad_norm": 1.8050105701658214,
      "learning_rate": 3.4609213120315355e-06,
      "loss": 0.5026,
      "step": 6618
    },
    {
      "epoch": 0.8115497793035802,
      "grad_norm": 2.014649434814313,
      "learning_rate": 3.4604533885219096e-06,
      "loss": 0.5334,
      "step": 6619
    },
    {
      "epoch": 0.8116723884256989,
      "grad_norm": 1.7105104075985973,
      "learning_rate": 3.4599854255359494e-06,
      "loss": 0.5892,
      "step": 6620
    },
    {
      "epoch": 0.8117949975478176,
      "grad_norm": 1.9528011718322416,
      "learning_rate": 3.459517423092888e-06,
      "loss": 0.5293,
      "step": 6621
    },
    {
      "epoch": 0.8119176066699363,
      "grad_norm": 2.0143462738214373,
      "learning_rate": 3.4590493812119625e-06,
      "loss": 0.4935,
      "step": 6622
    },
    {
      "epoch": 0.8120402157920549,
      "grad_norm": 1.9843473386073804,
      "learning_rate": 3.458581299912409e-06,
      "loss": 0.569,
      "step": 6623
    },
    {
      "epoch": 0.8121628249141736,
      "grad_norm": 1.9350932689207467,
      "learning_rate": 3.458113179213467e-06,
      "loss": 0.504,
      "step": 6624
    },
    {
      "epoch": 0.8122854340362923,
      "grad_norm": 1.9849348433541127,
      "learning_rate": 3.4576450191343776e-06,
      "loss": 0.565,
      "step": 6625
    },
    {
      "epoch": 0.812408043158411,
      "grad_norm": 2.0774741081404917,
      "learning_rate": 3.4571768196943824e-06,
      "loss": 0.5203,
      "step": 6626
    },
    {
      "epoch": 0.8125306522805297,
      "grad_norm": 2.082101221868517,
      "learning_rate": 3.4567085809127247e-06,
      "loss": 0.5575,
      "step": 6627
    },
    {
      "epoch": 0.8126532614026484,
      "grad_norm": 2.0379156094213013,
      "learning_rate": 3.4562403028086513e-06,
      "loss": 0.5273,
      "step": 6628
    },
    {
      "epoch": 0.8127758705247671,
      "grad_norm": 2.0882218458209434,
      "learning_rate": 3.4557719854014084e-06,
      "loss": 0.5091,
      "step": 6629
    },
    {
      "epoch": 0.8128984796468858,
      "grad_norm": 1.8302381282673499,
      "learning_rate": 3.455303628710245e-06,
      "loss": 0.4613,
      "step": 6630
    },
    {
      "epoch": 0.8130210887690044,
      "grad_norm": 1.9349488366637109,
      "learning_rate": 3.4548352327544106e-06,
      "loss": 0.5645,
      "step": 6631
    },
    {
      "epoch": 0.8131436978911231,
      "grad_norm": 2.0109004571278724,
      "learning_rate": 3.454366797553158e-06,
      "loss": 0.4785,
      "step": 6632
    },
    {
      "epoch": 0.8132663070132418,
      "grad_norm": 1.946534027166122,
      "learning_rate": 3.4538983231257412e-06,
      "loss": 0.5057,
      "step": 6633
    },
    {
      "epoch": 0.8133889161353605,
      "grad_norm": 2.1442959669605828,
      "learning_rate": 3.4534298094914137e-06,
      "loss": 0.6249,
      "step": 6634
    },
    {
      "epoch": 0.8135115252574792,
      "grad_norm": 1.8775094981398677,
      "learning_rate": 3.4529612566694337e-06,
      "loss": 0.5377,
      "step": 6635
    },
    {
      "epoch": 0.8136341343795979,
      "grad_norm": 1.9528186657929372,
      "learning_rate": 3.4524926646790583e-06,
      "loss": 0.509,
      "step": 6636
    },
    {
      "epoch": 0.8137567435017166,
      "grad_norm": 2.032176064574017,
      "learning_rate": 3.452024033539549e-06,
      "loss": 0.6048,
      "step": 6637
    },
    {
      "epoch": 0.8138793526238353,
      "grad_norm": 1.9790355114903704,
      "learning_rate": 3.451555363270166e-06,
      "loss": 0.5313,
      "step": 6638
    },
    {
      "epoch": 0.8140019617459539,
      "grad_norm": 2.058209269267965,
      "learning_rate": 3.451086653890173e-06,
      "loss": 0.5109,
      "step": 6639
    },
    {
      "epoch": 0.8141245708680726,
      "grad_norm": 2.055955844937716,
      "learning_rate": 3.450617905418834e-06,
      "loss": 0.487,
      "step": 6640
    },
    {
      "epoch": 0.8142471799901912,
      "grad_norm": 3.6230544086076613,
      "learning_rate": 3.4501491178754174e-06,
      "loss": 0.5499,
      "step": 6641
    },
    {
      "epoch": 0.81436978911231,
      "grad_norm": 2.014753741012336,
      "learning_rate": 3.4496802912791892e-06,
      "loss": 0.5732,
      "step": 6642
    },
    {
      "epoch": 0.8144923982344286,
      "grad_norm": 1.8510762740825661,
      "learning_rate": 3.4492114256494204e-06,
      "loss": 0.5387,
      "step": 6643
    },
    {
      "epoch": 0.8146150073565473,
      "grad_norm": 1.8137640201979035,
      "learning_rate": 3.4487425210053804e-06,
      "loss": 0.4933,
      "step": 6644
    },
    {
      "epoch": 0.814737616478666,
      "grad_norm": 1.7134573508855306,
      "learning_rate": 3.4482735773663446e-06,
      "loss": 0.4607,
      "step": 6645
    },
    {
      "epoch": 0.8148602256007847,
      "grad_norm": 2.0541869549666147,
      "learning_rate": 3.447804594751585e-06,
      "loss": 0.5071,
      "step": 6646
    },
    {
      "epoch": 0.8149828347229033,
      "grad_norm": 1.9248999624550651,
      "learning_rate": 3.4473355731803793e-06,
      "loss": 0.5441,
      "step": 6647
    },
    {
      "epoch": 0.815105443845022,
      "grad_norm": 1.8845650738395077,
      "learning_rate": 3.4468665126720042e-06,
      "loss": 0.5374,
      "step": 6648
    },
    {
      "epoch": 0.8152280529671407,
      "grad_norm": 1.8517642046168612,
      "learning_rate": 3.446397413245739e-06,
      "loss": 0.5204,
      "step": 6649
    },
    {
      "epoch": 0.8153506620892594,
      "grad_norm": 1.8651368682113725,
      "learning_rate": 3.4459282749208643e-06,
      "loss": 0.5138,
      "step": 6650
    },
    {
      "epoch": 0.8154732712113781,
      "grad_norm": 1.8880722344459397,
      "learning_rate": 3.445459097716663e-06,
      "loss": 0.5277,
      "step": 6651
    },
    {
      "epoch": 0.8155958803334968,
      "grad_norm": 1.8337472797861387,
      "learning_rate": 3.444989881652419e-06,
      "loss": 0.4765,
      "step": 6652
    },
    {
      "epoch": 0.8157184894556155,
      "grad_norm": 1.7952251852669932,
      "learning_rate": 3.4445206267474184e-06,
      "loss": 0.504,
      "step": 6653
    },
    {
      "epoch": 0.8158410985777342,
      "grad_norm": 2.0584174915881275,
      "learning_rate": 3.4440513330209473e-06,
      "loss": 0.5508,
      "step": 6654
    },
    {
      "epoch": 0.8159637076998528,
      "grad_norm": 1.8620598541245212,
      "learning_rate": 3.4435820004922954e-06,
      "loss": 0.5215,
      "step": 6655
    },
    {
      "epoch": 0.8160863168219715,
      "grad_norm": 1.9386258153978275,
      "learning_rate": 3.4431126291807527e-06,
      "loss": 0.5187,
      "step": 6656
    },
    {
      "epoch": 0.8162089259440902,
      "grad_norm": 2.0319054058066657,
      "learning_rate": 3.4426432191056115e-06,
      "loss": 0.5426,
      "step": 6657
    },
    {
      "epoch": 0.8163315350662089,
      "grad_norm": 1.815319913380825,
      "learning_rate": 3.442173770286165e-06,
      "loss": 0.5198,
      "step": 6658
    },
    {
      "epoch": 0.8164541441883276,
      "grad_norm": 2.0025040908447878,
      "learning_rate": 3.441704282741708e-06,
      "loss": 0.5604,
      "step": 6659
    },
    {
      "epoch": 0.8165767533104463,
      "grad_norm": 2.0289482397606053,
      "learning_rate": 3.4412347564915388e-06,
      "loss": 0.5104,
      "step": 6660
    },
    {
      "epoch": 0.816699362432565,
      "grad_norm": 2.1955703264930353,
      "learning_rate": 3.4407651915549546e-06,
      "loss": 0.5226,
      "step": 6661
    },
    {
      "epoch": 0.8168219715546837,
      "grad_norm": 2.0410835750629994,
      "learning_rate": 3.440295587951255e-06,
      "loss": 0.5294,
      "step": 6662
    },
    {
      "epoch": 0.8169445806768023,
      "grad_norm": 2.0167803004016034,
      "learning_rate": 3.4398259456997425e-06,
      "loss": 0.4964,
      "step": 6663
    },
    {
      "epoch": 0.817067189798921,
      "grad_norm": 1.863220924982685,
      "learning_rate": 3.4393562648197197e-06,
      "loss": 0.522,
      "step": 6664
    },
    {
      "epoch": 0.8171897989210397,
      "grad_norm": 1.9091669366621447,
      "learning_rate": 3.4388865453304915e-06,
      "loss": 0.4988,
      "step": 6665
    },
    {
      "epoch": 0.8173124080431584,
      "grad_norm": 1.9178509983444136,
      "learning_rate": 3.4384167872513642e-06,
      "loss": 0.5442,
      "step": 6666
    },
    {
      "epoch": 0.8174350171652771,
      "grad_norm": 1.9972517660968463,
      "learning_rate": 3.437946990601645e-06,
      "loss": 0.5442,
      "step": 6667
    },
    {
      "epoch": 0.8175576262873958,
      "grad_norm": 1.804140153168962,
      "learning_rate": 3.437477155400645e-06,
      "loss": 0.5401,
      "step": 6668
    },
    {
      "epoch": 0.8176802354095145,
      "grad_norm": 1.9832856137237442,
      "learning_rate": 3.4370072816676732e-06,
      "loss": 0.556,
      "step": 6669
    },
    {
      "epoch": 0.8178028445316331,
      "grad_norm": 1.830407683080048,
      "learning_rate": 3.436537369422044e-06,
      "loss": 0.5277,
      "step": 6670
    },
    {
      "epoch": 0.8179254536537518,
      "grad_norm": 1.9706841558457924,
      "learning_rate": 3.4360674186830704e-06,
      "loss": 0.5314,
      "step": 6671
    },
    {
      "epoch": 0.8180480627758705,
      "grad_norm": 2.100658221839882,
      "learning_rate": 3.435597429470069e-06,
      "loss": 0.5488,
      "step": 6672
    },
    {
      "epoch": 0.8181706718979892,
      "grad_norm": 1.9182000506123231,
      "learning_rate": 3.4351274018023567e-06,
      "loss": 0.5377,
      "step": 6673
    },
    {
      "epoch": 0.8182932810201079,
      "grad_norm": 2.094114555969288,
      "learning_rate": 3.434657335699253e-06,
      "loss": 0.5311,
      "step": 6674
    },
    {
      "epoch": 0.8184158901422266,
      "grad_norm": 2.0884441175063895,
      "learning_rate": 3.434187231180078e-06,
      "loss": 0.5575,
      "step": 6675
    },
    {
      "epoch": 0.8185384992643453,
      "grad_norm": 1.9217775855691974,
      "learning_rate": 3.4337170882641536e-06,
      "loss": 0.536,
      "step": 6676
    },
    {
      "epoch": 0.818661108386464,
      "grad_norm": 1.8850899427795587,
      "learning_rate": 3.433246906970804e-06,
      "loss": 0.5633,
      "step": 6677
    },
    {
      "epoch": 0.8187837175085826,
      "grad_norm": 1.9083309862671347,
      "learning_rate": 3.4327766873193545e-06,
      "loss": 0.5203,
      "step": 6678
    },
    {
      "epoch": 0.8189063266307013,
      "grad_norm": 1.9563506096495742,
      "learning_rate": 3.432306429329132e-06,
      "loss": 0.5453,
      "step": 6679
    },
    {
      "epoch": 0.81902893575282,
      "grad_norm": 2.0476881177755666,
      "learning_rate": 3.4318361330194644e-06,
      "loss": 0.5316,
      "step": 6680
    },
    {
      "epoch": 0.8191515448749387,
      "grad_norm": 1.9259509246324824,
      "learning_rate": 3.431365798409683e-06,
      "loss": 0.5169,
      "step": 6681
    },
    {
      "epoch": 0.8192741539970574,
      "grad_norm": 1.9660125357024616,
      "learning_rate": 3.4308954255191167e-06,
      "loss": 0.5133,
      "step": 6682
    },
    {
      "epoch": 0.8193967631191761,
      "grad_norm": 1.8290477194692618,
      "learning_rate": 3.4304250143671012e-06,
      "loss": 0.5028,
      "step": 6683
    },
    {
      "epoch": 0.8195193722412948,
      "grad_norm": 1.8954075199273914,
      "learning_rate": 3.4299545649729704e-06,
      "loss": 0.5445,
      "step": 6684
    },
    {
      "epoch": 0.8196419813634135,
      "grad_norm": 2.069891162627131,
      "learning_rate": 3.429484077356061e-06,
      "loss": 0.5091,
      "step": 6685
    },
    {
      "epoch": 0.8197645904855321,
      "grad_norm": 1.9465984918454984,
      "learning_rate": 3.4290135515357105e-06,
      "loss": 0.5586,
      "step": 6686
    },
    {
      "epoch": 0.8198871996076508,
      "grad_norm": 1.8686530114930464,
      "learning_rate": 3.4285429875312583e-06,
      "loss": 0.5283,
      "step": 6687
    },
    {
      "epoch": 0.8200098087297695,
      "grad_norm": 1.9202490249734352,
      "learning_rate": 3.4280723853620447e-06,
      "loss": 0.5951,
      "step": 6688
    },
    {
      "epoch": 0.8201324178518882,
      "grad_norm": 2.191575139316546,
      "learning_rate": 3.4276017450474134e-06,
      "loss": 0.5956,
      "step": 6689
    },
    {
      "epoch": 0.8202550269740069,
      "grad_norm": 2.0687403046502033,
      "learning_rate": 3.4271310666067085e-06,
      "loss": 0.513,
      "step": 6690
    },
    {
      "epoch": 0.8203776360961256,
      "grad_norm": 1.9590827568175952,
      "learning_rate": 3.426660350059276e-06,
      "loss": 0.5569,
      "step": 6691
    },
    {
      "epoch": 0.8205002452182443,
      "grad_norm": 1.7856950560063707,
      "learning_rate": 3.4261895954244613e-06,
      "loss": 0.5192,
      "step": 6692
    },
    {
      "epoch": 0.820622854340363,
      "grad_norm": 1.9253358920626265,
      "learning_rate": 3.425718802721615e-06,
      "loss": 0.5452,
      "step": 6693
    },
    {
      "epoch": 0.8207454634624816,
      "grad_norm": 1.8910744143439944,
      "learning_rate": 3.4252479719700872e-06,
      "loss": 0.4967,
      "step": 6694
    },
    {
      "epoch": 0.8208680725846003,
      "grad_norm": 2.063399590361652,
      "learning_rate": 3.42477710318923e-06,
      "loss": 0.5518,
      "step": 6695
    },
    {
      "epoch": 0.820990681706719,
      "grad_norm": 1.8566589245105667,
      "learning_rate": 3.4243061963983964e-06,
      "loss": 0.5104,
      "step": 6696
    },
    {
      "epoch": 0.8211132908288377,
      "grad_norm": 1.7336641276470295,
      "learning_rate": 3.4238352516169413e-06,
      "loss": 0.4544,
      "step": 6697
    },
    {
      "epoch": 0.8212358999509564,
      "grad_norm": 2.085839148762829,
      "learning_rate": 3.4233642688642227e-06,
      "loss": 0.5852,
      "step": 6698
    },
    {
      "epoch": 0.8213585090730751,
      "grad_norm": 1.8377492175601566,
      "learning_rate": 3.422893248159597e-06,
      "loss": 0.5074,
      "step": 6699
    },
    {
      "epoch": 0.8214811181951938,
      "grad_norm": 2.0572501809984893,
      "learning_rate": 3.4224221895224257e-06,
      "loss": 0.5662,
      "step": 6700
    },
    {
      "epoch": 0.8216037273173125,
      "grad_norm": 1.909822566120425,
      "learning_rate": 3.421951092972069e-06,
      "loss": 0.5346,
      "step": 6701
    },
    {
      "epoch": 0.821726336439431,
      "grad_norm": 1.8553967634391364,
      "learning_rate": 3.42147995852789e-06,
      "loss": 0.4938,
      "step": 6702
    },
    {
      "epoch": 0.8218489455615497,
      "grad_norm": 1.9825609652205833,
      "learning_rate": 3.421008786209254e-06,
      "loss": 0.5333,
      "step": 6703
    },
    {
      "epoch": 0.8219715546836684,
      "grad_norm": 1.9025109141514855,
      "learning_rate": 3.4205375760355263e-06,
      "loss": 0.5042,
      "step": 6704
    },
    {
      "epoch": 0.8220941638057871,
      "grad_norm": 1.9086534959325203,
      "learning_rate": 3.420066328026074e-06,
      "loss": 0.4911,
      "step": 6705
    },
    {
      "epoch": 0.8222167729279058,
      "grad_norm": 2.1662492896855596,
      "learning_rate": 3.4195950422002677e-06,
      "loss": 0.5113,
      "step": 6706
    },
    {
      "epoch": 0.8223393820500245,
      "grad_norm": 1.865039105689877,
      "learning_rate": 3.419123718577476e-06,
      "loss": 0.542,
      "step": 6707
    },
    {
      "epoch": 0.8224619911721432,
      "grad_norm": 2.0053140717053037,
      "learning_rate": 3.4186523571770734e-06,
      "loss": 0.518,
      "step": 6708
    },
    {
      "epoch": 0.822584600294262,
      "grad_norm": 2.0637857771932273,
      "learning_rate": 3.4181809580184318e-06,
      "loss": 0.4888,
      "step": 6709
    },
    {
      "epoch": 0.8227072094163805,
      "grad_norm": 2.0260257317783,
      "learning_rate": 3.4177095211209275e-06,
      "loss": 0.5595,
      "step": 6710
    },
    {
      "epoch": 0.8228298185384992,
      "grad_norm": 1.8279472633267067,
      "learning_rate": 3.4172380465039374e-06,
      "loss": 0.4935,
      "step": 6711
    },
    {
      "epoch": 0.8229524276606179,
      "grad_norm": 2.005806910276954,
      "learning_rate": 3.4167665341868394e-06,
      "loss": 0.5611,
      "step": 6712
    },
    {
      "epoch": 0.8230750367827366,
      "grad_norm": 2.12702033190407,
      "learning_rate": 3.4162949841890135e-06,
      "loss": 0.5425,
      "step": 6713
    },
    {
      "epoch": 0.8231976459048553,
      "grad_norm": 2.080127780937031,
      "learning_rate": 3.4158233965298425e-06,
      "loss": 0.5389,
      "step": 6714
    },
    {
      "epoch": 0.823320255026974,
      "grad_norm": 2.0608852610545156,
      "learning_rate": 3.415351771228708e-06,
      "loss": 0.5478,
      "step": 6715
    },
    {
      "epoch": 0.8234428641490927,
      "grad_norm": 2.06005486214479,
      "learning_rate": 3.4148801083049958e-06,
      "loss": 0.5611,
      "step": 6716
    },
    {
      "epoch": 0.8235654732712113,
      "grad_norm": 1.743321725299099,
      "learning_rate": 3.4144084077780904e-06,
      "loss": 0.5496,
      "step": 6717
    },
    {
      "epoch": 0.82368808239333,
      "grad_norm": 1.892804038396913,
      "learning_rate": 3.4139366696673815e-06,
      "loss": 0.5031,
      "step": 6718
    },
    {
      "epoch": 0.8238106915154487,
      "grad_norm": 1.6068757237291507,
      "learning_rate": 3.4134648939922576e-06,
      "loss": 0.5091,
      "step": 6719
    },
    {
      "epoch": 0.8239333006375674,
      "grad_norm": 1.8520227943178276,
      "learning_rate": 3.412993080772109e-06,
      "loss": 0.5506,
      "step": 6720
    },
    {
      "epoch": 0.8240559097596861,
      "grad_norm": 1.9299779867680404,
      "learning_rate": 3.4125212300263284e-06,
      "loss": 0.4987,
      "step": 6721
    },
    {
      "epoch": 0.8241785188818048,
      "grad_norm": 1.882287318238576,
      "learning_rate": 3.4120493417743095e-06,
      "loss": 0.5485,
      "step": 6722
    },
    {
      "epoch": 0.8243011280039235,
      "grad_norm": 1.9192843948472442,
      "learning_rate": 3.4115774160354482e-06,
      "loss": 0.5666,
      "step": 6723
    },
    {
      "epoch": 0.8244237371260422,
      "grad_norm": 2.032524599211781,
      "learning_rate": 3.4111054528291416e-06,
      "loss": 0.4944,
      "step": 6724
    },
    {
      "epoch": 0.8245463462481608,
      "grad_norm": 1.9923937054964225,
      "learning_rate": 3.4106334521747876e-06,
      "loss": 0.5318,
      "step": 6725
    },
    {
      "epoch": 0.8246689553702795,
      "grad_norm": 1.907865303969666,
      "learning_rate": 3.4101614140917866e-06,
      "loss": 0.5459,
      "step": 6726
    },
    {
      "epoch": 0.8247915644923982,
      "grad_norm": 1.997509719368281,
      "learning_rate": 3.40968933859954e-06,
      "loss": 0.5238,
      "step": 6727
    },
    {
      "epoch": 0.8249141736145169,
      "grad_norm": 1.96517240779245,
      "learning_rate": 3.4092172257174518e-06,
      "loss": 0.5094,
      "step": 6728
    },
    {
      "epoch": 0.8250367827366356,
      "grad_norm": 1.9961656641943186,
      "learning_rate": 3.4087450754649256e-06,
      "loss": 0.5421,
      "step": 6729
    },
    {
      "epoch": 0.8251593918587543,
      "grad_norm": 1.7311393726243467,
      "learning_rate": 3.4082728878613673e-06,
      "loss": 0.5392,
      "step": 6730
    },
    {
      "epoch": 0.825282000980873,
      "grad_norm": 1.8243941767124987,
      "learning_rate": 3.4078006629261864e-06,
      "loss": 0.5456,
      "step": 6731
    },
    {
      "epoch": 0.8254046101029917,
      "grad_norm": 1.7595156131789635,
      "learning_rate": 3.40732840067879e-06,
      "loss": 0.5841,
      "step": 6732
    },
    {
      "epoch": 0.8255272192251103,
      "grad_norm": 1.857790370296976,
      "learning_rate": 3.4068561011385914e-06,
      "loss": 0.5,
      "step": 6733
    },
    {
      "epoch": 0.825649828347229,
      "grad_norm": 1.9481068449219627,
      "learning_rate": 3.4063837643250007e-06,
      "loss": 0.4866,
      "step": 6734
    },
    {
      "epoch": 0.8257724374693477,
      "grad_norm": 2.076505316692435,
      "learning_rate": 3.4059113902574327e-06,
      "loss": 0.5309,
      "step": 6735
    },
    {
      "epoch": 0.8258950465914664,
      "grad_norm": 2.005452754690577,
      "learning_rate": 3.4054389789553026e-06,
      "loss": 0.4918,
      "step": 6736
    },
    {
      "epoch": 0.8260176557135851,
      "grad_norm": 1.914714943289063,
      "learning_rate": 3.404966530438028e-06,
      "loss": 0.5569,
      "step": 6737
    },
    {
      "epoch": 0.8261402648357038,
      "grad_norm": 2.059103441717331,
      "learning_rate": 3.404494044725027e-06,
      "loss": 0.5745,
      "step": 6738
    },
    {
      "epoch": 0.8262628739578225,
      "grad_norm": 1.8424677240120837,
      "learning_rate": 3.404021521835719e-06,
      "loss": 0.5199,
      "step": 6739
    },
    {
      "epoch": 0.8263854830799412,
      "grad_norm": 1.81316471446984,
      "learning_rate": 3.4035489617895255e-06,
      "loss": 0.545,
      "step": 6740
    },
    {
      "epoch": 0.8265080922020598,
      "grad_norm": 1.804244663658618,
      "learning_rate": 3.403076364605872e-06,
      "loss": 0.4986,
      "step": 6741
    },
    {
      "epoch": 0.8266307013241785,
      "grad_norm": 2.021334590772345,
      "learning_rate": 3.4026037303041793e-06,
      "loss": 0.5403,
      "step": 6742
    },
    {
      "epoch": 0.8267533104462972,
      "grad_norm": 1.8940398294291045,
      "learning_rate": 3.402131058903876e-06,
      "loss": 0.5431,
      "step": 6743
    },
    {
      "epoch": 0.8268759195684159,
      "grad_norm": 1.9836007577669068,
      "learning_rate": 3.4016583504243892e-06,
      "loss": 0.4963,
      "step": 6744
    },
    {
      "epoch": 0.8269985286905346,
      "grad_norm": 1.8599506519953,
      "learning_rate": 3.4011856048851478e-06,
      "loss": 0.5421,
      "step": 6745
    },
    {
      "epoch": 0.8271211378126533,
      "grad_norm": 1.8419869081468037,
      "learning_rate": 3.4007128223055824e-06,
      "loss": 0.5458,
      "step": 6746
    },
    {
      "epoch": 0.827243746934772,
      "grad_norm": 1.736953740915039,
      "learning_rate": 3.4002400027051254e-06,
      "loss": 0.5129,
      "step": 6747
    },
    {
      "epoch": 0.8273663560568907,
      "grad_norm": 1.758261690371539,
      "learning_rate": 3.399767146103211e-06,
      "loss": 0.481,
      "step": 6748
    },
    {
      "epoch": 0.8274889651790093,
      "grad_norm": 1.9176387738824454,
      "learning_rate": 3.3992942525192737e-06,
      "loss": 0.5051,
      "step": 6749
    },
    {
      "epoch": 0.827611574301128,
      "grad_norm": 1.8777146370316125,
      "learning_rate": 3.3988213219727505e-06,
      "loss": 0.5712,
      "step": 6750
    },
    {
      "epoch": 0.8277341834232467,
      "grad_norm": 2.0856112099185835,
      "learning_rate": 3.39834835448308e-06,
      "loss": 0.5158,
      "step": 6751
    },
    {
      "epoch": 0.8278567925453654,
      "grad_norm": 1.7791292080235497,
      "learning_rate": 3.397875350069701e-06,
      "loss": 0.4964,
      "step": 6752
    },
    {
      "epoch": 0.8279794016674841,
      "grad_norm": 1.9553052233811081,
      "learning_rate": 3.3974023087520558e-06,
      "loss": 0.5426,
      "step": 6753
    },
    {
      "epoch": 0.8281020107896028,
      "grad_norm": 1.8803800505037804,
      "learning_rate": 3.3969292305495884e-06,
      "loss": 0.5571,
      "step": 6754
    },
    {
      "epoch": 0.8282246199117215,
      "grad_norm": 1.8214108487736496,
      "learning_rate": 3.3964561154817396e-06,
      "loss": 0.5461,
      "step": 6755
    },
    {
      "epoch": 0.8283472290338402,
      "grad_norm": 1.9235502475959083,
      "learning_rate": 3.3959829635679588e-06,
      "loss": 0.5546,
      "step": 6756
    },
    {
      "epoch": 0.8284698381559588,
      "grad_norm": 1.9917319601875112,
      "learning_rate": 3.395509774827692e-06,
      "loss": 0.5791,
      "step": 6757
    },
    {
      "epoch": 0.8285924472780775,
      "grad_norm": 1.901476207177435,
      "learning_rate": 3.395036549280387e-06,
      "loss": 0.5594,
      "step": 6758
    },
    {
      "epoch": 0.8287150564001962,
      "grad_norm": 1.8330063789243636,
      "learning_rate": 3.394563286945496e-06,
      "loss": 0.5012,
      "step": 6759
    },
    {
      "epoch": 0.8288376655223149,
      "grad_norm": 1.7206424903388864,
      "learning_rate": 3.3940899878424704e-06,
      "loss": 0.5348,
      "step": 6760
    },
    {
      "epoch": 0.8289602746444336,
      "grad_norm": 2.179660625757847,
      "learning_rate": 3.3936166519907634e-06,
      "loss": 0.5842,
      "step": 6761
    },
    {
      "epoch": 0.8290828837665523,
      "grad_norm": 1.9369985971303905,
      "learning_rate": 3.393143279409829e-06,
      "loss": 0.5374,
      "step": 6762
    },
    {
      "epoch": 0.829205492888671,
      "grad_norm": 1.8711898544502645,
      "learning_rate": 3.3926698701191258e-06,
      "loss": 0.5525,
      "step": 6763
    },
    {
      "epoch": 0.8293281020107895,
      "grad_norm": 1.992942574354801,
      "learning_rate": 3.3921964241381105e-06,
      "loss": 0.5764,
      "step": 6764
    },
    {
      "epoch": 0.8294507111329082,
      "grad_norm": 2.4548819285564747,
      "learning_rate": 3.391722941486242e-06,
      "loss": 0.5077,
      "step": 6765
    },
    {
      "epoch": 0.829573320255027,
      "grad_norm": 2.040102094942818,
      "learning_rate": 3.391249422182982e-06,
      "loss": 0.5445,
      "step": 6766
    },
    {
      "epoch": 0.8296959293771456,
      "grad_norm": 2.099395018198893,
      "learning_rate": 3.3907758662477937e-06,
      "loss": 0.5371,
      "step": 6767
    },
    {
      "epoch": 0.8298185384992643,
      "grad_norm": 1.9953361273956287,
      "learning_rate": 3.3903022737001397e-06,
      "loss": 0.5819,
      "step": 6768
    },
    {
      "epoch": 0.829941147621383,
      "grad_norm": 1.9204310576456043,
      "learning_rate": 3.3898286445594863e-06,
      "loss": 0.5024,
      "step": 6769
    },
    {
      "epoch": 0.8300637567435017,
      "grad_norm": 2.0279849156216345,
      "learning_rate": 3.3893549788453e-06,
      "loss": 0.5644,
      "step": 6770
    },
    {
      "epoch": 0.8301863658656204,
      "grad_norm": 1.9271508188993665,
      "learning_rate": 3.3888812765770494e-06,
      "loss": 0.5281,
      "step": 6771
    },
    {
      "epoch": 0.830308974987739,
      "grad_norm": 2.077204876351186,
      "learning_rate": 3.3884075377742052e-06,
      "loss": 0.4677,
      "step": 6772
    },
    {
      "epoch": 0.8304315841098577,
      "grad_norm": 2.043065780444395,
      "learning_rate": 3.387933762456238e-06,
      "loss": 0.5003,
      "step": 6773
    },
    {
      "epoch": 0.8305541932319764,
      "grad_norm": 2.130431684056932,
      "learning_rate": 3.387459950642622e-06,
      "loss": 0.5159,
      "step": 6774
    },
    {
      "epoch": 0.8306768023540951,
      "grad_norm": 1.9427178858815863,
      "learning_rate": 3.38698610235283e-06,
      "loss": 0.4909,
      "step": 6775
    },
    {
      "epoch": 0.8307994114762138,
      "grad_norm": 2.0031335826838736,
      "learning_rate": 3.386512217606339e-06,
      "loss": 0.5329,
      "step": 6776
    },
    {
      "epoch": 0.8309220205983325,
      "grad_norm": 1.816174488868848,
      "learning_rate": 3.3860382964226274e-06,
      "loss": 0.5045,
      "step": 6777
    },
    {
      "epoch": 0.8310446297204512,
      "grad_norm": 1.9771861530648733,
      "learning_rate": 3.3855643388211716e-06,
      "loss": 0.513,
      "step": 6778
    },
    {
      "epoch": 0.8311672388425699,
      "grad_norm": 1.6984572671297122,
      "learning_rate": 3.3850903448214556e-06,
      "loss": 0.5551,
      "step": 6779
    },
    {
      "epoch": 0.8312898479646885,
      "grad_norm": 1.9819306147633917,
      "learning_rate": 3.384616314442958e-06,
      "loss": 0.527,
      "step": 6780
    },
    {
      "epoch": 0.8314124570868072,
      "grad_norm": 1.9263402999601316,
      "learning_rate": 3.384142247705164e-06,
      "loss": 0.5464,
      "step": 6781
    },
    {
      "epoch": 0.8315350662089259,
      "grad_norm": 2.0655254556437592,
      "learning_rate": 3.3836681446275586e-06,
      "loss": 0.5514,
      "step": 6782
    },
    {
      "epoch": 0.8316576753310446,
      "grad_norm": 1.646893428517351,
      "learning_rate": 3.3831940052296288e-06,
      "loss": 0.4778,
      "step": 6783
    },
    {
      "epoch": 0.8317802844531633,
      "grad_norm": 2.0393446269483038,
      "learning_rate": 3.3827198295308606e-06,
      "loss": 0.5789,
      "step": 6784
    },
    {
      "epoch": 0.831902893575282,
      "grad_norm": 1.9469721540071747,
      "learning_rate": 3.3822456175507457e-06,
      "loss": 0.5093,
      "step": 6785
    },
    {
      "epoch": 0.8320255026974007,
      "grad_norm": 1.9926501847451632,
      "learning_rate": 3.381771369308773e-06,
      "loss": 0.5432,
      "step": 6786
    },
    {
      "epoch": 0.8321481118195194,
      "grad_norm": 1.9956215912293298,
      "learning_rate": 3.381297084824437e-06,
      "loss": 0.5883,
      "step": 6787
    },
    {
      "epoch": 0.832270720941638,
      "grad_norm": 1.7742547425894706,
      "learning_rate": 3.3808227641172297e-06,
      "loss": 0.5205,
      "step": 6788
    },
    {
      "epoch": 0.8323933300637567,
      "grad_norm": 2.0422384306934918,
      "learning_rate": 3.3803484072066485e-06,
      "loss": 0.4987,
      "step": 6789
    },
    {
      "epoch": 0.8325159391858754,
      "grad_norm": 1.8977008237797752,
      "learning_rate": 3.379874014112188e-06,
      "loss": 0.5539,
      "step": 6790
    },
    {
      "epoch": 0.8326385483079941,
      "grad_norm": 2.0811298519205623,
      "learning_rate": 3.3793995848533484e-06,
      "loss": 0.5591,
      "step": 6791
    },
    {
      "epoch": 0.8327611574301128,
      "grad_norm": 1.7937799710011515,
      "learning_rate": 3.3789251194496285e-06,
      "loss": 0.5136,
      "step": 6792
    },
    {
      "epoch": 0.8328837665522315,
      "grad_norm": 2.0766732574173514,
      "learning_rate": 3.378450617920531e-06,
      "loss": 0.576,
      "step": 6793
    },
    {
      "epoch": 0.8330063756743502,
      "grad_norm": 1.9523291535058858,
      "learning_rate": 3.377976080285558e-06,
      "loss": 0.574,
      "step": 6794
    },
    {
      "epoch": 0.8331289847964689,
      "grad_norm": 1.9542287181459272,
      "learning_rate": 3.3775015065642125e-06,
      "loss": 0.5067,
      "step": 6795
    },
    {
      "epoch": 0.8332515939185875,
      "grad_norm": 2.1366642961140645,
      "learning_rate": 3.3770268967760026e-06,
      "loss": 0.5338,
      "step": 6796
    },
    {
      "epoch": 0.8333742030407062,
      "grad_norm": 1.8653756752532638,
      "learning_rate": 3.3765522509404346e-06,
      "loss": 0.5319,
      "step": 6797
    },
    {
      "epoch": 0.8334968121628249,
      "grad_norm": 1.950343160769389,
      "learning_rate": 3.376077569077017e-06,
      "loss": 0.5875,
      "step": 6798
    },
    {
      "epoch": 0.8336194212849436,
      "grad_norm": 1.943133847795123,
      "learning_rate": 3.37560285120526e-06,
      "loss": 0.5118,
      "step": 6799
    },
    {
      "epoch": 0.8337420304070623,
      "grad_norm": 2.0006166491155994,
      "learning_rate": 3.3751280973446766e-06,
      "loss": 0.567,
      "step": 6800
    },
    {
      "epoch": 0.833864639529181,
      "grad_norm": 2.0201754087028427,
      "learning_rate": 3.374653307514778e-06,
      "loss": 0.4993,
      "step": 6801
    },
    {
      "epoch": 0.8339872486512997,
      "grad_norm": 1.8225251567933234,
      "learning_rate": 3.3741784817350814e-06,
      "loss": 0.5038,
      "step": 6802
    },
    {
      "epoch": 0.8341098577734184,
      "grad_norm": 2.0111564615369293,
      "learning_rate": 3.3737036200251e-06,
      "loss": 0.5183,
      "step": 6803
    },
    {
      "epoch": 0.834232466895537,
      "grad_norm": 1.6871746920808979,
      "learning_rate": 3.373228722404355e-06,
      "loss": 0.5027,
      "step": 6804
    },
    {
      "epoch": 0.8343550760176557,
      "grad_norm": 1.7543993473413555,
      "learning_rate": 3.3727537888923617e-06,
      "loss": 0.5277,
      "step": 6805
    },
    {
      "epoch": 0.8344776851397744,
      "grad_norm": 1.9009642116172683,
      "learning_rate": 3.372278819508643e-06,
      "loss": 0.5403,
      "step": 6806
    },
    {
      "epoch": 0.8346002942618931,
      "grad_norm": 1.89385763659638,
      "learning_rate": 3.3718038142727214e-06,
      "loss": 0.5506,
      "step": 6807
    },
    {
      "epoch": 0.8347229033840118,
      "grad_norm": 1.856537846777535,
      "learning_rate": 3.3713287732041187e-06,
      "loss": 0.5326,
      "step": 6808
    },
    {
      "epoch": 0.8348455125061305,
      "grad_norm": 1.8971663673551569,
      "learning_rate": 3.3708536963223616e-06,
      "loss": 0.5161,
      "step": 6809
    },
    {
      "epoch": 0.8349681216282492,
      "grad_norm": 1.9635115048505734,
      "learning_rate": 3.3703785836469755e-06,
      "loss": 0.5009,
      "step": 6810
    },
    {
      "epoch": 0.8350907307503679,
      "grad_norm": 1.697232538272986,
      "learning_rate": 3.3699034351974887e-06,
      "loss": 0.4914,
      "step": 6811
    },
    {
      "epoch": 0.8352133398724865,
      "grad_norm": 1.854487693491576,
      "learning_rate": 3.3694282509934312e-06,
      "loss": 0.5511,
      "step": 6812
    },
    {
      "epoch": 0.8353359489946052,
      "grad_norm": 1.9677765068381803,
      "learning_rate": 3.3689530310543323e-06,
      "loss": 0.5608,
      "step": 6813
    },
    {
      "epoch": 0.8354585581167239,
      "grad_norm": 2.0164927819131364,
      "learning_rate": 3.3684777753997267e-06,
      "loss": 0.5578,
      "step": 6814
    },
    {
      "epoch": 0.8355811672388426,
      "grad_norm": 2.22628357606816,
      "learning_rate": 3.368002484049146e-06,
      "loss": 0.531,
      "step": 6815
    },
    {
      "epoch": 0.8357037763609613,
      "grad_norm": 1.8675185296674286,
      "learning_rate": 3.3675271570221267e-06,
      "loss": 0.5377,
      "step": 6816
    },
    {
      "epoch": 0.83582638548308,
      "grad_norm": 1.8518320252895752,
      "learning_rate": 3.367051794338206e-06,
      "loss": 0.4889,
      "step": 6817
    },
    {
      "epoch": 0.8359489946051987,
      "grad_norm": 1.9090414984349013,
      "learning_rate": 3.3665763960169208e-06,
      "loss": 0.488,
      "step": 6818
    },
    {
      "epoch": 0.8360716037273173,
      "grad_norm": 1.9489573858041755,
      "learning_rate": 3.366100962077812e-06,
      "loss": 0.5723,
      "step": 6819
    },
    {
      "epoch": 0.836194212849436,
      "grad_norm": 1.7817192457352844,
      "learning_rate": 3.3656254925404203e-06,
      "loss": 0.5392,
      "step": 6820
    },
    {
      "epoch": 0.8363168219715547,
      "grad_norm": 2.1331418709777785,
      "learning_rate": 3.365149987424288e-06,
      "loss": 0.5014,
      "step": 6821
    },
    {
      "epoch": 0.8364394310936734,
      "grad_norm": 2.061427176533735,
      "learning_rate": 3.36467444674896e-06,
      "loss": 0.5426,
      "step": 6822
    },
    {
      "epoch": 0.8365620402157921,
      "grad_norm": 1.9761974526559736,
      "learning_rate": 3.3641988705339817e-06,
      "loss": 0.4899,
      "step": 6823
    },
    {
      "epoch": 0.8366846493379108,
      "grad_norm": 1.9060391665603278,
      "learning_rate": 3.3637232587988994e-06,
      "loss": 0.4889,
      "step": 6824
    },
    {
      "epoch": 0.8368072584600295,
      "grad_norm": 1.9207223893567043,
      "learning_rate": 3.3632476115632628e-06,
      "loss": 0.5056,
      "step": 6825
    },
    {
      "epoch": 0.8369298675821482,
      "grad_norm": 1.844041821529761,
      "learning_rate": 3.36277192884662e-06,
      "loss": 0.4808,
      "step": 6826
    },
    {
      "epoch": 0.8370524767042667,
      "grad_norm": 1.8567535925188672,
      "learning_rate": 3.3622962106685246e-06,
      "loss": 0.5096,
      "step": 6827
    },
    {
      "epoch": 0.8371750858263854,
      "grad_norm": 1.872740616334936,
      "learning_rate": 3.361820457048527e-06,
      "loss": 0.4948,
      "step": 6828
    },
    {
      "epoch": 0.8372976949485041,
      "grad_norm": 1.9357223430022361,
      "learning_rate": 3.3613446680061843e-06,
      "loss": 0.5237,
      "step": 6829
    },
    {
      "epoch": 0.8374203040706228,
      "grad_norm": 1.9322371798346294,
      "learning_rate": 3.3608688435610505e-06,
      "loss": 0.5266,
      "step": 6830
    },
    {
      "epoch": 0.8375429131927415,
      "grad_norm": 1.9066519911010837,
      "learning_rate": 3.3603929837326828e-06,
      "loss": 0.5361,
      "step": 6831
    },
    {
      "epoch": 0.8376655223148602,
      "grad_norm": 1.899294366046864,
      "learning_rate": 3.3599170885406403e-06,
      "loss": 0.5314,
      "step": 6832
    },
    {
      "epoch": 0.837788131436979,
      "grad_norm": 1.9580351904665465,
      "learning_rate": 3.3594411580044833e-06,
      "loss": 0.4837,
      "step": 6833
    },
    {
      "epoch": 0.8379107405590976,
      "grad_norm": 2.0755490923855255,
      "learning_rate": 3.3589651921437727e-06,
      "loss": 0.5502,
      "step": 6834
    },
    {
      "epoch": 0.8380333496812162,
      "grad_norm": 1.9392996101420454,
      "learning_rate": 3.3584891909780733e-06,
      "loss": 0.5415,
      "step": 6835
    },
    {
      "epoch": 0.8381559588033349,
      "grad_norm": 1.9557036413278077,
      "learning_rate": 3.3580131545269468e-06,
      "loss": 0.5481,
      "step": 6836
    },
    {
      "epoch": 0.8382785679254536,
      "grad_norm": 1.9406005407140605,
      "learning_rate": 3.357537082809962e-06,
      "loss": 0.5192,
      "step": 6837
    },
    {
      "epoch": 0.8384011770475723,
      "grad_norm": 1.8739946964485117,
      "learning_rate": 3.357060975846683e-06,
      "loss": 0.4641,
      "step": 6838
    },
    {
      "epoch": 0.838523786169691,
      "grad_norm": 1.9769321069164234,
      "learning_rate": 3.3565848336566827e-06,
      "loss": 0.5564,
      "step": 6839
    },
    {
      "epoch": 0.8386463952918097,
      "grad_norm": 2.0019027692647153,
      "learning_rate": 3.3561086562595283e-06,
      "loss": 0.5397,
      "step": 6840
    },
    {
      "epoch": 0.8387690044139284,
      "grad_norm": 2.099203607492466,
      "learning_rate": 3.3556324436747928e-06,
      "loss": 0.5218,
      "step": 6841
    },
    {
      "epoch": 0.8388916135360471,
      "grad_norm": 2.0192791051004813,
      "learning_rate": 3.3551561959220484e-06,
      "loss": 0.478,
      "step": 6842
    },
    {
      "epoch": 0.8390142226581657,
      "grad_norm": 1.9498207320694987,
      "learning_rate": 3.3546799130208714e-06,
      "loss": 0.4898,
      "step": 6843
    },
    {
      "epoch": 0.8391368317802844,
      "grad_norm": 1.9794067637687787,
      "learning_rate": 3.354203594990836e-06,
      "loss": 0.5335,
      "step": 6844
    },
    {
      "epoch": 0.8392594409024031,
      "grad_norm": 2.289112931773852,
      "learning_rate": 3.353727241851521e-06,
      "loss": 0.5045,
      "step": 6845
    },
    {
      "epoch": 0.8393820500245218,
      "grad_norm": 1.905109095578383,
      "learning_rate": 3.353250853622505e-06,
      "loss": 0.5157,
      "step": 6846
    },
    {
      "epoch": 0.8395046591466405,
      "grad_norm": 2.1827446658032916,
      "learning_rate": 3.3527744303233685e-06,
      "loss": 0.528,
      "step": 6847
    },
    {
      "epoch": 0.8396272682687592,
      "grad_norm": 1.9096449207155666,
      "learning_rate": 3.3522979719736923e-06,
      "loss": 0.5385,
      "step": 6848
    },
    {
      "epoch": 0.8397498773908779,
      "grad_norm": 2.2852308792714773,
      "learning_rate": 3.3518214785930616e-06,
      "loss": 0.5376,
      "step": 6849
    },
    {
      "epoch": 0.8398724865129966,
      "grad_norm": 1.9167549003418825,
      "learning_rate": 3.3513449502010604e-06,
      "loss": 0.5223,
      "step": 6850
    },
    {
      "epoch": 0.8399950956351152,
      "grad_norm": 1.8905937590469135,
      "learning_rate": 3.350868386817273e-06,
      "loss": 0.522,
      "step": 6851
    },
    {
      "epoch": 0.8401177047572339,
      "grad_norm": 1.8553416363739714,
      "learning_rate": 3.3503917884612903e-06,
      "loss": 0.5256,
      "step": 6852
    },
    {
      "epoch": 0.8402403138793526,
      "grad_norm": 2.1384110783815173,
      "learning_rate": 3.3499151551526988e-06,
      "loss": 0.5276,
      "step": 6853
    },
    {
      "epoch": 0.8403629230014713,
      "grad_norm": 1.8861618374187987,
      "learning_rate": 3.3494384869110895e-06,
      "loss": 0.4648,
      "step": 6854
    },
    {
      "epoch": 0.84048553212359,
      "grad_norm": 1.8287636036657875,
      "learning_rate": 3.3489617837560544e-06,
      "loss": 0.5315,
      "step": 6855
    },
    {
      "epoch": 0.8406081412457087,
      "grad_norm": 1.9071939164075187,
      "learning_rate": 3.348485045707188e-06,
      "loss": 0.5112,
      "step": 6856
    },
    {
      "epoch": 0.8407307503678274,
      "grad_norm": 1.961554661751883,
      "learning_rate": 3.3480082727840835e-06,
      "loss": 0.4966,
      "step": 6857
    },
    {
      "epoch": 0.8408533594899461,
      "grad_norm": 2.221150653508849,
      "learning_rate": 3.3475314650063373e-06,
      "loss": 0.5887,
      "step": 6858
    },
    {
      "epoch": 0.8409759686120647,
      "grad_norm": 2.2833416989084174,
      "learning_rate": 3.347054622393548e-06,
      "loss": 0.573,
      "step": 6859
    },
    {
      "epoch": 0.8410985777341834,
      "grad_norm": 1.845133526927317,
      "learning_rate": 3.3465777449653148e-06,
      "loss": 0.4971,
      "step": 6860
    },
    {
      "epoch": 0.8412211868563021,
      "grad_norm": 1.758044881098329,
      "learning_rate": 3.346100832741236e-06,
      "loss": 0.4737,
      "step": 6861
    },
    {
      "epoch": 0.8413437959784208,
      "grad_norm": 1.8163625721049521,
      "learning_rate": 3.345623885740916e-06,
      "loss": 0.4915,
      "step": 6862
    },
    {
      "epoch": 0.8414664051005395,
      "grad_norm": 1.948464927018746,
      "learning_rate": 3.3451469039839566e-06,
      "loss": 0.5528,
      "step": 6863
    },
    {
      "epoch": 0.8415890142226582,
      "grad_norm": 1.9926469590367102,
      "learning_rate": 3.344669887489964e-06,
      "loss": 0.5871,
      "step": 6864
    },
    {
      "epoch": 0.8417116233447769,
      "grad_norm": 2.1948018592382286,
      "learning_rate": 3.3441928362785438e-06,
      "loss": 0.5694,
      "step": 6865
    },
    {
      "epoch": 0.8418342324668955,
      "grad_norm": 1.8373417863063075,
      "learning_rate": 3.343715750369303e-06,
      "loss": 0.5196,
      "step": 6866
    },
    {
      "epoch": 0.8419568415890142,
      "grad_norm": 1.8036912081531018,
      "learning_rate": 3.343238629781851e-06,
      "loss": 0.5147,
      "step": 6867
    },
    {
      "epoch": 0.8420794507111329,
      "grad_norm": 1.759893186809697,
      "learning_rate": 3.3427614745357985e-06,
      "loss": 0.5386,
      "step": 6868
    },
    {
      "epoch": 0.8422020598332516,
      "grad_norm": 1.8864683382344754,
      "learning_rate": 3.3422842846507575e-06,
      "loss": 0.4948,
      "step": 6869
    },
    {
      "epoch": 0.8423246689553703,
      "grad_norm": 1.9305719205145173,
      "learning_rate": 3.3418070601463416e-06,
      "loss": 0.5368,
      "step": 6870
    },
    {
      "epoch": 0.842447278077489,
      "grad_norm": 1.8819772173935159,
      "learning_rate": 3.3413298010421645e-06,
      "loss": 0.4447,
      "step": 6871
    },
    {
      "epoch": 0.8425698871996077,
      "grad_norm": 1.8276858708793944,
      "learning_rate": 3.340852507357844e-06,
      "loss": 0.5339,
      "step": 6872
    },
    {
      "epoch": 0.8426924963217264,
      "grad_norm": 1.9085925854483914,
      "learning_rate": 3.340375179112997e-06,
      "loss": 0.557,
      "step": 6873
    },
    {
      "epoch": 0.842815105443845,
      "grad_norm": 1.8809632688877043,
      "learning_rate": 3.339897816327241e-06,
      "loss": 0.5923,
      "step": 6874
    },
    {
      "epoch": 0.8429377145659637,
      "grad_norm": 1.8923160744442522,
      "learning_rate": 3.339420419020199e-06,
      "loss": 0.4956,
      "step": 6875
    },
    {
      "epoch": 0.8430603236880824,
      "grad_norm": 1.975549616956056,
      "learning_rate": 3.338942987211491e-06,
      "loss": 0.5425,
      "step": 6876
    },
    {
      "epoch": 0.8431829328102011,
      "grad_norm": 2.0155613882812053,
      "learning_rate": 3.338465520920742e-06,
      "loss": 0.5536,
      "step": 6877
    },
    {
      "epoch": 0.8433055419323198,
      "grad_norm": 1.9768782934156157,
      "learning_rate": 3.3379880201675747e-06,
      "loss": 0.5523,
      "step": 6878
    },
    {
      "epoch": 0.8434281510544385,
      "grad_norm": 1.9777555382762275,
      "learning_rate": 3.337510484971617e-06,
      "loss": 0.5453,
      "step": 6879
    },
    {
      "epoch": 0.8435507601765572,
      "grad_norm": 2.039013092146466,
      "learning_rate": 3.337032915352495e-06,
      "loss": 0.5104,
      "step": 6880
    },
    {
      "epoch": 0.8436733692986759,
      "grad_norm": 2.079659274125587,
      "learning_rate": 3.336555311329838e-06,
      "loss": 0.5663,
      "step": 6881
    },
    {
      "epoch": 0.8437959784207945,
      "grad_norm": 1.9382592600474733,
      "learning_rate": 3.3360776729232774e-06,
      "loss": 0.479,
      "step": 6882
    },
    {
      "epoch": 0.8439185875429132,
      "grad_norm": 1.7873491243334911,
      "learning_rate": 3.3356000001524446e-06,
      "loss": 0.5474,
      "step": 6883
    },
    {
      "epoch": 0.8440411966650319,
      "grad_norm": 1.927446742003687,
      "learning_rate": 3.335122293036972e-06,
      "loss": 0.5991,
      "step": 6884
    },
    {
      "epoch": 0.8441638057871506,
      "grad_norm": 1.9579540701749192,
      "learning_rate": 3.3346445515964947e-06,
      "loss": 0.52,
      "step": 6885
    },
    {
      "epoch": 0.8442864149092693,
      "grad_norm": 2.0623764149060033,
      "learning_rate": 3.3341667758506486e-06,
      "loss": 0.586,
      "step": 6886
    },
    {
      "epoch": 0.844409024031388,
      "grad_norm": 1.7164204969039423,
      "learning_rate": 3.333688965819072e-06,
      "loss": 0.5174,
      "step": 6887
    },
    {
      "epoch": 0.8445316331535067,
      "grad_norm": 2.1293566967591917,
      "learning_rate": 3.333211121521402e-06,
      "loss": 0.5466,
      "step": 6888
    },
    {
      "epoch": 0.8446542422756254,
      "grad_norm": 2.0194451700343095,
      "learning_rate": 3.3327332429772803e-06,
      "loss": 0.5464,
      "step": 6889
    },
    {
      "epoch": 0.8447768513977439,
      "grad_norm": 1.9838212604110257,
      "learning_rate": 3.3322553302063476e-06,
      "loss": 0.5457,
      "step": 6890
    },
    {
      "epoch": 0.8448994605198626,
      "grad_norm": 1.7725588819403486,
      "learning_rate": 3.331777383228248e-06,
      "loss": 0.513,
      "step": 6891
    },
    {
      "epoch": 0.8450220696419813,
      "grad_norm": 1.807394485672488,
      "learning_rate": 3.331299402062625e-06,
      "loss": 0.5278,
      "step": 6892
    },
    {
      "epoch": 0.8451446787641,
      "grad_norm": 1.899329408512921,
      "learning_rate": 3.330821386729125e-06,
      "loss": 0.5066,
      "step": 6893
    },
    {
      "epoch": 0.8452672878862187,
      "grad_norm": 2.0689471028074315,
      "learning_rate": 3.3303433372473947e-06,
      "loss": 0.5682,
      "step": 6894
    },
    {
      "epoch": 0.8453898970083374,
      "grad_norm": 2.050723391127284,
      "learning_rate": 3.329865253637084e-06,
      "loss": 0.582,
      "step": 6895
    },
    {
      "epoch": 0.8455125061304561,
      "grad_norm": 1.8668450660699327,
      "learning_rate": 3.3293871359178422e-06,
      "loss": 0.4816,
      "step": 6896
    },
    {
      "epoch": 0.8456351152525748,
      "grad_norm": 1.69611534481969,
      "learning_rate": 3.32890898410932e-06,
      "loss": 0.5471,
      "step": 6897
    },
    {
      "epoch": 0.8457577243746934,
      "grad_norm": 1.6635298527136233,
      "learning_rate": 3.3284307982311725e-06,
      "loss": 0.4995,
      "step": 6898
    },
    {
      "epoch": 0.8458803334968121,
      "grad_norm": 1.8986417844413346,
      "learning_rate": 3.327952578303051e-06,
      "loss": 0.5514,
      "step": 6899
    },
    {
      "epoch": 0.8460029426189308,
      "grad_norm": 2.006133426940156,
      "learning_rate": 3.327474324344614e-06,
      "loss": 0.4977,
      "step": 6900
    },
    {
      "epoch": 0.8461255517410495,
      "grad_norm": 2.18574550686818,
      "learning_rate": 3.3269960363755165e-06,
      "loss": 0.5701,
      "step": 6901
    },
    {
      "epoch": 0.8462481608631682,
      "grad_norm": 1.9692510444362028,
      "learning_rate": 3.3265177144154183e-06,
      "loss": 0.5554,
      "step": 6902
    },
    {
      "epoch": 0.8463707699852869,
      "grad_norm": 1.7790210409797307,
      "learning_rate": 3.326039358483979e-06,
      "loss": 0.4937,
      "step": 6903
    },
    {
      "epoch": 0.8464933791074056,
      "grad_norm": 1.9070435031200563,
      "learning_rate": 3.3255609686008588e-06,
      "loss": 0.5108,
      "step": 6904
    },
    {
      "epoch": 0.8466159882295243,
      "grad_norm": 2.1596944237058753,
      "learning_rate": 3.3250825447857217e-06,
      "loss": 0.6178,
      "step": 6905
    },
    {
      "epoch": 0.8467385973516429,
      "grad_norm": 1.8049875384917278,
      "learning_rate": 3.3246040870582314e-06,
      "loss": 0.564,
      "step": 6906
    },
    {
      "epoch": 0.8468612064737616,
      "grad_norm": 1.8034440592032916,
      "learning_rate": 3.3241255954380535e-06,
      "loss": 0.5293,
      "step": 6907
    },
    {
      "epoch": 0.8469838155958803,
      "grad_norm": 1.886531663739524,
      "learning_rate": 3.323647069944855e-06,
      "loss": 0.5016,
      "step": 6908
    },
    {
      "epoch": 0.847106424717999,
      "grad_norm": 1.88580930352872,
      "learning_rate": 3.3231685105983023e-06,
      "loss": 0.5059,
      "step": 6909
    },
    {
      "epoch": 0.8472290338401177,
      "grad_norm": 1.7342551855536326,
      "learning_rate": 3.3226899174180676e-06,
      "loss": 0.498,
      "step": 6910
    },
    {
      "epoch": 0.8473516429622364,
      "grad_norm": 1.8973445726641254,
      "learning_rate": 3.3222112904238208e-06,
      "loss": 0.572,
      "step": 6911
    },
    {
      "epoch": 0.8474742520843551,
      "grad_norm": 1.974569722868888,
      "learning_rate": 3.321732629635234e-06,
      "loss": 0.5324,
      "step": 6912
    },
    {
      "epoch": 0.8475968612064737,
      "grad_norm": 1.96388504714103,
      "learning_rate": 3.3212539350719814e-06,
      "loss": 0.5368,
      "step": 6913
    },
    {
      "epoch": 0.8477194703285924,
      "grad_norm": 1.9691444533520568,
      "learning_rate": 3.320775206753738e-06,
      "loss": 0.4785,
      "step": 6914
    },
    {
      "epoch": 0.8478420794507111,
      "grad_norm": 1.827089624627618,
      "learning_rate": 3.320296444700181e-06,
      "loss": 0.5063,
      "step": 6915
    },
    {
      "epoch": 0.8479646885728298,
      "grad_norm": 1.8554483421868766,
      "learning_rate": 3.3198176489309875e-06,
      "loss": 0.4576,
      "step": 6916
    },
    {
      "epoch": 0.8480872976949485,
      "grad_norm": 1.9121933958104425,
      "learning_rate": 3.3193388194658377e-06,
      "loss": 0.5619,
      "step": 6917
    },
    {
      "epoch": 0.8482099068170672,
      "grad_norm": 1.8951262658289303,
      "learning_rate": 3.3188599563244116e-06,
      "loss": 0.5334,
      "step": 6918
    },
    {
      "epoch": 0.8483325159391859,
      "grad_norm": 2.21179181287688,
      "learning_rate": 3.3183810595263915e-06,
      "loss": 0.553,
      "step": 6919
    },
    {
      "epoch": 0.8484551250613046,
      "grad_norm": 1.7777239137756693,
      "learning_rate": 3.3179021290914614e-06,
      "loss": 0.5106,
      "step": 6920
    },
    {
      "epoch": 0.8485777341834232,
      "grad_norm": 1.9967594843873022,
      "learning_rate": 3.3174231650393064e-06,
      "loss": 0.5167,
      "step": 6921
    },
    {
      "epoch": 0.8487003433055419,
      "grad_norm": 1.8050930346686733,
      "learning_rate": 3.3169441673896104e-06,
      "loss": 0.5363,
      "step": 6922
    },
    {
      "epoch": 0.8488229524276606,
      "grad_norm": 2.0352674649176743,
      "learning_rate": 3.316465136162064e-06,
      "loss": 0.5295,
      "step": 6923
    },
    {
      "epoch": 0.8489455615497793,
      "grad_norm": 2.044791926842515,
      "learning_rate": 3.3159860713763546e-06,
      "loss": 0.5258,
      "step": 6924
    },
    {
      "epoch": 0.849068170671898,
      "grad_norm": 1.9842889433341473,
      "learning_rate": 3.315506973052174e-06,
      "loss": 0.5456,
      "step": 6925
    },
    {
      "epoch": 0.8491907797940167,
      "grad_norm": 1.960887154284767,
      "learning_rate": 3.3150278412092126e-06,
      "loss": 0.5323,
      "step": 6926
    },
    {
      "epoch": 0.8493133889161354,
      "grad_norm": 1.8185177446775602,
      "learning_rate": 3.3145486758671634e-06,
      "loss": 0.573,
      "step": 6927
    },
    {
      "epoch": 0.8494359980382541,
      "grad_norm": 2.02656358023867,
      "learning_rate": 3.3140694770457223e-06,
      "loss": 0.544,
      "step": 6928
    },
    {
      "epoch": 0.8495586071603727,
      "grad_norm": 2.0331208782982158,
      "learning_rate": 3.313590244764584e-06,
      "loss": 0.5271,
      "step": 6929
    },
    {
      "epoch": 0.8496812162824914,
      "grad_norm": 2.023771241301393,
      "learning_rate": 3.3131109790434462e-06,
      "loss": 0.5077,
      "step": 6930
    },
    {
      "epoch": 0.8498038254046101,
      "grad_norm": 1.789829570007465,
      "learning_rate": 3.3126316799020084e-06,
      "loss": 0.5428,
      "step": 6931
    },
    {
      "epoch": 0.8499264345267288,
      "grad_norm": 1.9134777398869385,
      "learning_rate": 3.3121523473599694e-06,
      "loss": 0.4988,
      "step": 6932
    },
    {
      "epoch": 0.8500490436488475,
      "grad_norm": 1.9785950918696122,
      "learning_rate": 3.311672981437032e-06,
      "loss": 0.5488,
      "step": 6933
    },
    {
      "epoch": 0.8501716527709662,
      "grad_norm": 1.9774386984545906,
      "learning_rate": 3.311193582152896e-06,
      "loss": 0.5122,
      "step": 6934
    },
    {
      "epoch": 0.8502942618930849,
      "grad_norm": 1.9145695708324229,
      "learning_rate": 3.31071414952727e-06,
      "loss": 0.5052,
      "step": 6935
    },
    {
      "epoch": 0.8504168710152036,
      "grad_norm": 1.720719077955253,
      "learning_rate": 3.310234683579856e-06,
      "loss": 0.4923,
      "step": 6936
    },
    {
      "epoch": 0.8505394801373222,
      "grad_norm": 2.1685662867675695,
      "learning_rate": 3.309755184330362e-06,
      "loss": 0.5673,
      "step": 6937
    },
    {
      "epoch": 0.8506620892594409,
      "grad_norm": 1.9785451957228146,
      "learning_rate": 3.309275651798497e-06,
      "loss": 0.5002,
      "step": 6938
    },
    {
      "epoch": 0.8507846983815596,
      "grad_norm": 1.941823473996874,
      "learning_rate": 3.308796086003969e-06,
      "loss": 0.5051,
      "step": 6939
    },
    {
      "epoch": 0.8509073075036783,
      "grad_norm": 2.153249502254411,
      "learning_rate": 3.308316486966491e-06,
      "loss": 0.553,
      "step": 6940
    },
    {
      "epoch": 0.851029916625797,
      "grad_norm": 2.016562302919856,
      "learning_rate": 3.3078368547057737e-06,
      "loss": 0.6281,
      "step": 6941
    },
    {
      "epoch": 0.8511525257479157,
      "grad_norm": 1.7333023792351965,
      "learning_rate": 3.307357189241532e-06,
      "loss": 0.4982,
      "step": 6942
    },
    {
      "epoch": 0.8512751348700344,
      "grad_norm": 2.056897188576561,
      "learning_rate": 3.3068774905934803e-06,
      "loss": 0.5668,
      "step": 6943
    },
    {
      "epoch": 0.8513977439921531,
      "grad_norm": 1.951187249153929,
      "learning_rate": 3.3063977587813357e-06,
      "loss": 0.5531,
      "step": 6944
    },
    {
      "epoch": 0.8515203531142717,
      "grad_norm": 2.34744762395155,
      "learning_rate": 3.3059179938248148e-06,
      "loss": 0.5349,
      "step": 6945
    },
    {
      "epoch": 0.8516429622363904,
      "grad_norm": 2.0328794453030476,
      "learning_rate": 3.305438195743639e-06,
      "loss": 0.532,
      "step": 6946
    },
    {
      "epoch": 0.851765571358509,
      "grad_norm": 1.8810263796704383,
      "learning_rate": 3.3049583645575254e-06,
      "loss": 0.4736,
      "step": 6947
    },
    {
      "epoch": 0.8518881804806278,
      "grad_norm": 1.8042520080301514,
      "learning_rate": 3.3044785002861995e-06,
      "loss": 0.4877,
      "step": 6948
    },
    {
      "epoch": 0.8520107896027465,
      "grad_norm": 1.7324142437184171,
      "learning_rate": 3.3039986029493827e-06,
      "loss": 0.5656,
      "step": 6949
    },
    {
      "epoch": 0.8521333987248652,
      "grad_norm": 1.809566138907744,
      "learning_rate": 3.3035186725667995e-06,
      "loss": 0.5147,
      "step": 6950
    },
    {
      "epoch": 0.8522560078469839,
      "grad_norm": 1.9736035226225517,
      "learning_rate": 3.3030387091581767e-06,
      "loss": 0.5544,
      "step": 6951
    },
    {
      "epoch": 0.8523786169691026,
      "grad_norm": 2.0790414754507798,
      "learning_rate": 3.3025587127432414e-06,
      "loss": 0.5116,
      "step": 6952
    },
    {
      "epoch": 0.8525012260912211,
      "grad_norm": 2.098269270855422,
      "learning_rate": 3.302078683341722e-06,
      "loss": 0.4942,
      "step": 6953
    },
    {
      "epoch": 0.8526238352133398,
      "grad_norm": 1.8514801725635757,
      "learning_rate": 3.301598620973349e-06,
      "loss": 0.5142,
      "step": 6954
    },
    {
      "epoch": 0.8527464443354585,
      "grad_norm": 1.8953798443733558,
      "learning_rate": 3.3011185256578526e-06,
      "loss": 0.508,
      "step": 6955
    },
    {
      "epoch": 0.8528690534575772,
      "grad_norm": 2.1945678653828478,
      "learning_rate": 3.3006383974149674e-06,
      "loss": 0.5437,
      "step": 6956
    },
    {
      "epoch": 0.8529916625796959,
      "grad_norm": 2.0108142465763525,
      "learning_rate": 3.3001582362644258e-06,
      "loss": 0.5516,
      "step": 6957
    },
    {
      "epoch": 0.8531142717018146,
      "grad_norm": 2.1059938110557996,
      "learning_rate": 3.2996780422259656e-06,
      "loss": 0.6059,
      "step": 6958
    },
    {
      "epoch": 0.8532368808239333,
      "grad_norm": 2.000434425237508,
      "learning_rate": 3.2991978153193216e-06,
      "loss": 0.5349,
      "step": 6959
    },
    {
      "epoch": 0.853359489946052,
      "grad_norm": 1.8932938951896852,
      "learning_rate": 3.298717555564232e-06,
      "loss": 0.5287,
      "step": 6960
    },
    {
      "epoch": 0.8534820990681706,
      "grad_norm": 1.960661299581292,
      "learning_rate": 3.298237262980437e-06,
      "loss": 0.5431,
      "step": 6961
    },
    {
      "epoch": 0.8536047081902893,
      "grad_norm": 1.9415208670954582,
      "learning_rate": 3.2977569375876765e-06,
      "loss": 0.5114,
      "step": 6962
    },
    {
      "epoch": 0.853727317312408,
      "grad_norm": 2.0371116817540726,
      "learning_rate": 3.297276579405695e-06,
      "loss": 0.5555,
      "step": 6963
    },
    {
      "epoch": 0.8538499264345267,
      "grad_norm": 1.6938804243760017,
      "learning_rate": 3.296796188454234e-06,
      "loss": 0.5324,
      "step": 6964
    },
    {
      "epoch": 0.8539725355566454,
      "grad_norm": 2.027877245997989,
      "learning_rate": 3.2963157647530395e-06,
      "loss": 0.5469,
      "step": 6965
    },
    {
      "epoch": 0.8540951446787641,
      "grad_norm": 2.015945291479107,
      "learning_rate": 3.295835308321857e-06,
      "loss": 0.5346,
      "step": 6966
    },
    {
      "epoch": 0.8542177538008828,
      "grad_norm": 1.9593505391283255,
      "learning_rate": 3.295354819180434e-06,
      "loss": 0.4923,
      "step": 6967
    },
    {
      "epoch": 0.8543403629230014,
      "grad_norm": 1.9989331477688508,
      "learning_rate": 3.2948742973485204e-06,
      "loss": 0.5611,
      "step": 6968
    },
    {
      "epoch": 0.8544629720451201,
      "grad_norm": 2.0170008398142802,
      "learning_rate": 3.2943937428458663e-06,
      "loss": 0.5485,
      "step": 6969
    },
    {
      "epoch": 0.8545855811672388,
      "grad_norm": 2.085649792252193,
      "learning_rate": 3.2939131556922215e-06,
      "loss": 0.5442,
      "step": 6970
    },
    {
      "epoch": 0.8547081902893575,
      "grad_norm": 2.0267123133281246,
      "learning_rate": 3.293432535907342e-06,
      "loss": 0.5738,
      "step": 6971
    },
    {
      "epoch": 0.8548307994114762,
      "grad_norm": 1.9932056384477919,
      "learning_rate": 3.2929518835109795e-06,
      "loss": 0.5289,
      "step": 6972
    },
    {
      "epoch": 0.8549534085335949,
      "grad_norm": 2.0129706286688873,
      "learning_rate": 3.292471198522892e-06,
      "loss": 0.5114,
      "step": 6973
    },
    {
      "epoch": 0.8550760176557136,
      "grad_norm": 1.9553881056338644,
      "learning_rate": 3.2919904809628344e-06,
      "loss": 0.549,
      "step": 6974
    },
    {
      "epoch": 0.8551986267778323,
      "grad_norm": 1.6945556673045896,
      "learning_rate": 3.291509730850566e-06,
      "loss": 0.5059,
      "step": 6975
    },
    {
      "epoch": 0.8553212358999509,
      "grad_norm": 1.9312729704877074,
      "learning_rate": 3.2910289482058467e-06,
      "loss": 0.4721,
      "step": 6976
    },
    {
      "epoch": 0.8554438450220696,
      "grad_norm": 2.0944490009428933,
      "learning_rate": 3.2905481330484373e-06,
      "loss": 0.5712,
      "step": 6977
    },
    {
      "epoch": 0.8555664541441883,
      "grad_norm": 1.9194125689034445,
      "learning_rate": 3.2900672853981e-06,
      "loss": 0.6007,
      "step": 6978
    },
    {
      "epoch": 0.855689063266307,
      "grad_norm": 2.2617002254301153,
      "learning_rate": 3.2895864052745987e-06,
      "loss": 0.5848,
      "step": 6979
    },
    {
      "epoch": 0.8558116723884257,
      "grad_norm": 1.8255438801824624,
      "learning_rate": 3.2891054926976972e-06,
      "loss": 0.4755,
      "step": 6980
    },
    {
      "epoch": 0.8559342815105444,
      "grad_norm": 1.9048073532552103,
      "learning_rate": 3.2886245476871643e-06,
      "loss": 0.5281,
      "step": 6981
    },
    {
      "epoch": 0.8560568906326631,
      "grad_norm": 1.7740088820428979,
      "learning_rate": 3.288143570262765e-06,
      "loss": 0.4992,
      "step": 6982
    },
    {
      "epoch": 0.8561794997547818,
      "grad_norm": 1.9220965864309765,
      "learning_rate": 3.2876625604442713e-06,
      "loss": 0.528,
      "step": 6983
    },
    {
      "epoch": 0.8563021088769004,
      "grad_norm": 1.9622744659394615,
      "learning_rate": 3.287181518251451e-06,
      "loss": 0.528,
      "step": 6984
    },
    {
      "epoch": 0.8564247179990191,
      "grad_norm": 2.054950381544518,
      "learning_rate": 3.286700443704077e-06,
      "loss": 0.5297,
      "step": 6985
    },
    {
      "epoch": 0.8565473271211378,
      "grad_norm": 1.9789683823320734,
      "learning_rate": 3.286219336821921e-06,
      "loss": 0.4864,
      "step": 6986
    },
    {
      "epoch": 0.8566699362432565,
      "grad_norm": 2.055065899211114,
      "learning_rate": 3.2857381976247598e-06,
      "loss": 0.5821,
      "step": 6987
    },
    {
      "epoch": 0.8567925453653752,
      "grad_norm": 1.8252974692735422,
      "learning_rate": 3.285257026132367e-06,
      "loss": 0.4598,
      "step": 6988
    },
    {
      "epoch": 0.8569151544874939,
      "grad_norm": 1.9486276281883972,
      "learning_rate": 3.28477582236452e-06,
      "loss": 0.5615,
      "step": 6989
    },
    {
      "epoch": 0.8570377636096126,
      "grad_norm": 1.876939909049374,
      "learning_rate": 3.2842945863409974e-06,
      "loss": 0.5354,
      "step": 6990
    },
    {
      "epoch": 0.8571603727317313,
      "grad_norm": 2.1790460139781693,
      "learning_rate": 3.283813318081579e-06,
      "loss": 0.5576,
      "step": 6991
    },
    {
      "epoch": 0.8572829818538499,
      "grad_norm": 1.8863053089466248,
      "learning_rate": 3.2833320176060457e-06,
      "loss": 0.4642,
      "step": 6992
    },
    {
      "epoch": 0.8574055909759686,
      "grad_norm": 1.854765536291803,
      "learning_rate": 3.2828506849341797e-06,
      "loss": 0.4998,
      "step": 6993
    },
    {
      "epoch": 0.8575282000980873,
      "grad_norm": 1.9112482215401114,
      "learning_rate": 3.2823693200857653e-06,
      "loss": 0.4607,
      "step": 6994
    },
    {
      "epoch": 0.857650809220206,
      "grad_norm": 2.1421711821206046,
      "learning_rate": 3.2818879230805857e-06,
      "loss": 0.57,
      "step": 6995
    },
    {
      "epoch": 0.8577734183423247,
      "grad_norm": 1.9505614812732106,
      "learning_rate": 3.2814064939384284e-06,
      "loss": 0.586,
      "step": 6996
    },
    {
      "epoch": 0.8578960274644434,
      "grad_norm": 1.9989909326766346,
      "learning_rate": 3.280925032679081e-06,
      "loss": 0.5331,
      "step": 6997
    },
    {
      "epoch": 0.8580186365865621,
      "grad_norm": 1.8084586860124323,
      "learning_rate": 3.280443539322332e-06,
      "loss": 0.5067,
      "step": 6998
    },
    {
      "epoch": 0.8581412457086808,
      "grad_norm": 1.9628593495732527,
      "learning_rate": 3.2799620138879723e-06,
      "loss": 0.5497,
      "step": 6999
    },
    {
      "epoch": 0.8582638548307994,
      "grad_norm": 2.1549437836673015,
      "learning_rate": 3.2794804563957926e-06,
      "loss": 0.5743,
      "step": 7000
    },
    {
      "epoch": 0.8583864639529181,
      "grad_norm": 1.981244163582905,
      "learning_rate": 3.2789988668655866e-06,
      "loss": 0.504,
      "step": 7001
    },
    {
      "epoch": 0.8585090730750368,
      "grad_norm": 1.8372081301093626,
      "learning_rate": 3.2785172453171476e-06,
      "loss": 0.4634,
      "step": 7002
    },
    {
      "epoch": 0.8586316821971555,
      "grad_norm": 1.7308602456510165,
      "learning_rate": 3.278035591770272e-06,
      "loss": 0.4875,
      "step": 7003
    },
    {
      "epoch": 0.8587542913192742,
      "grad_norm": 1.9798633904882317,
      "learning_rate": 3.2775539062447566e-06,
      "loss": 0.5834,
      "step": 7004
    },
    {
      "epoch": 0.8588769004413929,
      "grad_norm": 2.030842070007913,
      "learning_rate": 3.277072188760398e-06,
      "loss": 0.4945,
      "step": 7005
    },
    {
      "epoch": 0.8589995095635116,
      "grad_norm": 1.759994009029527,
      "learning_rate": 3.2765904393369974e-06,
      "loss": 0.4739,
      "step": 7006
    },
    {
      "epoch": 0.8591221186856303,
      "grad_norm": 2.082694094576967,
      "learning_rate": 3.2761086579943547e-06,
      "loss": 0.5783,
      "step": 7007
    },
    {
      "epoch": 0.8592447278077489,
      "grad_norm": 2.1756224224293836,
      "learning_rate": 3.275626844752272e-06,
      "loss": 0.5898,
      "step": 7008
    },
    {
      "epoch": 0.8593673369298676,
      "grad_norm": 1.92069306969885,
      "learning_rate": 3.275144999630553e-06,
      "loss": 0.5477,
      "step": 7009
    },
    {
      "epoch": 0.8594899460519863,
      "grad_norm": 2.141942918684177,
      "learning_rate": 3.2746631226490017e-06,
      "loss": 0.5632,
      "step": 7010
    },
    {
      "epoch": 0.859612555174105,
      "grad_norm": 1.9843014935449081,
      "learning_rate": 3.274181213827426e-06,
      "loss": 0.5636,
      "step": 7011
    },
    {
      "epoch": 0.8597351642962237,
      "grad_norm": 1.8093805186151664,
      "learning_rate": 3.273699273185631e-06,
      "loss": 0.4949,
      "step": 7012
    },
    {
      "epoch": 0.8598577734183424,
      "grad_norm": 2.023802030788048,
      "learning_rate": 3.273217300743426e-06,
      "loss": 0.4965,
      "step": 7013
    },
    {
      "epoch": 0.859980382540461,
      "grad_norm": 1.8971117717445318,
      "learning_rate": 3.272735296520621e-06,
      "loss": 0.5054,
      "step": 7014
    },
    {
      "epoch": 0.8601029916625796,
      "grad_norm": 1.9839220373791249,
      "learning_rate": 3.2722532605370276e-06,
      "loss": 0.524,
      "step": 7015
    },
    {
      "epoch": 0.8602256007846983,
      "grad_norm": 1.8591599211766727,
      "learning_rate": 3.2717711928124578e-06,
      "loss": 0.5647,
      "step": 7016
    },
    {
      "epoch": 0.860348209906817,
      "grad_norm": 1.8940451845745971,
      "learning_rate": 3.2712890933667264e-06,
      "loss": 0.5293,
      "step": 7017
    },
    {
      "epoch": 0.8604708190289357,
      "grad_norm": 2.0279581932739545,
      "learning_rate": 3.2708069622196464e-06,
      "loss": 0.5282,
      "step": 7018
    },
    {
      "epoch": 0.8605934281510544,
      "grad_norm": 1.9560416857278091,
      "learning_rate": 3.270324799391037e-06,
      "loss": 0.5373,
      "step": 7019
    },
    {
      "epoch": 0.8607160372731731,
      "grad_norm": 1.6387503971297992,
      "learning_rate": 3.2698426049007136e-06,
      "loss": 0.5424,
      "step": 7020
    },
    {
      "epoch": 0.8608386463952918,
      "grad_norm": 1.8342095144071437,
      "learning_rate": 3.269360378768497e-06,
      "loss": 0.5308,
      "step": 7021
    },
    {
      "epoch": 0.8609612555174105,
      "grad_norm": 1.9088011625190588,
      "learning_rate": 3.2688781210142067e-06,
      "loss": 0.5291,
      "step": 7022
    },
    {
      "epoch": 0.8610838646395291,
      "grad_norm": 2.011281409333804,
      "learning_rate": 3.268395831657664e-06,
      "loss": 0.5428,
      "step": 7023
    },
    {
      "epoch": 0.8612064737616478,
      "grad_norm": 1.9632916557270774,
      "learning_rate": 3.267913510718693e-06,
      "loss": 0.5227,
      "step": 7024
    },
    {
      "epoch": 0.8613290828837665,
      "grad_norm": 1.8382449228090159,
      "learning_rate": 3.2674311582171157e-06,
      "loss": 0.5274,
      "step": 7025
    },
    {
      "epoch": 0.8614516920058852,
      "grad_norm": 1.9013772943570875,
      "learning_rate": 3.2669487741727605e-06,
      "loss": 0.4973,
      "step": 7026
    },
    {
      "epoch": 0.8615743011280039,
      "grad_norm": 1.8810825598430414,
      "learning_rate": 3.2664663586054534e-06,
      "loss": 0.5282,
      "step": 7027
    },
    {
      "epoch": 0.8616969102501226,
      "grad_norm": 1.8976492643306184,
      "learning_rate": 3.2659839115350207e-06,
      "loss": 0.5007,
      "step": 7028
    },
    {
      "epoch": 0.8618195193722413,
      "grad_norm": 2.173928597658257,
      "learning_rate": 3.265501432981294e-06,
      "loss": 0.5134,
      "step": 7029
    },
    {
      "epoch": 0.86194212849436,
      "grad_norm": 1.8774937230250466,
      "learning_rate": 3.265018922964102e-06,
      "loss": 0.4897,
      "step": 7030
    },
    {
      "epoch": 0.8620647376164786,
      "grad_norm": 2.1400325929747197,
      "learning_rate": 3.2645363815032794e-06,
      "loss": 0.5438,
      "step": 7031
    },
    {
      "epoch": 0.8621873467385973,
      "grad_norm": 1.9592231952599606,
      "learning_rate": 3.2640538086186576e-06,
      "loss": 0.5099,
      "step": 7032
    },
    {
      "epoch": 0.862309955860716,
      "grad_norm": 2.034613424261358,
      "learning_rate": 3.2635712043300715e-06,
      "loss": 0.473,
      "step": 7033
    },
    {
      "epoch": 0.8624325649828347,
      "grad_norm": 1.7764673315960615,
      "learning_rate": 3.2630885686573566e-06,
      "loss": 0.5728,
      "step": 7034
    },
    {
      "epoch": 0.8625551741049534,
      "grad_norm": 1.8235962911553159,
      "learning_rate": 3.2626059016203517e-06,
      "loss": 0.5037,
      "step": 7035
    },
    {
      "epoch": 0.8626777832270721,
      "grad_norm": 2.0735135038403607,
      "learning_rate": 3.2621232032388926e-06,
      "loss": 0.5369,
      "step": 7036
    },
    {
      "epoch": 0.8628003923491908,
      "grad_norm": 1.750630500275769,
      "learning_rate": 3.261640473532822e-06,
      "loss": 0.4811,
      "step": 7037
    },
    {
      "epoch": 0.8629230014713095,
      "grad_norm": 2.0922200521333,
      "learning_rate": 3.2611577125219783e-06,
      "loss": 0.5777,
      "step": 7038
    },
    {
      "epoch": 0.8630456105934281,
      "grad_norm": 1.899675161051012,
      "learning_rate": 3.260674920226206e-06,
      "loss": 0.5526,
      "step": 7039
    },
    {
      "epoch": 0.8631682197155468,
      "grad_norm": 2.0828488639681857,
      "learning_rate": 3.2601920966653476e-06,
      "loss": 0.5745,
      "step": 7040
    },
    {
      "epoch": 0.8632908288376655,
      "grad_norm": 1.9178675028753218,
      "learning_rate": 3.2597092418592487e-06,
      "loss": 0.5375,
      "step": 7041
    },
    {
      "epoch": 0.8634134379597842,
      "grad_norm": 1.8647344492546676,
      "learning_rate": 3.259226355827755e-06,
      "loss": 0.5192,
      "step": 7042
    },
    {
      "epoch": 0.8635360470819029,
      "grad_norm": 1.8983153396003152,
      "learning_rate": 3.2587434385907128e-06,
      "loss": 0.4718,
      "step": 7043
    },
    {
      "epoch": 0.8636586562040216,
      "grad_norm": 1.910035525922493,
      "learning_rate": 3.2582604901679732e-06,
      "loss": 0.5405,
      "step": 7044
    },
    {
      "epoch": 0.8637812653261403,
      "grad_norm": 1.85736007342222,
      "learning_rate": 3.2577775105793844e-06,
      "loss": 0.4395,
      "step": 7045
    },
    {
      "epoch": 0.863903874448259,
      "grad_norm": 1.9659415033532264,
      "learning_rate": 3.2572944998447987e-06,
      "loss": 0.5579,
      "step": 7046
    },
    {
      "epoch": 0.8640264835703776,
      "grad_norm": 1.8134990806202755,
      "learning_rate": 3.2568114579840683e-06,
      "loss": 0.4893,
      "step": 7047
    },
    {
      "epoch": 0.8641490926924963,
      "grad_norm": 1.916278420682024,
      "learning_rate": 3.256328385017047e-06,
      "loss": 0.5214,
      "step": 7048
    },
    {
      "epoch": 0.864271701814615,
      "grad_norm": 2.0885858287827936,
      "learning_rate": 3.25584528096359e-06,
      "loss": 0.5476,
      "step": 7049
    },
    {
      "epoch": 0.8643943109367337,
      "grad_norm": 1.813973997790383,
      "learning_rate": 3.255362145843554e-06,
      "loss": 0.5204,
      "step": 7050
    },
    {
      "epoch": 0.8645169200588524,
      "grad_norm": 1.9081519693524223,
      "learning_rate": 3.2548789796767967e-06,
      "loss": 0.5297,
      "step": 7051
    },
    {
      "epoch": 0.8646395291809711,
      "grad_norm": 1.8074044572572547,
      "learning_rate": 3.2543957824831775e-06,
      "loss": 0.4737,
      "step": 7052
    },
    {
      "epoch": 0.8647621383030898,
      "grad_norm": 1.8935575045992585,
      "learning_rate": 3.2539125542825544e-06,
      "loss": 0.5287,
      "step": 7053
    },
    {
      "epoch": 0.8648847474252085,
      "grad_norm": 1.968613946195984,
      "learning_rate": 3.253429295094792e-06,
      "loss": 0.5226,
      "step": 7054
    },
    {
      "epoch": 0.8650073565473271,
      "grad_norm": 2.160062477828369,
      "learning_rate": 3.2529460049397515e-06,
      "loss": 0.5143,
      "step": 7055
    },
    {
      "epoch": 0.8651299656694458,
      "grad_norm": 1.8868815467604885,
      "learning_rate": 3.2524626838372976e-06,
      "loss": 0.5079,
      "step": 7056
    },
    {
      "epoch": 0.8652525747915645,
      "grad_norm": 1.7164678554442023,
      "learning_rate": 3.2519793318072944e-06,
      "loss": 0.5207,
      "step": 7057
    },
    {
      "epoch": 0.8653751839136832,
      "grad_norm": 2.5867479809774516,
      "learning_rate": 3.251495948869609e-06,
      "loss": 0.574,
      "step": 7058
    },
    {
      "epoch": 0.8654977930358019,
      "grad_norm": 1.8192889126484597,
      "learning_rate": 3.2510125350441106e-06,
      "loss": 0.5153,
      "step": 7059
    },
    {
      "epoch": 0.8656204021579206,
      "grad_norm": 2.0096464052751104,
      "learning_rate": 3.2505290903506677e-06,
      "loss": 0.5174,
      "step": 7060
    },
    {
      "epoch": 0.8657430112800393,
      "grad_norm": 1.762573951739471,
      "learning_rate": 3.2500456148091497e-06,
      "loss": 0.5183,
      "step": 7061
    },
    {
      "epoch": 0.8658656204021579,
      "grad_norm": 2.0451308843102787,
      "learning_rate": 3.2495621084394296e-06,
      "loss": 0.4909,
      "step": 7062
    },
    {
      "epoch": 0.8659882295242766,
      "grad_norm": 1.8884435514327145,
      "learning_rate": 3.2490785712613794e-06,
      "loss": 0.5304,
      "step": 7063
    },
    {
      "epoch": 0.8661108386463953,
      "grad_norm": 2.212085269881071,
      "learning_rate": 3.2485950032948745e-06,
      "loss": 0.5535,
      "step": 7064
    },
    {
      "epoch": 0.866233447768514,
      "grad_norm": 1.7795087647137138,
      "learning_rate": 3.2481114045597894e-06,
      "loss": 0.5241,
      "step": 7065
    },
    {
      "epoch": 0.8663560568906327,
      "grad_norm": 1.798284243783837,
      "learning_rate": 3.2476277750759997e-06,
      "loss": 0.4555,
      "step": 7066
    },
    {
      "epoch": 0.8664786660127514,
      "grad_norm": 1.8831540059908431,
      "learning_rate": 3.247144114863387e-06,
      "loss": 0.5475,
      "step": 7067
    },
    {
      "epoch": 0.8666012751348701,
      "grad_norm": 1.664056401591425,
      "learning_rate": 3.246660423941827e-06,
      "loss": 0.4908,
      "step": 7068
    },
    {
      "epoch": 0.8667238842569888,
      "grad_norm": 2.024884313525601,
      "learning_rate": 3.246176702331203e-06,
      "loss": 0.5342,
      "step": 7069
    },
    {
      "epoch": 0.8668464933791074,
      "grad_norm": 2.0243949211173073,
      "learning_rate": 3.2456929500513943e-06,
      "loss": 0.5088,
      "step": 7070
    },
    {
      "epoch": 0.866969102501226,
      "grad_norm": 1.6639866507387522,
      "learning_rate": 3.2452091671222863e-06,
      "loss": 0.5097,
      "step": 7071
    },
    {
      "epoch": 0.8670917116233448,
      "grad_norm": 2.117269518513912,
      "learning_rate": 3.2447253535637614e-06,
      "loss": 0.5472,
      "step": 7072
    },
    {
      "epoch": 0.8672143207454635,
      "grad_norm": 1.8873570099361459,
      "learning_rate": 3.244241509395706e-06,
      "loss": 0.523,
      "step": 7073
    },
    {
      "epoch": 0.8673369298675822,
      "grad_norm": 2.088388703532674,
      "learning_rate": 3.2437576346380077e-06,
      "loss": 0.4955,
      "step": 7074
    },
    {
      "epoch": 0.8674595389897009,
      "grad_norm": 1.7704759961980636,
      "learning_rate": 3.2432737293105543e-06,
      "loss": 0.528,
      "step": 7075
    },
    {
      "epoch": 0.8675821481118196,
      "grad_norm": 1.9905862676446797,
      "learning_rate": 3.242789793433233e-06,
      "loss": 0.5767,
      "step": 7076
    },
    {
      "epoch": 0.8677047572339383,
      "grad_norm": 2.1638519655273134,
      "learning_rate": 3.2423058270259378e-06,
      "loss": 0.5118,
      "step": 7077
    },
    {
      "epoch": 0.8678273663560568,
      "grad_norm": 1.7698550894227423,
      "learning_rate": 3.241821830108558e-06,
      "loss": 0.4556,
      "step": 7078
    },
    {
      "epoch": 0.8679499754781755,
      "grad_norm": 1.676696490886145,
      "learning_rate": 3.2413378027009883e-06,
      "loss": 0.5389,
      "step": 7079
    },
    {
      "epoch": 0.8680725846002942,
      "grad_norm": 1.9901089888299566,
      "learning_rate": 3.240853744823122e-06,
      "loss": 0.5286,
      "step": 7080
    },
    {
      "epoch": 0.8681951937224129,
      "grad_norm": 1.7908958166835414,
      "learning_rate": 3.2403696564948557e-06,
      "loss": 0.5124,
      "step": 7081
    },
    {
      "epoch": 0.8683178028445316,
      "grad_norm": 1.786301006617618,
      "learning_rate": 3.239885537736085e-06,
      "loss": 0.5037,
      "step": 7082
    },
    {
      "epoch": 0.8684404119666503,
      "grad_norm": 1.985323136856891,
      "learning_rate": 3.2394013885667097e-06,
      "loss": 0.5361,
      "step": 7083
    },
    {
      "epoch": 0.868563021088769,
      "grad_norm": 1.9040282851761565,
      "learning_rate": 3.2389172090066277e-06,
      "loss": 0.5491,
      "step": 7084
    },
    {
      "epoch": 0.8686856302108877,
      "grad_norm": 2.0517453370388123,
      "learning_rate": 3.23843299907574e-06,
      "loss": 0.4421,
      "step": 7085
    },
    {
      "epoch": 0.8688082393330063,
      "grad_norm": 1.6924388711207121,
      "learning_rate": 3.2379487587939494e-06,
      "loss": 0.4883,
      "step": 7086
    },
    {
      "epoch": 0.868930848455125,
      "grad_norm": 1.8583188202865741,
      "learning_rate": 3.2374644881811586e-06,
      "loss": 0.5194,
      "step": 7087
    },
    {
      "epoch": 0.8690534575772437,
      "grad_norm": 2.040876633482147,
      "learning_rate": 3.2369801872572713e-06,
      "loss": 0.5669,
      "step": 7088
    },
    {
      "epoch": 0.8691760666993624,
      "grad_norm": 2.0027175846482117,
      "learning_rate": 3.2364958560421933e-06,
      "loss": 0.5344,
      "step": 7089
    },
    {
      "epoch": 0.8692986758214811,
      "grad_norm": 1.902113218100626,
      "learning_rate": 3.2360114945558328e-06,
      "loss": 0.5058,
      "step": 7090
    },
    {
      "epoch": 0.8694212849435998,
      "grad_norm": 2.0222266812355727,
      "learning_rate": 3.235527102818096e-06,
      "loss": 0.5409,
      "step": 7091
    },
    {
      "epoch": 0.8695438940657185,
      "grad_norm": 2.0539906406309902,
      "learning_rate": 3.2350426808488932e-06,
      "loss": 0.5618,
      "step": 7092
    },
    {
      "epoch": 0.8696665031878372,
      "grad_norm": 1.9855275919844584,
      "learning_rate": 3.2345582286681354e-06,
      "loss": 0.571,
      "step": 7093
    },
    {
      "epoch": 0.8697891123099558,
      "grad_norm": 2.0208492131829634,
      "learning_rate": 3.2340737462957334e-06,
      "loss": 0.5619,
      "step": 7094
    },
    {
      "epoch": 0.8699117214320745,
      "grad_norm": 1.8519129358454212,
      "learning_rate": 3.233589233751601e-06,
      "loss": 0.4835,
      "step": 7095
    },
    {
      "epoch": 0.8700343305541932,
      "grad_norm": 1.9300450615414009,
      "learning_rate": 3.2331046910556524e-06,
      "loss": 0.5425,
      "step": 7096
    },
    {
      "epoch": 0.8701569396763119,
      "grad_norm": 1.713330505761368,
      "learning_rate": 3.2326201182278033e-06,
      "loss": 0.5087,
      "step": 7097
    },
    {
      "epoch": 0.8702795487984306,
      "grad_norm": 1.788394543684002,
      "learning_rate": 3.23213551528797e-06,
      "loss": 0.5105,
      "step": 7098
    },
    {
      "epoch": 0.8704021579205493,
      "grad_norm": 1.8511921223215746,
      "learning_rate": 3.231650882256071e-06,
      "loss": 0.4917,
      "step": 7099
    },
    {
      "epoch": 0.870524767042668,
      "grad_norm": 1.8884180110689626,
      "learning_rate": 3.2311662191520264e-06,
      "loss": 0.5609,
      "step": 7100
    },
    {
      "epoch": 0.8706473761647867,
      "grad_norm": 1.9691220808171854,
      "learning_rate": 3.230681525995754e-06,
      "loss": 0.5581,
      "step": 7101
    },
    {
      "epoch": 0.8707699852869053,
      "grad_norm": 1.8590979040927744,
      "learning_rate": 3.230196802807179e-06,
      "loss": 0.5234,
      "step": 7102
    },
    {
      "epoch": 0.870892594409024,
      "grad_norm": 1.897758691636457,
      "learning_rate": 3.2297120496062216e-06,
      "loss": 0.5438,
      "step": 7103
    },
    {
      "epoch": 0.8710152035311427,
      "grad_norm": 1.887879463741009,
      "learning_rate": 3.229227266412808e-06,
      "loss": 0.5455,
      "step": 7104
    },
    {
      "epoch": 0.8711378126532614,
      "grad_norm": 1.86058688519687,
      "learning_rate": 3.2287424532468612e-06,
      "loss": 0.5243,
      "step": 7105
    },
    {
      "epoch": 0.8712604217753801,
      "grad_norm": 2.0189967195756315,
      "learning_rate": 3.2282576101283105e-06,
      "loss": 0.4572,
      "step": 7106
    },
    {
      "epoch": 0.8713830308974988,
      "grad_norm": 1.8345122458846856,
      "learning_rate": 3.227772737077083e-06,
      "loss": 0.6097,
      "step": 7107
    },
    {
      "epoch": 0.8715056400196175,
      "grad_norm": 1.906429271704858,
      "learning_rate": 3.2272878341131074e-06,
      "loss": 0.4993,
      "step": 7108
    },
    {
      "epoch": 0.8716282491417362,
      "grad_norm": 2.0429764682964064,
      "learning_rate": 3.226802901256314e-06,
      "loss": 0.5653,
      "step": 7109
    },
    {
      "epoch": 0.8717508582638548,
      "grad_norm": 1.9784795581402728,
      "learning_rate": 3.2263179385266347e-06,
      "loss": 0.5589,
      "step": 7110
    },
    {
      "epoch": 0.8718734673859735,
      "grad_norm": 1.8374731952971368,
      "learning_rate": 3.2258329459440018e-06,
      "loss": 0.5226,
      "step": 7111
    },
    {
      "epoch": 0.8719960765080922,
      "grad_norm": 1.9703113919545547,
      "learning_rate": 3.225347923528351e-06,
      "loss": 0.506,
      "step": 7112
    },
    {
      "epoch": 0.8721186856302109,
      "grad_norm": 2.2061950343168233,
      "learning_rate": 3.2248628712996162e-06,
      "loss": 0.5819,
      "step": 7113
    },
    {
      "epoch": 0.8722412947523296,
      "grad_norm": 1.8151294553346873,
      "learning_rate": 3.224377789277733e-06,
      "loss": 0.478,
      "step": 7114
    },
    {
      "epoch": 0.8723639038744483,
      "grad_norm": 2.103233229980803,
      "learning_rate": 3.2238926774826412e-06,
      "loss": 0.5088,
      "step": 7115
    },
    {
      "epoch": 0.872486512996567,
      "grad_norm": 2.062047514253039,
      "learning_rate": 3.223407535934278e-06,
      "loss": 0.5324,
      "step": 7116
    },
    {
      "epoch": 0.8726091221186856,
      "grad_norm": 1.9396546073618213,
      "learning_rate": 3.2229223646525854e-06,
      "loss": 0.5504,
      "step": 7117
    },
    {
      "epoch": 0.8727317312408043,
      "grad_norm": 1.645401810449615,
      "learning_rate": 3.2224371636575035e-06,
      "loss": 0.5211,
      "step": 7118
    },
    {
      "epoch": 0.872854340362923,
      "grad_norm": 1.992316446999644,
      "learning_rate": 3.221951932968975e-06,
      "loss": 0.5203,
      "step": 7119
    },
    {
      "epoch": 0.8729769494850417,
      "grad_norm": 1.9050820504153718,
      "learning_rate": 3.221466672606944e-06,
      "loss": 0.5118,
      "step": 7120
    },
    {
      "epoch": 0.8730995586071604,
      "grad_norm": 1.9590202263165413,
      "learning_rate": 3.2209813825913554e-06,
      "loss": 0.4992,
      "step": 7121
    },
    {
      "epoch": 0.8732221677292791,
      "grad_norm": 1.8539419646561621,
      "learning_rate": 3.2204960629421554e-06,
      "loss": 0.4975,
      "step": 7122
    },
    {
      "epoch": 0.8733447768513978,
      "grad_norm": 1.9762009275622752,
      "learning_rate": 3.2200107136792924e-06,
      "loss": 0.5371,
      "step": 7123
    },
    {
      "epoch": 0.8734673859735165,
      "grad_norm": 2.158483645621133,
      "learning_rate": 3.2195253348227134e-06,
      "loss": 0.5253,
      "step": 7124
    },
    {
      "epoch": 0.8735899950956351,
      "grad_norm": 2.0024905095764676,
      "learning_rate": 3.2190399263923704e-06,
      "loss": 0.5283,
      "step": 7125
    },
    {
      "epoch": 0.8737126042177538,
      "grad_norm": 1.854554453273415,
      "learning_rate": 3.218554488408212e-06,
      "loss": 0.5022,
      "step": 7126
    },
    {
      "epoch": 0.8738352133398725,
      "grad_norm": 1.899812878269259,
      "learning_rate": 3.218069020890194e-06,
      "loss": 0.5536,
      "step": 7127
    },
    {
      "epoch": 0.8739578224619912,
      "grad_norm": 1.8257205060027264,
      "learning_rate": 3.2175835238582663e-06,
      "loss": 0.4729,
      "step": 7128
    },
    {
      "epoch": 0.8740804315841099,
      "grad_norm": 1.893518178971494,
      "learning_rate": 3.2170979973323862e-06,
      "loss": 0.5444,
      "step": 7129
    },
    {
      "epoch": 0.8742030407062286,
      "grad_norm": 1.9243026441661522,
      "learning_rate": 3.216612441332509e-06,
      "loss": 0.4717,
      "step": 7130
    },
    {
      "epoch": 0.8743256498283473,
      "grad_norm": 1.8364481676164637,
      "learning_rate": 3.2161268558785914e-06,
      "loss": 0.5082,
      "step": 7131
    },
    {
      "epoch": 0.874448258950466,
      "grad_norm": 2.0129434062380067,
      "learning_rate": 3.215641240990592e-06,
      "loss": 0.5455,
      "step": 7132
    },
    {
      "epoch": 0.8745708680725846,
      "grad_norm": 2.0069944085867895,
      "learning_rate": 3.2151555966884705e-06,
      "loss": 0.586,
      "step": 7133
    },
    {
      "epoch": 0.8746934771947033,
      "grad_norm": 1.6742994606335255,
      "learning_rate": 3.2146699229921884e-06,
      "loss": 0.4659,
      "step": 7134
    },
    {
      "epoch": 0.874816086316822,
      "grad_norm": 2.0164042784008878,
      "learning_rate": 3.214184219921708e-06,
      "loss": 0.578,
      "step": 7135
    },
    {
      "epoch": 0.8749386954389407,
      "grad_norm": 1.8491811905160622,
      "learning_rate": 3.2136984874969906e-06,
      "loss": 0.496,
      "step": 7136
    },
    {
      "epoch": 0.8750613045610593,
      "grad_norm": 1.9693742550758033,
      "learning_rate": 3.213212725738002e-06,
      "loss": 0.5593,
      "step": 7137
    },
    {
      "epoch": 0.875183913683178,
      "grad_norm": 1.9241270247093691,
      "learning_rate": 3.2127269346647085e-06,
      "loss": 0.5109,
      "step": 7138
    },
    {
      "epoch": 0.8753065228052967,
      "grad_norm": 2.104980828404034,
      "learning_rate": 3.2122411142970754e-06,
      "loss": 0.5753,
      "step": 7139
    },
    {
      "epoch": 0.8754291319274154,
      "grad_norm": 1.978842023293131,
      "learning_rate": 3.2117552646550722e-06,
      "loss": 0.5036,
      "step": 7140
    },
    {
      "epoch": 0.875551741049534,
      "grad_norm": 1.8051868289943986,
      "learning_rate": 3.2112693857586674e-06,
      "loss": 0.5066,
      "step": 7141
    },
    {
      "epoch": 0.8756743501716527,
      "grad_norm": 2.13990282906332,
      "learning_rate": 3.2107834776278313e-06,
      "loss": 0.5425,
      "step": 7142
    },
    {
      "epoch": 0.8757969592937714,
      "grad_norm": 2.029121877927411,
      "learning_rate": 3.210297540282536e-06,
      "loss": 0.6085,
      "step": 7143
    },
    {
      "epoch": 0.8759195684158901,
      "grad_norm": 1.9825043185931168,
      "learning_rate": 3.2098115737427544e-06,
      "loss": 0.5189,
      "step": 7144
    },
    {
      "epoch": 0.8760421775380088,
      "grad_norm": 2.1095735625028587,
      "learning_rate": 3.2093255780284605e-06,
      "loss": 0.5253,
      "step": 7145
    },
    {
      "epoch": 0.8761647866601275,
      "grad_norm": 2.0495484151974632,
      "learning_rate": 3.2088395531596296e-06,
      "loss": 0.5201,
      "step": 7146
    },
    {
      "epoch": 0.8762873957822462,
      "grad_norm": 1.8046246405318298,
      "learning_rate": 3.2083534991562382e-06,
      "loss": 0.5373,
      "step": 7147
    },
    {
      "epoch": 0.8764100049043649,
      "grad_norm": 1.9328966609539766,
      "learning_rate": 3.207867416038264e-06,
      "loss": 0.4985,
      "step": 7148
    },
    {
      "epoch": 0.8765326140264835,
      "grad_norm": 1.9024598254852685,
      "learning_rate": 3.207381303825685e-06,
      "loss": 0.4911,
      "step": 7149
    },
    {
      "epoch": 0.8766552231486022,
      "grad_norm": 1.8769710421513464,
      "learning_rate": 3.2068951625384836e-06,
      "loss": 0.5905,
      "step": 7150
    },
    {
      "epoch": 0.8767778322707209,
      "grad_norm": 1.8833237836060908,
      "learning_rate": 3.2064089921966386e-06,
      "loss": 0.5116,
      "step": 7151
    },
    {
      "epoch": 0.8769004413928396,
      "grad_norm": 1.8047666323801563,
      "learning_rate": 3.2059227928201335e-06,
      "loss": 0.5348,
      "step": 7152
    },
    {
      "epoch": 0.8770230505149583,
      "grad_norm": 1.8364184897191442,
      "learning_rate": 3.2054365644289522e-06,
      "loss": 0.5253,
      "step": 7153
    },
    {
      "epoch": 0.877145659637077,
      "grad_norm": 1.8959893572566975,
      "learning_rate": 3.204950307043078e-06,
      "loss": 0.5357,
      "step": 7154
    },
    {
      "epoch": 0.8772682687591957,
      "grad_norm": 1.7818116126608714,
      "learning_rate": 3.2044640206824997e-06,
      "loss": 0.4989,
      "step": 7155
    },
    {
      "epoch": 0.8773908778813144,
      "grad_norm": 2.0365689663282187,
      "learning_rate": 3.203977705367203e-06,
      "loss": 0.5485,
      "step": 7156
    },
    {
      "epoch": 0.877513487003433,
      "grad_norm": 1.7914666284699607,
      "learning_rate": 3.2034913611171754e-06,
      "loss": 0.4791,
      "step": 7157
    },
    {
      "epoch": 0.8776360961255517,
      "grad_norm": 1.8278346816902495,
      "learning_rate": 3.2030049879524077e-06,
      "loss": 0.5125,
      "step": 7158
    },
    {
      "epoch": 0.8777587052476704,
      "grad_norm": 2.177602086098549,
      "learning_rate": 3.2025185858928903e-06,
      "loss": 0.5192,
      "step": 7159
    },
    {
      "epoch": 0.8778813143697891,
      "grad_norm": 2.1312679840990945,
      "learning_rate": 3.202032154958615e-06,
      "loss": 0.5534,
      "step": 7160
    },
    {
      "epoch": 0.8780039234919078,
      "grad_norm": 1.8315576140410836,
      "learning_rate": 3.201545695169577e-06,
      "loss": 0.4858,
      "step": 7161
    },
    {
      "epoch": 0.8781265326140265,
      "grad_norm": 1.757255297984119,
      "learning_rate": 3.201059206545767e-06,
      "loss": 0.5031,
      "step": 7162
    },
    {
      "epoch": 0.8782491417361452,
      "grad_norm": 1.9047280015224883,
      "learning_rate": 3.200572689107184e-06,
      "loss": 0.5305,
      "step": 7163
    },
    {
      "epoch": 0.8783717508582638,
      "grad_norm": 1.8335650997697763,
      "learning_rate": 3.2000861428738216e-06,
      "loss": 0.4848,
      "step": 7164
    },
    {
      "epoch": 0.8784943599803825,
      "grad_norm": 1.805284943250824,
      "learning_rate": 3.1995995678656804e-06,
      "loss": 0.475,
      "step": 7165
    },
    {
      "epoch": 0.8786169691025012,
      "grad_norm": 1.8561897437979127,
      "learning_rate": 3.1991129641027587e-06,
      "loss": 0.5447,
      "step": 7166
    },
    {
      "epoch": 0.8787395782246199,
      "grad_norm": 1.8527566419013362,
      "learning_rate": 3.198626331605056e-06,
      "loss": 0.4794,
      "step": 7167
    },
    {
      "epoch": 0.8788621873467386,
      "grad_norm": 1.8284769026115208,
      "learning_rate": 3.198139670392574e-06,
      "loss": 0.5012,
      "step": 7168
    },
    {
      "epoch": 0.8789847964688573,
      "grad_norm": 2.0027370050792492,
      "learning_rate": 3.197652980485315e-06,
      "loss": 0.5206,
      "step": 7169
    },
    {
      "epoch": 0.879107405590976,
      "grad_norm": 1.9535678758440118,
      "learning_rate": 3.1971662619032844e-06,
      "loss": 0.5535,
      "step": 7170
    },
    {
      "epoch": 0.8792300147130947,
      "grad_norm": 1.9073546038991824,
      "learning_rate": 3.1966795146664864e-06,
      "loss": 0.5105,
      "step": 7171
    },
    {
      "epoch": 0.8793526238352133,
      "grad_norm": 1.9138815527502577,
      "learning_rate": 3.1961927387949266e-06,
      "loss": 0.536,
      "step": 7172
    },
    {
      "epoch": 0.879475232957332,
      "grad_norm": 2.1749958525297055,
      "learning_rate": 3.195705934308613e-06,
      "loss": 0.5676,
      "step": 7173
    },
    {
      "epoch": 0.8795978420794507,
      "grad_norm": 1.7952880661666801,
      "learning_rate": 3.195219101227553e-06,
      "loss": 0.4834,
      "step": 7174
    },
    {
      "epoch": 0.8797204512015694,
      "grad_norm": 1.8243221412547272,
      "learning_rate": 3.1947322395717585e-06,
      "loss": 0.5199,
      "step": 7175
    },
    {
      "epoch": 0.8798430603236881,
      "grad_norm": 1.8141753527196383,
      "learning_rate": 3.194245349361238e-06,
      "loss": 0.5692,
      "step": 7176
    },
    {
      "epoch": 0.8799656694458068,
      "grad_norm": 1.8231436513201715,
      "learning_rate": 3.1937584306160047e-06,
      "loss": 0.4954,
      "step": 7177
    },
    {
      "epoch": 0.8800882785679255,
      "grad_norm": 2.1399057419694327,
      "learning_rate": 3.193271483356072e-06,
      "loss": 0.552,
      "step": 7178
    },
    {
      "epoch": 0.8802108876900442,
      "grad_norm": 1.9436392106131188,
      "learning_rate": 3.1927845076014535e-06,
      "loss": 0.5843,
      "step": 7179
    },
    {
      "epoch": 0.8803334968121628,
      "grad_norm": 1.7223816337375288,
      "learning_rate": 3.1922975033721654e-06,
      "loss": 0.4818,
      "step": 7180
    },
    {
      "epoch": 0.8804561059342815,
      "grad_norm": 2.02264907808398,
      "learning_rate": 3.1918104706882246e-06,
      "loss": 0.5533,
      "step": 7181
    },
    {
      "epoch": 0.8805787150564002,
      "grad_norm": 1.9387001397817991,
      "learning_rate": 3.1913234095696482e-06,
      "loss": 0.566,
      "step": 7182
    },
    {
      "epoch": 0.8807013241785189,
      "grad_norm": 2.0190125364380895,
      "learning_rate": 3.1908363200364563e-06,
      "loss": 0.5381,
      "step": 7183
    },
    {
      "epoch": 0.8808239333006376,
      "grad_norm": 1.8623129332791506,
      "learning_rate": 3.1903492021086686e-06,
      "loss": 0.5314,
      "step": 7184
    },
    {
      "epoch": 0.8809465424227563,
      "grad_norm": 1.9935368786256094,
      "learning_rate": 3.1898620558063057e-06,
      "loss": 0.5583,
      "step": 7185
    },
    {
      "epoch": 0.881069151544875,
      "grad_norm": 1.9288663714362377,
      "learning_rate": 3.1893748811493926e-06,
      "loss": 0.5601,
      "step": 7186
    },
    {
      "epoch": 0.8811917606669937,
      "grad_norm": 1.8716224112464095,
      "learning_rate": 3.1888876781579497e-06,
      "loss": 0.5284,
      "step": 7187
    },
    {
      "epoch": 0.8813143697891123,
      "grad_norm": 2.2924713192546595,
      "learning_rate": 3.1884004468520054e-06,
      "loss": 0.5694,
      "step": 7188
    },
    {
      "epoch": 0.881436978911231,
      "grad_norm": 1.8828890450959874,
      "learning_rate": 3.187913187251583e-06,
      "loss": 0.5334,
      "step": 7189
    },
    {
      "epoch": 0.8815595880333497,
      "grad_norm": 1.976799841199752,
      "learning_rate": 3.1874258993767105e-06,
      "loss": 0.5582,
      "step": 7190
    },
    {
      "epoch": 0.8816821971554684,
      "grad_norm": 2.05445750935019,
      "learning_rate": 3.186938583247417e-06,
      "loss": 0.5167,
      "step": 7191
    },
    {
      "epoch": 0.8818048062775871,
      "grad_norm": 2.0700336725858692,
      "learning_rate": 3.186451238883731e-06,
      "loss": 0.5266,
      "step": 7192
    },
    {
      "epoch": 0.8819274153997058,
      "grad_norm": 2.0314080780270856,
      "learning_rate": 3.1859638663056845e-06,
      "loss": 0.5599,
      "step": 7193
    },
    {
      "epoch": 0.8820500245218245,
      "grad_norm": 1.8976137514849416,
      "learning_rate": 3.185476465533308e-06,
      "loss": 0.5294,
      "step": 7194
    },
    {
      "epoch": 0.8821726336439432,
      "grad_norm": 2.238372248484211,
      "learning_rate": 3.184989036586636e-06,
      "loss": 0.5818,
      "step": 7195
    },
    {
      "epoch": 0.8822952427660617,
      "grad_norm": 1.977154826745795,
      "learning_rate": 3.184501579485702e-06,
      "loss": 0.4848,
      "step": 7196
    },
    {
      "epoch": 0.8824178518881804,
      "grad_norm": 1.8914513855025055,
      "learning_rate": 3.1840140942505405e-06,
      "loss": 0.5289,
      "step": 7197
    },
    {
      "epoch": 0.8825404610102991,
      "grad_norm": 2.03386634169124,
      "learning_rate": 3.1835265809011896e-06,
      "loss": 0.5379,
      "step": 7198
    },
    {
      "epoch": 0.8826630701324178,
      "grad_norm": 1.7250849163896753,
      "learning_rate": 3.183039039457686e-06,
      "loss": 0.5097,
      "step": 7199
    },
    {
      "epoch": 0.8827856792545365,
      "grad_norm": 2.0185273669871764,
      "learning_rate": 3.1825514699400684e-06,
      "loss": 0.4981,
      "step": 7200
    },
    {
      "epoch": 0.8829082883766552,
      "grad_norm": 1.975286063082033,
      "learning_rate": 3.1820638723683766e-06,
      "loss": 0.5472,
      "step": 7201
    },
    {
      "epoch": 0.883030897498774,
      "grad_norm": 2.016749003799522,
      "learning_rate": 3.181576246762652e-06,
      "loss": 0.5156,
      "step": 7202
    },
    {
      "epoch": 0.8831535066208926,
      "grad_norm": 2.086948251483884,
      "learning_rate": 3.181088593142938e-06,
      "loss": 0.5091,
      "step": 7203
    },
    {
      "epoch": 0.8832761157430112,
      "grad_norm": 1.9551087282478767,
      "learning_rate": 3.1806009115292767e-06,
      "loss": 0.5057,
      "step": 7204
    },
    {
      "epoch": 0.8833987248651299,
      "grad_norm": 1.6510888962574453,
      "learning_rate": 3.1801132019417124e-06,
      "loss": 0.4584,
      "step": 7205
    },
    {
      "epoch": 0.8835213339872486,
      "grad_norm": 1.9944908625055107,
      "learning_rate": 3.179625464400292e-06,
      "loss": 0.565,
      "step": 7206
    },
    {
      "epoch": 0.8836439431093673,
      "grad_norm": 2.080404024563591,
      "learning_rate": 3.179137698925062e-06,
      "loss": 0.5075,
      "step": 7207
    },
    {
      "epoch": 0.883766552231486,
      "grad_norm": 1.7941054565091685,
      "learning_rate": 3.17864990553607e-06,
      "loss": 0.5385,
      "step": 7208
    },
    {
      "epoch": 0.8838891613536047,
      "grad_norm": 2.1805073555574785,
      "learning_rate": 3.178162084253366e-06,
      "loss": 0.601,
      "step": 7209
    },
    {
      "epoch": 0.8840117704757234,
      "grad_norm": 2.132029872681528,
      "learning_rate": 3.1776742350969987e-06,
      "loss": 0.5676,
      "step": 7210
    },
    {
      "epoch": 0.8841343795978421,
      "grad_norm": 1.9887430381206233,
      "learning_rate": 3.1771863580870217e-06,
      "loss": 0.4849,
      "step": 7211
    },
    {
      "epoch": 0.8842569887199607,
      "grad_norm": 1.8770923238196449,
      "learning_rate": 3.1766984532434853e-06,
      "loss": 0.4839,
      "step": 7212
    },
    {
      "epoch": 0.8843795978420794,
      "grad_norm": 1.870506291179766,
      "learning_rate": 3.1762105205864456e-06,
      "loss": 0.4776,
      "step": 7213
    },
    {
      "epoch": 0.8845022069641981,
      "grad_norm": 1.8834918288040818,
      "learning_rate": 3.175722560135956e-06,
      "loss": 0.5653,
      "step": 7214
    },
    {
      "epoch": 0.8846248160863168,
      "grad_norm": 1.7142042419477561,
      "learning_rate": 3.1752345719120726e-06,
      "loss": 0.537,
      "step": 7215
    },
    {
      "epoch": 0.8847474252084355,
      "grad_norm": 1.9001339327099402,
      "learning_rate": 3.174746555934853e-06,
      "loss": 0.5413,
      "step": 7216
    },
    {
      "epoch": 0.8848700343305542,
      "grad_norm": 1.8482734640917042,
      "learning_rate": 3.174258512224356e-06,
      "loss": 0.4946,
      "step": 7217
    },
    {
      "epoch": 0.8849926434526729,
      "grad_norm": 2.0833110267434316,
      "learning_rate": 3.1737704408006396e-06,
      "loss": 0.5392,
      "step": 7218
    },
    {
      "epoch": 0.8851152525747915,
      "grad_norm": 1.9798643972121195,
      "learning_rate": 3.173282341683766e-06,
      "loss": 0.4874,
      "step": 7219
    },
    {
      "epoch": 0.8852378616969102,
      "grad_norm": 1.9749423971084794,
      "learning_rate": 3.172794214893795e-06,
      "loss": 0.5218,
      "step": 7220
    },
    {
      "epoch": 0.8853604708190289,
      "grad_norm": 2.0811531839252813,
      "learning_rate": 3.1723060604507923e-06,
      "loss": 0.4888,
      "step": 7221
    },
    {
      "epoch": 0.8854830799411476,
      "grad_norm": 2.1662797816151715,
      "learning_rate": 3.171817878374819e-06,
      "loss": 0.5262,
      "step": 7222
    },
    {
      "epoch": 0.8856056890632663,
      "grad_norm": 1.7698917131461998,
      "learning_rate": 3.171329668685942e-06,
      "loss": 0.5407,
      "step": 7223
    },
    {
      "epoch": 0.885728298185385,
      "grad_norm": 1.9738225493722465,
      "learning_rate": 3.170841431404228e-06,
      "loss": 0.5457,
      "step": 7224
    },
    {
      "epoch": 0.8858509073075037,
      "grad_norm": 1.8982842236557078,
      "learning_rate": 3.1703531665497416e-06,
      "loss": 0.5095,
      "step": 7225
    },
    {
      "epoch": 0.8859735164296224,
      "grad_norm": 1.6803209140405417,
      "learning_rate": 3.1698648741425554e-06,
      "loss": 0.472,
      "step": 7226
    },
    {
      "epoch": 0.886096125551741,
      "grad_norm": 1.9331930736851555,
      "learning_rate": 3.1693765542027356e-06,
      "loss": 0.5184,
      "step": 7227
    },
    {
      "epoch": 0.8862187346738597,
      "grad_norm": 2.047559427683896,
      "learning_rate": 3.1688882067503548e-06,
      "loss": 0.5762,
      "step": 7228
    },
    {
      "epoch": 0.8863413437959784,
      "grad_norm": 1.7651214330184926,
      "learning_rate": 3.1683998318054842e-06,
      "loss": 0.5009,
      "step": 7229
    },
    {
      "epoch": 0.8864639529180971,
      "grad_norm": 1.9032919152775853,
      "learning_rate": 3.1679114293881974e-06,
      "loss": 0.4818,
      "step": 7230
    },
    {
      "epoch": 0.8865865620402158,
      "grad_norm": 2.0529798025460093,
      "learning_rate": 3.1674229995185684e-06,
      "loss": 0.5056,
      "step": 7231
    },
    {
      "epoch": 0.8867091711623345,
      "grad_norm": 2.0902595912116384,
      "learning_rate": 3.166934542216673e-06,
      "loss": 0.5076,
      "step": 7232
    },
    {
      "epoch": 0.8868317802844532,
      "grad_norm": 2.0063275610980984,
      "learning_rate": 3.166446057502587e-06,
      "loss": 0.5074,
      "step": 7233
    },
    {
      "epoch": 0.8869543894065719,
      "grad_norm": 2.0673653939150243,
      "learning_rate": 3.165957545396388e-06,
      "loss": 0.5109,
      "step": 7234
    },
    {
      "epoch": 0.8870769985286905,
      "grad_norm": 2.031782940530448,
      "learning_rate": 3.1654690059181546e-06,
      "loss": 0.5023,
      "step": 7235
    },
    {
      "epoch": 0.8871996076508092,
      "grad_norm": 1.859244491547672,
      "learning_rate": 3.164980439087968e-06,
      "loss": 0.5088,
      "step": 7236
    },
    {
      "epoch": 0.8873222167729279,
      "grad_norm": 1.829578340099296,
      "learning_rate": 3.1644918449259065e-06,
      "loss": 0.4905,
      "step": 7237
    },
    {
      "epoch": 0.8874448258950466,
      "grad_norm": 1.882991226441425,
      "learning_rate": 3.164003223452055e-06,
      "loss": 0.4899,
      "step": 7238
    },
    {
      "epoch": 0.8875674350171653,
      "grad_norm": 2.071735207569209,
      "learning_rate": 3.163514574686495e-06,
      "loss": 0.5147,
      "step": 7239
    },
    {
      "epoch": 0.887690044139284,
      "grad_norm": 1.78927130796663,
      "learning_rate": 3.163025898649311e-06,
      "loss": 0.5062,
      "step": 7240
    },
    {
      "epoch": 0.8878126532614027,
      "grad_norm": 1.8869806786698653,
      "learning_rate": 3.1625371953605897e-06,
      "loss": 0.4744,
      "step": 7241
    },
    {
      "epoch": 0.8879352623835214,
      "grad_norm": 1.8791585151196541,
      "learning_rate": 3.162048464840416e-06,
      "loss": 0.5415,
      "step": 7242
    },
    {
      "epoch": 0.88805787150564,
      "grad_norm": 1.9274184931058564,
      "learning_rate": 3.161559707108879e-06,
      "loss": 0.5103,
      "step": 7243
    },
    {
      "epoch": 0.8881804806277587,
      "grad_norm": 1.9513836080078921,
      "learning_rate": 3.1610709221860668e-06,
      "loss": 0.4717,
      "step": 7244
    },
    {
      "epoch": 0.8883030897498774,
      "grad_norm": 2.04100706122595,
      "learning_rate": 3.1605821100920682e-06,
      "loss": 0.6092,
      "step": 7245
    },
    {
      "epoch": 0.8884256988719961,
      "grad_norm": 2.086826447242354,
      "learning_rate": 3.1600932708469766e-06,
      "loss": 0.5147,
      "step": 7246
    },
    {
      "epoch": 0.8885483079941148,
      "grad_norm": 1.8250231656145541,
      "learning_rate": 3.159604404470883e-06,
      "loss": 0.4818,
      "step": 7247
    },
    {
      "epoch": 0.8886709171162335,
      "grad_norm": 1.9453957239289066,
      "learning_rate": 3.15911551098388e-06,
      "loss": 0.5367,
      "step": 7248
    },
    {
      "epoch": 0.8887935262383522,
      "grad_norm": 1.7253443200324792,
      "learning_rate": 3.1586265904060628e-06,
      "loss": 0.4357,
      "step": 7249
    },
    {
      "epoch": 0.8889161353604709,
      "grad_norm": 1.7762476907834648,
      "learning_rate": 3.158137642757526e-06,
      "loss": 0.5143,
      "step": 7250
    },
    {
      "epoch": 0.8890387444825895,
      "grad_norm": 2.1727020782313406,
      "learning_rate": 3.1576486680583686e-06,
      "loss": 0.5281,
      "step": 7251
    },
    {
      "epoch": 0.8891613536047082,
      "grad_norm": 1.8027688234839039,
      "learning_rate": 3.1571596663286852e-06,
      "loss": 0.4919,
      "step": 7252
    },
    {
      "epoch": 0.8892839627268269,
      "grad_norm": 1.9458027952339605,
      "learning_rate": 3.156670637588577e-06,
      "loss": 0.5005,
      "step": 7253
    },
    {
      "epoch": 0.8894065718489456,
      "grad_norm": 1.7878994705488571,
      "learning_rate": 3.156181581858142e-06,
      "loss": 0.4867,
      "step": 7254
    },
    {
      "epoch": 0.8895291809710643,
      "grad_norm": 2.1253419587291518,
      "learning_rate": 3.1556924991574835e-06,
      "loss": 0.5827,
      "step": 7255
    },
    {
      "epoch": 0.889651790093183,
      "grad_norm": 1.9186098342867506,
      "learning_rate": 3.155203389506702e-06,
      "loss": 0.4849,
      "step": 7256
    },
    {
      "epoch": 0.8897743992153017,
      "grad_norm": 1.9365214465321474,
      "learning_rate": 3.154714252925901e-06,
      "loss": 0.5261,
      "step": 7257
    },
    {
      "epoch": 0.8898970083374204,
      "grad_norm": 1.7362105680213202,
      "learning_rate": 3.1542250894351857e-06,
      "loss": 0.5067,
      "step": 7258
    },
    {
      "epoch": 0.890019617459539,
      "grad_norm": 1.91007853788583,
      "learning_rate": 3.1537358990546606e-06,
      "loss": 0.523,
      "step": 7259
    },
    {
      "epoch": 0.8901422265816576,
      "grad_norm": 1.8674835014417623,
      "learning_rate": 3.1532466818044327e-06,
      "loss": 0.5269,
      "step": 7260
    },
    {
      "epoch": 0.8902648357037763,
      "grad_norm": 2.0653663975275274,
      "learning_rate": 3.15275743770461e-06,
      "loss": 0.5488,
      "step": 7261
    },
    {
      "epoch": 0.890387444825895,
      "grad_norm": 2.1667067926530965,
      "learning_rate": 3.152268166775301e-06,
      "loss": 0.5033,
      "step": 7262
    },
    {
      "epoch": 0.8905100539480137,
      "grad_norm": 1.7545206033613976,
      "learning_rate": 3.1517788690366146e-06,
      "loss": 0.4899,
      "step": 7263
    },
    {
      "epoch": 0.8906326630701324,
      "grad_norm": 1.973361805202451,
      "learning_rate": 3.151289544508664e-06,
      "loss": 0.5144,
      "step": 7264
    },
    {
      "epoch": 0.8907552721922511,
      "grad_norm": 2.238333032621938,
      "learning_rate": 3.1508001932115593e-06,
      "loss": 0.5483,
      "step": 7265
    },
    {
      "epoch": 0.8908778813143697,
      "grad_norm": 1.880325094233226,
      "learning_rate": 3.150310815165415e-06,
      "loss": 0.4725,
      "step": 7266
    },
    {
      "epoch": 0.8910004904364884,
      "grad_norm": 2.0694588315181437,
      "learning_rate": 3.149821410390345e-06,
      "loss": 0.5288,
      "step": 7267
    },
    {
      "epoch": 0.8911230995586071,
      "grad_norm": 2.237536107253329,
      "learning_rate": 3.149331978906464e-06,
      "loss": 0.5439,
      "step": 7268
    },
    {
      "epoch": 0.8912457086807258,
      "grad_norm": 1.8965958281145308,
      "learning_rate": 3.14884252073389e-06,
      "loss": 0.5014,
      "step": 7269
    },
    {
      "epoch": 0.8913683178028445,
      "grad_norm": 2.039957771614909,
      "learning_rate": 3.148353035892739e-06,
      "loss": 0.5351,
      "step": 7270
    },
    {
      "epoch": 0.8914909269249632,
      "grad_norm": 2.0787889835471853,
      "learning_rate": 3.1478635244031307e-06,
      "loss": 0.5281,
      "step": 7271
    },
    {
      "epoch": 0.8916135360470819,
      "grad_norm": 1.7368446131612112,
      "learning_rate": 3.147373986285185e-06,
      "loss": 0.5679,
      "step": 7272
    },
    {
      "epoch": 0.8917361451692006,
      "grad_norm": 2.077526029789881,
      "learning_rate": 3.146884421559021e-06,
      "loss": 0.5157,
      "step": 7273
    },
    {
      "epoch": 0.8918587542913192,
      "grad_norm": 1.8339650309401379,
      "learning_rate": 3.1463948302447635e-06,
      "loss": 0.5294,
      "step": 7274
    },
    {
      "epoch": 0.8919813634134379,
      "grad_norm": 1.9068507602900127,
      "learning_rate": 3.145905212362533e-06,
      "loss": 0.6046,
      "step": 7275
    },
    {
      "epoch": 0.8921039725355566,
      "grad_norm": 1.8406055604518834,
      "learning_rate": 3.145415567932456e-06,
      "loss": 0.5089,
      "step": 7276
    },
    {
      "epoch": 0.8922265816576753,
      "grad_norm": 2.088739912268579,
      "learning_rate": 3.1449258969746555e-06,
      "loss": 0.5284,
      "step": 7277
    },
    {
      "epoch": 0.892349190779794,
      "grad_norm": 1.884180732638926,
      "learning_rate": 3.14443619950926e-06,
      "loss": 0.5487,
      "step": 7278
    },
    {
      "epoch": 0.8924717999019127,
      "grad_norm": 1.8633584840289532,
      "learning_rate": 3.1439464755563946e-06,
      "loss": 0.517,
      "step": 7279
    },
    {
      "epoch": 0.8925944090240314,
      "grad_norm": 1.983838078145463,
      "learning_rate": 3.143456725136189e-06,
      "loss": 0.515,
      "step": 7280
    },
    {
      "epoch": 0.8927170181461501,
      "grad_norm": 1.910805135704346,
      "learning_rate": 3.1429669482687743e-06,
      "loss": 0.4801,
      "step": 7281
    },
    {
      "epoch": 0.8928396272682687,
      "grad_norm": 1.8827478733091771,
      "learning_rate": 3.1424771449742786e-06,
      "loss": 0.527,
      "step": 7282
    },
    {
      "epoch": 0.8929622363903874,
      "grad_norm": 1.7589799320244137,
      "learning_rate": 3.141987315272835e-06,
      "loss": 0.467,
      "step": 7283
    },
    {
      "epoch": 0.8930848455125061,
      "grad_norm": 2.0356670483920407,
      "learning_rate": 3.1414974591845766e-06,
      "loss": 0.6301,
      "step": 7284
    },
    {
      "epoch": 0.8932074546346248,
      "grad_norm": 1.9869544504301382,
      "learning_rate": 3.1410075767296368e-06,
      "loss": 0.5874,
      "step": 7285
    },
    {
      "epoch": 0.8933300637567435,
      "grad_norm": 1.8648068467992978,
      "learning_rate": 3.14051766792815e-06,
      "loss": 0.5195,
      "step": 7286
    },
    {
      "epoch": 0.8934526728788622,
      "grad_norm": 1.9457838896764,
      "learning_rate": 3.1400277328002537e-06,
      "loss": 0.5684,
      "step": 7287
    },
    {
      "epoch": 0.8935752820009809,
      "grad_norm": 2.005969802030387,
      "learning_rate": 3.1395377713660848e-06,
      "loss": 0.5162,
      "step": 7288
    },
    {
      "epoch": 0.8936978911230996,
      "grad_norm": 1.8647259980295026,
      "learning_rate": 3.1390477836457806e-06,
      "loss": 0.5097,
      "step": 7289
    },
    {
      "epoch": 0.8938205002452182,
      "grad_norm": 1.952887131390839,
      "learning_rate": 3.1385577696594816e-06,
      "loss": 0.5134,
      "step": 7290
    },
    {
      "epoch": 0.8939431093673369,
      "grad_norm": 1.8171639558188415,
      "learning_rate": 3.1380677294273275e-06,
      "loss": 0.5285,
      "step": 7291
    },
    {
      "epoch": 0.8940657184894556,
      "grad_norm": 1.9092033748137363,
      "learning_rate": 3.137577662969461e-06,
      "loss": 0.5116,
      "step": 7292
    },
    {
      "epoch": 0.8941883276115743,
      "grad_norm": 2.081833470831064,
      "learning_rate": 3.137087570306022e-06,
      "loss": 0.5356,
      "step": 7293
    },
    {
      "epoch": 0.894310936733693,
      "grad_norm": 1.974555802383969,
      "learning_rate": 3.1365974514571584e-06,
      "loss": 0.5814,
      "step": 7294
    },
    {
      "epoch": 0.8944335458558117,
      "grad_norm": 1.705059176371208,
      "learning_rate": 3.1361073064430105e-06,
      "loss": 0.4792,
      "step": 7295
    },
    {
      "epoch": 0.8945561549779304,
      "grad_norm": 1.891977247845311,
      "learning_rate": 3.1356171352837273e-06,
      "loss": 0.5195,
      "step": 7296
    },
    {
      "epoch": 0.8946787641000491,
      "grad_norm": 1.9767776737871856,
      "learning_rate": 3.135126937999454e-06,
      "loss": 0.5542,
      "step": 7297
    },
    {
      "epoch": 0.8948013732221677,
      "grad_norm": 1.880318773132972,
      "learning_rate": 3.134636714610339e-06,
      "loss": 0.5682,
      "step": 7298
    },
    {
      "epoch": 0.8949239823442864,
      "grad_norm": 1.982707470593722,
      "learning_rate": 3.134146465136532e-06,
      "loss": 0.544,
      "step": 7299
    },
    {
      "epoch": 0.8950465914664051,
      "grad_norm": 2.199320633832737,
      "learning_rate": 3.1336561895981827e-06,
      "loss": 0.5702,
      "step": 7300
    },
    {
      "epoch": 0.8951692005885238,
      "grad_norm": 1.8919791634505787,
      "learning_rate": 3.133165888015442e-06,
      "loss": 0.4948,
      "step": 7301
    },
    {
      "epoch": 0.8952918097106425,
      "grad_norm": 1.956849296020164,
      "learning_rate": 3.1326755604084617e-06,
      "loss": 0.5021,
      "step": 7302
    },
    {
      "epoch": 0.8954144188327612,
      "grad_norm": 1.9050201544291023,
      "learning_rate": 3.132185206797397e-06,
      "loss": 0.5374,
      "step": 7303
    },
    {
      "epoch": 0.8955370279548799,
      "grad_norm": 1.8404970275876742,
      "learning_rate": 3.1316948272024e-06,
      "loss": 0.531,
      "step": 7304
    },
    {
      "epoch": 0.8956596370769986,
      "grad_norm": 1.708032122714076,
      "learning_rate": 3.1312044216436278e-06,
      "loss": 0.5186,
      "step": 7305
    },
    {
      "epoch": 0.8957822461991172,
      "grad_norm": 2.121043483159496,
      "learning_rate": 3.1307139901412365e-06,
      "loss": 0.5478,
      "step": 7306
    },
    {
      "epoch": 0.8959048553212359,
      "grad_norm": 1.7882962528300965,
      "learning_rate": 3.130223532715384e-06,
      "loss": 0.5303,
      "step": 7307
    },
    {
      "epoch": 0.8960274644433546,
      "grad_norm": 1.9256900764217002,
      "learning_rate": 3.1297330493862278e-06,
      "loss": 0.5395,
      "step": 7308
    },
    {
      "epoch": 0.8961500735654733,
      "grad_norm": 1.9455150972078794,
      "learning_rate": 3.129242540173929e-06,
      "loss": 0.533,
      "step": 7309
    },
    {
      "epoch": 0.896272682687592,
      "grad_norm": 2.17441702073418,
      "learning_rate": 3.1287520050986474e-06,
      "loss": 0.5486,
      "step": 7310
    },
    {
      "epoch": 0.8963952918097107,
      "grad_norm": 1.9890391302205588,
      "learning_rate": 3.128261444180545e-06,
      "loss": 0.5257,
      "step": 7311
    },
    {
      "epoch": 0.8965179009318294,
      "grad_norm": 1.9279867305246559,
      "learning_rate": 3.127770857439786e-06,
      "loss": 0.5427,
      "step": 7312
    },
    {
      "epoch": 0.896640510053948,
      "grad_norm": 1.755720297094438,
      "learning_rate": 3.1272802448965323e-06,
      "loss": 0.5158,
      "step": 7313
    },
    {
      "epoch": 0.8967631191760667,
      "grad_norm": 1.834829256600077,
      "learning_rate": 3.1267896065709507e-06,
      "loss": 0.562,
      "step": 7314
    },
    {
      "epoch": 0.8968857282981854,
      "grad_norm": 1.9525947259562855,
      "learning_rate": 3.1262989424832063e-06,
      "loss": 0.4932,
      "step": 7315
    },
    {
      "epoch": 0.8970083374203041,
      "grad_norm": 1.975055406839304,
      "learning_rate": 3.1258082526534665e-06,
      "loss": 0.5209,
      "step": 7316
    },
    {
      "epoch": 0.8971309465424228,
      "grad_norm": 1.9072788556735853,
      "learning_rate": 3.1253175371019005e-06,
      "loss": 0.5181,
      "step": 7317
    },
    {
      "epoch": 0.8972535556645415,
      "grad_norm": 1.8924108893772391,
      "learning_rate": 3.1248267958486754e-06,
      "loss": 0.5408,
      "step": 7318
    },
    {
      "epoch": 0.8973761647866602,
      "grad_norm": 1.786521957527778,
      "learning_rate": 3.124336028913964e-06,
      "loss": 0.4681,
      "step": 7319
    },
    {
      "epoch": 0.8974987739087789,
      "grad_norm": 1.9573026554003474,
      "learning_rate": 3.123845236317936e-06,
      "loss": 0.5267,
      "step": 7320
    },
    {
      "epoch": 0.8976213830308974,
      "grad_norm": 1.790951177500587,
      "learning_rate": 3.1233544180807634e-06,
      "loss": 0.5543,
      "step": 7321
    },
    {
      "epoch": 0.8977439921530161,
      "grad_norm": 1.8193313038461807,
      "learning_rate": 3.1228635742226223e-06,
      "loss": 0.4905,
      "step": 7322
    },
    {
      "epoch": 0.8978666012751348,
      "grad_norm": 1.8962203065195333,
      "learning_rate": 3.122372704763684e-06,
      "loss": 0.4952,
      "step": 7323
    },
    {
      "epoch": 0.8979892103972535,
      "grad_norm": 1.9203306102245663,
      "learning_rate": 3.1218818097241273e-06,
      "loss": 0.5495,
      "step": 7324
    },
    {
      "epoch": 0.8981118195193722,
      "grad_norm": 1.7815001135344422,
      "learning_rate": 3.121390889124126e-06,
      "loss": 0.5178,
      "step": 7325
    },
    {
      "epoch": 0.898234428641491,
      "grad_norm": 1.9425458986434523,
      "learning_rate": 3.1208999429838597e-06,
      "loss": 0.5033,
      "step": 7326
    },
    {
      "epoch": 0.8983570377636096,
      "grad_norm": 1.8591933499588325,
      "learning_rate": 3.120408971323507e-06,
      "loss": 0.5434,
      "step": 7327
    },
    {
      "epoch": 0.8984796468857283,
      "grad_norm": 1.8217459521551471,
      "learning_rate": 3.1199179741632463e-06,
      "loss": 0.524,
      "step": 7328
    },
    {
      "epoch": 0.8986022560078469,
      "grad_norm": 1.9881169038773672,
      "learning_rate": 3.11942695152326e-06,
      "loss": 0.5475,
      "step": 7329
    },
    {
      "epoch": 0.8987248651299656,
      "grad_norm": 1.9593146578807514,
      "learning_rate": 3.1189359034237293e-06,
      "loss": 0.5805,
      "step": 7330
    },
    {
      "epoch": 0.8988474742520843,
      "grad_norm": 1.9576997564119707,
      "learning_rate": 3.118444829884837e-06,
      "loss": 0.6003,
      "step": 7331
    },
    {
      "epoch": 0.898970083374203,
      "grad_norm": 1.8610189823287313,
      "learning_rate": 3.1179537309267677e-06,
      "loss": 0.4844,
      "step": 7332
    },
    {
      "epoch": 0.8990926924963217,
      "grad_norm": 1.8854513606988954,
      "learning_rate": 3.117462606569705e-06,
      "loss": 0.5412,
      "step": 7333
    },
    {
      "epoch": 0.8992153016184404,
      "grad_norm": 2.0422925679477264,
      "learning_rate": 3.116971456833837e-06,
      "loss": 0.5421,
      "step": 7334
    },
    {
      "epoch": 0.8993379107405591,
      "grad_norm": 2.0055920409788652,
      "learning_rate": 3.1164802817393497e-06,
      "loss": 0.4972,
      "step": 7335
    },
    {
      "epoch": 0.8994605198626778,
      "grad_norm": 1.9260315264846652,
      "learning_rate": 3.1159890813064315e-06,
      "loss": 0.4836,
      "step": 7336
    },
    {
      "epoch": 0.8995831289847964,
      "grad_norm": 1.9181659492978116,
      "learning_rate": 3.115497855555271e-06,
      "loss": 0.477,
      "step": 7337
    },
    {
      "epoch": 0.8997057381069151,
      "grad_norm": 1.6672449868095336,
      "learning_rate": 3.1150066045060595e-06,
      "loss": 0.507,
      "step": 7338
    },
    {
      "epoch": 0.8998283472290338,
      "grad_norm": 1.9262134517746692,
      "learning_rate": 3.1145153281789875e-06,
      "loss": 0.5106,
      "step": 7339
    },
    {
      "epoch": 0.8999509563511525,
      "grad_norm": 2.103535430944259,
      "learning_rate": 3.1140240265942485e-06,
      "loss": 0.5445,
      "step": 7340
    },
    {
      "epoch": 0.9000735654732712,
      "grad_norm": 1.9873109631240264,
      "learning_rate": 3.1135326997720337e-06,
      "loss": 0.4803,
      "step": 7341
    },
    {
      "epoch": 0.9001961745953899,
      "grad_norm": 1.9658658718211768,
      "learning_rate": 3.1130413477325398e-06,
      "loss": 0.5505,
      "step": 7342
    },
    {
      "epoch": 0.9003187837175086,
      "grad_norm": 2.025267050257743,
      "learning_rate": 3.11254997049596e-06,
      "loss": 0.484,
      "step": 7343
    },
    {
      "epoch": 0.9004413928396273,
      "grad_norm": 1.8725869219085667,
      "learning_rate": 3.1120585680824933e-06,
      "loss": 0.5181,
      "step": 7344
    },
    {
      "epoch": 0.9005640019617459,
      "grad_norm": 1.8499368712236315,
      "learning_rate": 3.111567140512335e-06,
      "loss": 0.4467,
      "step": 7345
    },
    {
      "epoch": 0.9006866110838646,
      "grad_norm": 1.8472715136102988,
      "learning_rate": 3.1110756878056844e-06,
      "loss": 0.4695,
      "step": 7346
    },
    {
      "epoch": 0.9008092202059833,
      "grad_norm": 1.7115105593839626,
      "learning_rate": 3.110584209982742e-06,
      "loss": 0.504,
      "step": 7347
    },
    {
      "epoch": 0.900931829328102,
      "grad_norm": 2.0952568159377374,
      "learning_rate": 3.110092707063707e-06,
      "loss": 0.5423,
      "step": 7348
    },
    {
      "epoch": 0.9010544384502207,
      "grad_norm": 1.9604881902974467,
      "learning_rate": 3.109601179068782e-06,
      "loss": 0.5495,
      "step": 7349
    },
    {
      "epoch": 0.9011770475723394,
      "grad_norm": 2.1415339917178584,
      "learning_rate": 3.1091096260181693e-06,
      "loss": 0.542,
      "step": 7350
    },
    {
      "epoch": 0.9012996566944581,
      "grad_norm": 1.9398482791817035,
      "learning_rate": 3.108618047932072e-06,
      "loss": 0.4694,
      "step": 7351
    },
    {
      "epoch": 0.9014222658165768,
      "grad_norm": 2.1126898165083787,
      "learning_rate": 3.1081264448306964e-06,
      "loss": 0.5552,
      "step": 7352
    },
    {
      "epoch": 0.9015448749386954,
      "grad_norm": 1.8892316179923363,
      "learning_rate": 3.107634816734247e-06,
      "loss": 0.5453,
      "step": 7353
    },
    {
      "epoch": 0.9016674840608141,
      "grad_norm": 2.0820380742801126,
      "learning_rate": 3.10714316366293e-06,
      "loss": 0.5666,
      "step": 7354
    },
    {
      "epoch": 0.9017900931829328,
      "grad_norm": 1.8223109833428746,
      "learning_rate": 3.1066514856369556e-06,
      "loss": 0.4878,
      "step": 7355
    },
    {
      "epoch": 0.9019127023050515,
      "grad_norm": 1.95064382790555,
      "learning_rate": 3.10615978267653e-06,
      "loss": 0.4818,
      "step": 7356
    },
    {
      "epoch": 0.9020353114271702,
      "grad_norm": 1.9203203124119588,
      "learning_rate": 3.1056680548018655e-06,
      "loss": 0.5338,
      "step": 7357
    },
    {
      "epoch": 0.9021579205492889,
      "grad_norm": 1.7704367368219542,
      "learning_rate": 3.1051763020331704e-06,
      "loss": 0.4924,
      "step": 7358
    },
    {
      "epoch": 0.9022805296714076,
      "grad_norm": 1.838466521607847,
      "learning_rate": 3.1046845243906587e-06,
      "loss": 0.4955,
      "step": 7359
    },
    {
      "epoch": 0.9024031387935263,
      "grad_norm": 1.9154267087744874,
      "learning_rate": 3.1041927218945418e-06,
      "loss": 0.5825,
      "step": 7360
    },
    {
      "epoch": 0.9025257479156449,
      "grad_norm": 1.7419243316352049,
      "learning_rate": 3.103700894565035e-06,
      "loss": 0.5188,
      "step": 7361
    },
    {
      "epoch": 0.9026483570377636,
      "grad_norm": 1.8895881251626527,
      "learning_rate": 3.103209042422352e-06,
      "loss": 0.5311,
      "step": 7362
    },
    {
      "epoch": 0.9027709661598823,
      "grad_norm": 1.8457807591673312,
      "learning_rate": 3.10271716548671e-06,
      "loss": 0.5308,
      "step": 7363
    },
    {
      "epoch": 0.902893575282001,
      "grad_norm": 2.157693185980315,
      "learning_rate": 3.102225263778326e-06,
      "loss": 0.5547,
      "step": 7364
    },
    {
      "epoch": 0.9030161844041197,
      "grad_norm": 2.0042564895306216,
      "learning_rate": 3.1017333373174167e-06,
      "loss": 0.5482,
      "step": 7365
    },
    {
      "epoch": 0.9031387935262384,
      "grad_norm": 1.9959165649003714,
      "learning_rate": 3.1012413861242018e-06,
      "loss": 0.5124,
      "step": 7366
    },
    {
      "epoch": 0.9032614026483571,
      "grad_norm": 1.8043419917608157,
      "learning_rate": 3.1007494102189022e-06,
      "loss": 0.5004,
      "step": 7367
    },
    {
      "epoch": 0.9033840117704757,
      "grad_norm": 1.9722021464350872,
      "learning_rate": 3.100257409621738e-06,
      "loss": 0.5489,
      "step": 7368
    },
    {
      "epoch": 0.9035066208925944,
      "grad_norm": 1.9967871121096967,
      "learning_rate": 3.099765384352931e-06,
      "loss": 0.5319,
      "step": 7369
    },
    {
      "epoch": 0.9036292300147131,
      "grad_norm": 1.9205179701299886,
      "learning_rate": 3.0992733344327057e-06,
      "loss": 0.5244,
      "step": 7370
    },
    {
      "epoch": 0.9037518391368318,
      "grad_norm": 1.8257896187822944,
      "learning_rate": 3.098781259881285e-06,
      "loss": 0.5008,
      "step": 7371
    },
    {
      "epoch": 0.9038744482589505,
      "grad_norm": 2.2293184904995234,
      "learning_rate": 3.0982891607188948e-06,
      "loss": 0.5191,
      "step": 7372
    },
    {
      "epoch": 0.9039970573810692,
      "grad_norm": 1.9318380450894366,
      "learning_rate": 3.0977970369657608e-06,
      "loss": 0.5423,
      "step": 7373
    },
    {
      "epoch": 0.9041196665031879,
      "grad_norm": 1.7981017122722212,
      "learning_rate": 3.09730488864211e-06,
      "loss": 0.4784,
      "step": 7374
    },
    {
      "epoch": 0.9042422756253066,
      "grad_norm": 2.0390877978443833,
      "learning_rate": 3.096812715768171e-06,
      "loss": 0.496,
      "step": 7375
    },
    {
      "epoch": 0.9043648847474252,
      "grad_norm": 1.9611075968729632,
      "learning_rate": 3.096320518364173e-06,
      "loss": 0.5247,
      "step": 7376
    },
    {
      "epoch": 0.9044874938695439,
      "grad_norm": 1.7321803656889092,
      "learning_rate": 3.0958282964503456e-06,
      "loss": 0.5025,
      "step": 7377
    },
    {
      "epoch": 0.9046101029916626,
      "grad_norm": 1.757755031844706,
      "learning_rate": 3.095336050046921e-06,
      "loss": 0.531,
      "step": 7378
    },
    {
      "epoch": 0.9047327121137813,
      "grad_norm": 2.116162623954267,
      "learning_rate": 3.0948437791741297e-06,
      "loss": 0.5188,
      "step": 7379
    },
    {
      "epoch": 0.9048553212359,
      "grad_norm": 1.8581693404301138,
      "learning_rate": 3.0943514838522077e-06,
      "loss": 0.5245,
      "step": 7380
    },
    {
      "epoch": 0.9049779303580187,
      "grad_norm": 1.9472433726479301,
      "learning_rate": 3.093859164101386e-06,
      "loss": 0.5006,
      "step": 7381
    },
    {
      "epoch": 0.9051005394801374,
      "grad_norm": 2.031422082897167,
      "learning_rate": 3.093366819941902e-06,
      "loss": 0.4922,
      "step": 7382
    },
    {
      "epoch": 0.9052231486022561,
      "grad_norm": 2.0351751034057886,
      "learning_rate": 3.0928744513939913e-06,
      "loss": 0.5293,
      "step": 7383
    },
    {
      "epoch": 0.9053457577243746,
      "grad_norm": 1.9540498032551208,
      "learning_rate": 3.0923820584778902e-06,
      "loss": 0.5534,
      "step": 7384
    },
    {
      "epoch": 0.9054683668464933,
      "grad_norm": 1.8560235629256912,
      "learning_rate": 3.091889641213838e-06,
      "loss": 0.5627,
      "step": 7385
    },
    {
      "epoch": 0.905590975968612,
      "grad_norm": 1.997152219910486,
      "learning_rate": 3.0913971996220744e-06,
      "loss": 0.5503,
      "step": 7386
    },
    {
      "epoch": 0.9057135850907307,
      "grad_norm": 1.82385399851055,
      "learning_rate": 3.090904733722839e-06,
      "loss": 0.4984,
      "step": 7387
    },
    {
      "epoch": 0.9058361942128494,
      "grad_norm": 1.9414854517765268,
      "learning_rate": 3.0904122435363727e-06,
      "loss": 0.4898,
      "step": 7388
    },
    {
      "epoch": 0.9059588033349681,
      "grad_norm": 1.9839521169489682,
      "learning_rate": 3.089919729082917e-06,
      "loss": 0.5702,
      "step": 7389
    },
    {
      "epoch": 0.9060814124570868,
      "grad_norm": 2.121651105275855,
      "learning_rate": 3.089427190382717e-06,
      "loss": 0.5412,
      "step": 7390
    },
    {
      "epoch": 0.9062040215792055,
      "grad_norm": 2.010909731394625,
      "learning_rate": 3.0889346274560156e-06,
      "loss": 0.5664,
      "step": 7391
    },
    {
      "epoch": 0.9063266307013241,
      "grad_norm": 1.8130033602293465,
      "learning_rate": 3.0884420403230588e-06,
      "loss": 0.5007,
      "step": 7392
    },
    {
      "epoch": 0.9064492398234428,
      "grad_norm": 1.9943402921580606,
      "learning_rate": 3.087949429004092e-06,
      "loss": 0.5161,
      "step": 7393
    },
    {
      "epoch": 0.9065718489455615,
      "grad_norm": 2.0340471467057504,
      "learning_rate": 3.0874567935193624e-06,
      "loss": 0.5199,
      "step": 7394
    },
    {
      "epoch": 0.9066944580676802,
      "grad_norm": 2.024679721899842,
      "learning_rate": 3.08696413388912e-06,
      "loss": 0.5337,
      "step": 7395
    },
    {
      "epoch": 0.9068170671897989,
      "grad_norm": 1.960788806077003,
      "learning_rate": 3.0864714501336114e-06,
      "loss": 0.5834,
      "step": 7396
    },
    {
      "epoch": 0.9069396763119176,
      "grad_norm": 2.145954227195468,
      "learning_rate": 3.085978742273088e-06,
      "loss": 0.5051,
      "step": 7397
    },
    {
      "epoch": 0.9070622854340363,
      "grad_norm": 1.7357867978864823,
      "learning_rate": 3.085486010327801e-06,
      "loss": 0.5184,
      "step": 7398
    },
    {
      "epoch": 0.907184894556155,
      "grad_norm": 1.9874800863532232,
      "learning_rate": 3.0849932543180024e-06,
      "loss": 0.5751,
      "step": 7399
    },
    {
      "epoch": 0.9073075036782736,
      "grad_norm": 2.0272886075348855,
      "learning_rate": 3.0845004742639457e-06,
      "loss": 0.5475,
      "step": 7400
    },
    {
      "epoch": 0.9074301128003923,
      "grad_norm": 2.098613771228256,
      "learning_rate": 3.0840076701858845e-06,
      "loss": 0.5797,
      "step": 7401
    },
    {
      "epoch": 0.907552721922511,
      "grad_norm": 1.7447627966699377,
      "learning_rate": 3.0835148421040737e-06,
      "loss": 0.4934,
      "step": 7402
    },
    {
      "epoch": 0.9076753310446297,
      "grad_norm": 1.9411051665476144,
      "learning_rate": 3.0830219900387707e-06,
      "loss": 0.5254,
      "step": 7403
    },
    {
      "epoch": 0.9077979401667484,
      "grad_norm": 1.823980206471273,
      "learning_rate": 3.0825291140102314e-06,
      "loss": 0.507,
      "step": 7404
    },
    {
      "epoch": 0.9079205492888671,
      "grad_norm": 1.7593138614917434,
      "learning_rate": 3.0820362140387142e-06,
      "loss": 0.4351,
      "step": 7405
    },
    {
      "epoch": 0.9080431584109858,
      "grad_norm": 1.9918258704866925,
      "learning_rate": 3.0815432901444786e-06,
      "loss": 0.5889,
      "step": 7406
    },
    {
      "epoch": 0.9081657675331045,
      "grad_norm": 1.8810911322243167,
      "learning_rate": 3.0810503423477838e-06,
      "loss": 0.4865,
      "step": 7407
    },
    {
      "epoch": 0.9082883766552231,
      "grad_norm": 1.8000719641111391,
      "learning_rate": 3.0805573706688913e-06,
      "loss": 0.4968,
      "step": 7408
    },
    {
      "epoch": 0.9084109857773418,
      "grad_norm": 2.267426812130336,
      "learning_rate": 3.080064375128063e-06,
      "loss": 0.5667,
      "step": 7409
    },
    {
      "epoch": 0.9085335948994605,
      "grad_norm": 1.8773965625225388,
      "learning_rate": 3.079571355745562e-06,
      "loss": 0.477,
      "step": 7410
    },
    {
      "epoch": 0.9086562040215792,
      "grad_norm": 1.9166775040355377,
      "learning_rate": 3.079078312541652e-06,
      "loss": 0.4734,
      "step": 7411
    },
    {
      "epoch": 0.9087788131436979,
      "grad_norm": 1.876315686958219,
      "learning_rate": 3.078585245536599e-06,
      "loss": 0.5285,
      "step": 7412
    },
    {
      "epoch": 0.9089014222658166,
      "grad_norm": 1.972188259936743,
      "learning_rate": 3.0780921547506676e-06,
      "loss": 0.4839,
      "step": 7413
    },
    {
      "epoch": 0.9090240313879353,
      "grad_norm": 1.7320257163138268,
      "learning_rate": 3.0775990402041247e-06,
      "loss": 0.5011,
      "step": 7414
    },
    {
      "epoch": 0.9091466405100539,
      "grad_norm": 1.8514542264591545,
      "learning_rate": 3.07710590191724e-06,
      "loss": 0.4861,
      "step": 7415
    },
    {
      "epoch": 0.9092692496321726,
      "grad_norm": 1.8798275851090198,
      "learning_rate": 3.0766127399102797e-06,
      "loss": 0.5086,
      "step": 7416
    },
    {
      "epoch": 0.9093918587542913,
      "grad_norm": 2.0809040718185314,
      "learning_rate": 3.076119554203515e-06,
      "loss": 0.5272,
      "step": 7417
    },
    {
      "epoch": 0.90951446787641,
      "grad_norm": 2.3620979229142263,
      "learning_rate": 3.075626344817218e-06,
      "loss": 0.5436,
      "step": 7418
    },
    {
      "epoch": 0.9096370769985287,
      "grad_norm": 2.014252620379108,
      "learning_rate": 3.0751331117716575e-06,
      "loss": 0.5513,
      "step": 7419
    },
    {
      "epoch": 0.9097596861206474,
      "grad_norm": 1.97855228395008,
      "learning_rate": 3.0746398550871093e-06,
      "loss": 0.5153,
      "step": 7420
    },
    {
      "epoch": 0.9098822952427661,
      "grad_norm": 1.8890904155384531,
      "learning_rate": 3.0741465747838452e-06,
      "loss": 0.5634,
      "step": 7421
    },
    {
      "epoch": 0.9100049043648848,
      "grad_norm": 2.1476239159471793,
      "learning_rate": 3.0736532708821403e-06,
      "loss": 0.5407,
      "step": 7422
    },
    {
      "epoch": 0.9101275134870034,
      "grad_norm": 1.834872368824873,
      "learning_rate": 3.07315994340227e-06,
      "loss": 0.4992,
      "step": 7423
    },
    {
      "epoch": 0.9102501226091221,
      "grad_norm": 1.7642160875836597,
      "learning_rate": 3.0726665923645123e-06,
      "loss": 0.4779,
      "step": 7424
    },
    {
      "epoch": 0.9103727317312408,
      "grad_norm": 2.041384396164735,
      "learning_rate": 3.0721732177891435e-06,
      "loss": 0.5414,
      "step": 7425
    },
    {
      "epoch": 0.9104953408533595,
      "grad_norm": 1.8821446504623498,
      "learning_rate": 3.0716798196964427e-06,
      "loss": 0.5369,
      "step": 7426
    },
    {
      "epoch": 0.9106179499754782,
      "grad_norm": 1.9385462354210679,
      "learning_rate": 3.071186398106688e-06,
      "loss": 0.5123,
      "step": 7427
    },
    {
      "epoch": 0.9107405590975969,
      "grad_norm": 1.9212647624047277,
      "learning_rate": 3.0706929530401623e-06,
      "loss": 0.5767,
      "step": 7428
    },
    {
      "epoch": 0.9108631682197156,
      "grad_norm": 2.0205207494692297,
      "learning_rate": 3.0701994845171455e-06,
      "loss": 0.5642,
      "step": 7429
    },
    {
      "epoch": 0.9109857773418343,
      "grad_norm": 1.9854601209051608,
      "learning_rate": 3.069705992557921e-06,
      "loss": 0.5478,
      "step": 7430
    },
    {
      "epoch": 0.9111083864639529,
      "grad_norm": 2.0441768517562657,
      "learning_rate": 3.0692124771827707e-06,
      "loss": 0.5352,
      "step": 7431
    },
    {
      "epoch": 0.9112309955860716,
      "grad_norm": 1.9721235493873364,
      "learning_rate": 3.068718938411981e-06,
      "loss": 0.53,
      "step": 7432
    },
    {
      "epoch": 0.9113536047081903,
      "grad_norm": 1.7890253955981834,
      "learning_rate": 3.0682253762658348e-06,
      "loss": 0.5447,
      "step": 7433
    },
    {
      "epoch": 0.911476213830309,
      "grad_norm": 2.0778625334451664,
      "learning_rate": 3.0677317907646202e-06,
      "loss": 0.5003,
      "step": 7434
    },
    {
      "epoch": 0.9115988229524277,
      "grad_norm": 1.9556860980081074,
      "learning_rate": 3.0672381819286246e-06,
      "loss": 0.5822,
      "step": 7435
    },
    {
      "epoch": 0.9117214320745464,
      "grad_norm": 1.8844188109284363,
      "learning_rate": 3.0667445497781352e-06,
      "loss": 0.5224,
      "step": 7436
    },
    {
      "epoch": 0.9118440411966651,
      "grad_norm": 1.7698009737637996,
      "learning_rate": 3.0662508943334406e-06,
      "loss": 0.5082,
      "step": 7437
    },
    {
      "epoch": 0.9119666503187838,
      "grad_norm": 2.168606094598857,
      "learning_rate": 3.065757215614833e-06,
      "loss": 0.584,
      "step": 7438
    },
    {
      "epoch": 0.9120892594409024,
      "grad_norm": 1.8905671138022067,
      "learning_rate": 3.0652635136426005e-06,
      "loss": 0.4877,
      "step": 7439
    },
    {
      "epoch": 0.912211868563021,
      "grad_norm": 1.8967504675126359,
      "learning_rate": 3.0647697884370385e-06,
      "loss": 0.4706,
      "step": 7440
    },
    {
      "epoch": 0.9123344776851398,
      "grad_norm": 1.85083165639212,
      "learning_rate": 3.0642760400184377e-06,
      "loss": 0.5303,
      "step": 7441
    },
    {
      "epoch": 0.9124570868072585,
      "grad_norm": 1.969843215785849,
      "learning_rate": 3.0637822684070923e-06,
      "loss": 0.5387,
      "step": 7442
    },
    {
      "epoch": 0.9125796959293772,
      "grad_norm": 2.0466664812981636,
      "learning_rate": 3.063288473623299e-06,
      "loss": 0.5422,
      "step": 7443
    },
    {
      "epoch": 0.9127023050514959,
      "grad_norm": 1.8721511905803505,
      "learning_rate": 3.062794655687351e-06,
      "loss": 0.4959,
      "step": 7444
    },
    {
      "epoch": 0.9128249141736146,
      "grad_norm": 1.8507867376133502,
      "learning_rate": 3.0623008146195467e-06,
      "loss": 0.5483,
      "step": 7445
    },
    {
      "epoch": 0.9129475232957333,
      "grad_norm": 2.0575819135463127,
      "learning_rate": 3.061806950440183e-06,
      "loss": 0.5728,
      "step": 7446
    },
    {
      "epoch": 0.9130701324178518,
      "grad_norm": 1.964676075830319,
      "learning_rate": 3.0613130631695593e-06,
      "loss": 0.5261,
      "step": 7447
    },
    {
      "epoch": 0.9131927415399705,
      "grad_norm": 1.922077698585249,
      "learning_rate": 3.060819152827975e-06,
      "loss": 0.5677,
      "step": 7448
    },
    {
      "epoch": 0.9133153506620892,
      "grad_norm": 1.8011336727808993,
      "learning_rate": 3.060325219435731e-06,
      "loss": 0.5291,
      "step": 7449
    },
    {
      "epoch": 0.9134379597842079,
      "grad_norm": 2.058617397185208,
      "learning_rate": 3.0598312630131277e-06,
      "loss": 0.5387,
      "step": 7450
    },
    {
      "epoch": 0.9135605689063266,
      "grad_norm": 2.036617327357275,
      "learning_rate": 3.0593372835804695e-06,
      "loss": 0.5269,
      "step": 7451
    },
    {
      "epoch": 0.9136831780284453,
      "grad_norm": 1.7942758289658565,
      "learning_rate": 3.0588432811580576e-06,
      "loss": 0.5173,
      "step": 7452
    },
    {
      "epoch": 0.913805787150564,
      "grad_norm": 2.0640766678114972,
      "learning_rate": 3.0583492557661985e-06,
      "loss": 0.4956,
      "step": 7453
    },
    {
      "epoch": 0.9139283962726827,
      "grad_norm": 2.1078590550020944,
      "learning_rate": 3.057855207425196e-06,
      "loss": 0.5292,
      "step": 7454
    },
    {
      "epoch": 0.9140510053948013,
      "grad_norm": 1.8767184335152147,
      "learning_rate": 3.0573611361553566e-06,
      "loss": 0.5599,
      "step": 7455
    },
    {
      "epoch": 0.91417361451692,
      "grad_norm": 1.9785033055326777,
      "learning_rate": 3.0568670419769886e-06,
      "loss": 0.4928,
      "step": 7456
    },
    {
      "epoch": 0.9142962236390387,
      "grad_norm": 1.872260420089937,
      "learning_rate": 3.0563729249103983e-06,
      "loss": 0.5325,
      "step": 7457
    },
    {
      "epoch": 0.9144188327611574,
      "grad_norm": 2.0981070026209783,
      "learning_rate": 3.0558787849758963e-06,
      "loss": 0.567,
      "step": 7458
    },
    {
      "epoch": 0.9145414418832761,
      "grad_norm": 1.9269786622754792,
      "learning_rate": 3.055384622193792e-06,
      "loss": 0.5029,
      "step": 7459
    },
    {
      "epoch": 0.9146640510053948,
      "grad_norm": 1.939213332410855,
      "learning_rate": 3.0548904365843966e-06,
      "loss": 0.4775,
      "step": 7460
    },
    {
      "epoch": 0.9147866601275135,
      "grad_norm": 1.8348021957924519,
      "learning_rate": 3.0543962281680223e-06,
      "loss": 0.5434,
      "step": 7461
    },
    {
      "epoch": 0.9149092692496321,
      "grad_norm": 1.9076154560555463,
      "learning_rate": 3.0539019969649807e-06,
      "loss": 0.5365,
      "step": 7462
    },
    {
      "epoch": 0.9150318783717508,
      "grad_norm": 1.7200392386840897,
      "learning_rate": 3.053407742995587e-06,
      "loss": 0.517,
      "step": 7463
    },
    {
      "epoch": 0.9151544874938695,
      "grad_norm": 1.9074136739105363,
      "learning_rate": 3.0529134662801557e-06,
      "loss": 0.5175,
      "step": 7464
    },
    {
      "epoch": 0.9152770966159882,
      "grad_norm": 1.8462536905020537,
      "learning_rate": 3.0524191668390006e-06,
      "loss": 0.5099,
      "step": 7465
    },
    {
      "epoch": 0.9153997057381069,
      "grad_norm": 1.984009359617974,
      "learning_rate": 3.0519248446924417e-06,
      "loss": 0.5322,
      "step": 7466
    },
    {
      "epoch": 0.9155223148602256,
      "grad_norm": 1.7814260810978226,
      "learning_rate": 3.051430499860793e-06,
      "loss": 0.5076,
      "step": 7467
    },
    {
      "epoch": 0.9156449239823443,
      "grad_norm": 1.884484234233963,
      "learning_rate": 3.0509361323643754e-06,
      "loss": 0.5595,
      "step": 7468
    },
    {
      "epoch": 0.915767533104463,
      "grad_norm": 2.120989208488559,
      "learning_rate": 3.050441742223507e-06,
      "loss": 0.5373,
      "step": 7469
    },
    {
      "epoch": 0.9158901422265816,
      "grad_norm": 1.86999297134711,
      "learning_rate": 3.0499473294585085e-06,
      "loss": 0.5094,
      "step": 7470
    },
    {
      "epoch": 0.9160127513487003,
      "grad_norm": 1.7976629305859173,
      "learning_rate": 3.049452894089702e-06,
      "loss": 0.4851,
      "step": 7471
    },
    {
      "epoch": 0.916135360470819,
      "grad_norm": 2.180041774373128,
      "learning_rate": 3.0489584361374073e-06,
      "loss": 0.5701,
      "step": 7472
    },
    {
      "epoch": 0.9162579695929377,
      "grad_norm": 1.9951236578738483,
      "learning_rate": 3.0484639556219504e-06,
      "loss": 0.4818,
      "step": 7473
    },
    {
      "epoch": 0.9163805787150564,
      "grad_norm": 2.137131290412195,
      "learning_rate": 3.047969452563654e-06,
      "loss": 0.5175,
      "step": 7474
    },
    {
      "epoch": 0.9165031878371751,
      "grad_norm": 1.948663810288763,
      "learning_rate": 3.0474749269828415e-06,
      "loss": 0.5105,
      "step": 7475
    },
    {
      "epoch": 0.9166257969592938,
      "grad_norm": 1.9412922570591398,
      "learning_rate": 3.0469803788998416e-06,
      "loss": 0.5593,
      "step": 7476
    },
    {
      "epoch": 0.9167484060814125,
      "grad_norm": 1.8539046955317215,
      "learning_rate": 3.0464858083349784e-06,
      "loss": 0.5526,
      "step": 7477
    },
    {
      "epoch": 0.9168710152035311,
      "grad_norm": 1.8203989428076293,
      "learning_rate": 3.045991215308583e-06,
      "loss": 0.5323,
      "step": 7478
    },
    {
      "epoch": 0.9169936243256498,
      "grad_norm": 2.174090859581988,
      "learning_rate": 3.045496599840981e-06,
      "loss": 0.5462,
      "step": 7479
    },
    {
      "epoch": 0.9171162334477685,
      "grad_norm": 1.9313159541842628,
      "learning_rate": 3.045001961952503e-06,
      "loss": 0.5478,
      "step": 7480
    },
    {
      "epoch": 0.9172388425698872,
      "grad_norm": 1.9300105570987967,
      "learning_rate": 3.044507301663479e-06,
      "loss": 0.5622,
      "step": 7481
    },
    {
      "epoch": 0.9173614516920059,
      "grad_norm": 1.996523142145692,
      "learning_rate": 3.0440126189942416e-06,
      "loss": 0.5399,
      "step": 7482
    },
    {
      "epoch": 0.9174840608141246,
      "grad_norm": 1.6802291819948374,
      "learning_rate": 3.0435179139651223e-06,
      "loss": 0.5534,
      "step": 7483
    },
    {
      "epoch": 0.9176066699362433,
      "grad_norm": 1.7510034576169264,
      "learning_rate": 3.0430231865964547e-06,
      "loss": 0.5112,
      "step": 7484
    },
    {
      "epoch": 0.917729279058362,
      "grad_norm": 1.9310076623825538,
      "learning_rate": 3.0425284369085723e-06,
      "loss": 0.5322,
      "step": 7485
    },
    {
      "epoch": 0.9178518881804806,
      "grad_norm": 1.8965661352458794,
      "learning_rate": 3.042033664921811e-06,
      "loss": 0.4962,
      "step": 7486
    },
    {
      "epoch": 0.9179744973025993,
      "grad_norm": 2.060593460727013,
      "learning_rate": 3.0415388706565062e-06,
      "loss": 0.54,
      "step": 7487
    },
    {
      "epoch": 0.918097106424718,
      "grad_norm": 1.8357313349149127,
      "learning_rate": 3.0410440541329957e-06,
      "loss": 0.485,
      "step": 7488
    },
    {
      "epoch": 0.9182197155468367,
      "grad_norm": 2.002944523177679,
      "learning_rate": 3.0405492153716165e-06,
      "loss": 0.5177,
      "step": 7489
    },
    {
      "epoch": 0.9183423246689554,
      "grad_norm": 1.9264883107983595,
      "learning_rate": 3.040054354392707e-06,
      "loss": 0.5546,
      "step": 7490
    },
    {
      "epoch": 0.9184649337910741,
      "grad_norm": 1.939069393516464,
      "learning_rate": 3.039559471216609e-06,
      "loss": 0.5523,
      "step": 7491
    },
    {
      "epoch": 0.9185875429131928,
      "grad_norm": 1.8862785647273375,
      "learning_rate": 3.0390645658636597e-06,
      "loss": 0.51,
      "step": 7492
    },
    {
      "epoch": 0.9187101520353115,
      "grad_norm": 1.9779983362553422,
      "learning_rate": 3.0385696383542035e-06,
      "loss": 0.5507,
      "step": 7493
    },
    {
      "epoch": 0.9188327611574301,
      "grad_norm": 1.988306243658951,
      "learning_rate": 3.038074688708581e-06,
      "loss": 0.5372,
      "step": 7494
    },
    {
      "epoch": 0.9189553702795488,
      "grad_norm": 1.8744198139106312,
      "learning_rate": 3.0375797169471366e-06,
      "loss": 0.5418,
      "step": 7495
    },
    {
      "epoch": 0.9190779794016675,
      "grad_norm": 1.8690166338313587,
      "learning_rate": 3.037084723090214e-06,
      "loss": 0.5219,
      "step": 7496
    },
    {
      "epoch": 0.9192005885237862,
      "grad_norm": 2.2414668217615974,
      "learning_rate": 3.0365897071581583e-06,
      "loss": 0.6054,
      "step": 7497
    },
    {
      "epoch": 0.9193231976459049,
      "grad_norm": 1.7933243823788196,
      "learning_rate": 3.0360946691713163e-06,
      "loss": 0.4923,
      "step": 7498
    },
    {
      "epoch": 0.9194458067680236,
      "grad_norm": 1.9480926653142463,
      "learning_rate": 3.035599609150034e-06,
      "loss": 0.5313,
      "step": 7499
    },
    {
      "epoch": 0.9195684158901423,
      "grad_norm": 1.8489429341696935,
      "learning_rate": 3.035104527114659e-06,
      "loss": 0.4773,
      "step": 7500
    },
    {
      "epoch": 0.919691025012261,
      "grad_norm": 1.7967956201030804,
      "learning_rate": 3.0346094230855416e-06,
      "loss": 0.4891,
      "step": 7501
    },
    {
      "epoch": 0.9198136341343796,
      "grad_norm": 2.0405588390525478,
      "learning_rate": 3.03411429708303e-06,
      "loss": 0.5132,
      "step": 7502
    },
    {
      "epoch": 0.9199362432564983,
      "grad_norm": 2.11200163208362,
      "learning_rate": 3.033619149127475e-06,
      "loss": 0.5693,
      "step": 7503
    },
    {
      "epoch": 0.920058852378617,
      "grad_norm": 1.8464251162993233,
      "learning_rate": 3.0331239792392284e-06,
      "loss": 0.5165,
      "step": 7504
    },
    {
      "epoch": 0.9201814615007357,
      "grad_norm": 2.006802348121495,
      "learning_rate": 3.0326287874386423e-06,
      "loss": 0.5331,
      "step": 7505
    },
    {
      "epoch": 0.9203040706228544,
      "grad_norm": 1.7220326180281165,
      "learning_rate": 3.0321335737460707e-06,
      "loss": 0.4874,
      "step": 7506
    },
    {
      "epoch": 0.920426679744973,
      "grad_norm": 1.9763214376327547,
      "learning_rate": 3.0316383381818663e-06,
      "loss": 0.5375,
      "step": 7507
    },
    {
      "epoch": 0.9205492888670918,
      "grad_norm": 1.9401473305408956,
      "learning_rate": 3.0311430807663853e-06,
      "loss": 0.5304,
      "step": 7508
    },
    {
      "epoch": 0.9206718979892105,
      "grad_norm": 2.0237771878715027,
      "learning_rate": 3.030647801519984e-06,
      "loss": 0.5311,
      "step": 7509
    },
    {
      "epoch": 0.920794507111329,
      "grad_norm": 1.8931222176065758,
      "learning_rate": 3.0301525004630173e-06,
      "loss": 0.486,
      "step": 7510
    },
    {
      "epoch": 0.9209171162334477,
      "grad_norm": 2.126155419501615,
      "learning_rate": 3.029657177615846e-06,
      "loss": 0.5815,
      "step": 7511
    },
    {
      "epoch": 0.9210397253555664,
      "grad_norm": 1.7836133735088633,
      "learning_rate": 3.029161832998826e-06,
      "loss": 0.5426,
      "step": 7512
    },
    {
      "epoch": 0.9211623344776851,
      "grad_norm": 2.0028554183357365,
      "learning_rate": 3.0286664666323173e-06,
      "loss": 0.4627,
      "step": 7513
    },
    {
      "epoch": 0.9212849435998038,
      "grad_norm": 1.9373133216840608,
      "learning_rate": 3.0281710785366818e-06,
      "loss": 0.5556,
      "step": 7514
    },
    {
      "epoch": 0.9214075527219225,
      "grad_norm": 2.105240661528163,
      "learning_rate": 3.027675668732279e-06,
      "loss": 0.6007,
      "step": 7515
    },
    {
      "epoch": 0.9215301618440412,
      "grad_norm": 2.075035902900007,
      "learning_rate": 3.0271802372394734e-06,
      "loss": 0.4717,
      "step": 7516
    },
    {
      "epoch": 0.9216527709661598,
      "grad_norm": 1.8708288840276373,
      "learning_rate": 3.0266847840786254e-06,
      "loss": 0.5144,
      "step": 7517
    },
    {
      "epoch": 0.9217753800882785,
      "grad_norm": 1.9603576009429677,
      "learning_rate": 3.026189309270101e-06,
      "loss": 0.5382,
      "step": 7518
    },
    {
      "epoch": 0.9218979892103972,
      "grad_norm": 1.948112921155769,
      "learning_rate": 3.025693812834265e-06,
      "loss": 0.5391,
      "step": 7519
    },
    {
      "epoch": 0.9220205983325159,
      "grad_norm": 1.9265179889024955,
      "learning_rate": 3.0251982947914814e-06,
      "loss": 0.5251,
      "step": 7520
    },
    {
      "epoch": 0.9221432074546346,
      "grad_norm": 1.8005923240555566,
      "learning_rate": 3.0247027551621187e-06,
      "loss": 0.5283,
      "step": 7521
    },
    {
      "epoch": 0.9222658165767533,
      "grad_norm": 1.895339242708726,
      "learning_rate": 3.0242071939665446e-06,
      "loss": 0.5382,
      "step": 7522
    },
    {
      "epoch": 0.922388425698872,
      "grad_norm": 1.8246535583211037,
      "learning_rate": 3.023711611225126e-06,
      "loss": 0.5038,
      "step": 7523
    },
    {
      "epoch": 0.9225110348209907,
      "grad_norm": 1.8872599676538313,
      "learning_rate": 3.0232160069582335e-06,
      "loss": 0.5375,
      "step": 7524
    },
    {
      "epoch": 0.9226336439431093,
      "grad_norm": 1.8798744051918306,
      "learning_rate": 3.0227203811862354e-06,
      "loss": 0.5586,
      "step": 7525
    },
    {
      "epoch": 0.922756253065228,
      "grad_norm": 1.8685537750735188,
      "learning_rate": 3.0222247339295063e-06,
      "loss": 0.496,
      "step": 7526
    },
    {
      "epoch": 0.9228788621873467,
      "grad_norm": 1.750881347187248,
      "learning_rate": 3.0217290652084147e-06,
      "loss": 0.5147,
      "step": 7527
    },
    {
      "epoch": 0.9230014713094654,
      "grad_norm": 1.7739009068385883,
      "learning_rate": 3.0212333750433355e-06,
      "loss": 0.5011,
      "step": 7528
    },
    {
      "epoch": 0.9231240804315841,
      "grad_norm": 1.8575336446787654,
      "learning_rate": 3.0207376634546416e-06,
      "loss": 0.5376,
      "step": 7529
    },
    {
      "epoch": 0.9232466895537028,
      "grad_norm": 1.960869158081882,
      "learning_rate": 3.020241930462707e-06,
      "loss": 0.4929,
      "step": 7530
    },
    {
      "epoch": 0.9233692986758215,
      "grad_norm": 2.0123983854850453,
      "learning_rate": 3.019746176087909e-06,
      "loss": 0.522,
      "step": 7531
    },
    {
      "epoch": 0.9234919077979402,
      "grad_norm": 1.922420599974508,
      "learning_rate": 3.0192504003506235e-06,
      "loss": 0.5239,
      "step": 7532
    },
    {
      "epoch": 0.9236145169200588,
      "grad_norm": 2.048983854458914,
      "learning_rate": 3.0187546032712263e-06,
      "loss": 0.4791,
      "step": 7533
    },
    {
      "epoch": 0.9237371260421775,
      "grad_norm": 1.9043893014547961,
      "learning_rate": 3.018258784870097e-06,
      "loss": 0.5376,
      "step": 7534
    },
    {
      "epoch": 0.9238597351642962,
      "grad_norm": 1.987446142154325,
      "learning_rate": 3.0177629451676137e-06,
      "loss": 0.54,
      "step": 7535
    },
    {
      "epoch": 0.9239823442864149,
      "grad_norm": 2.062377676763144,
      "learning_rate": 3.017267084184157e-06,
      "loss": 0.5681,
      "step": 7536
    },
    {
      "epoch": 0.9241049534085336,
      "grad_norm": 1.9412332739225864,
      "learning_rate": 3.016771201940108e-06,
      "loss": 0.5135,
      "step": 7537
    },
    {
      "epoch": 0.9242275625306523,
      "grad_norm": 1.9558722229687502,
      "learning_rate": 3.0162752984558462e-06,
      "loss": 0.5108,
      "step": 7538
    },
    {
      "epoch": 0.924350171652771,
      "grad_norm": 1.8014743911869295,
      "learning_rate": 3.015779373751758e-06,
      "loss": 0.4864,
      "step": 7539
    },
    {
      "epoch": 0.9244727807748897,
      "grad_norm": 1.9367275238535648,
      "learning_rate": 3.0152834278482225e-06,
      "loss": 0.5051,
      "step": 7540
    },
    {
      "epoch": 0.9245953898970083,
      "grad_norm": 1.917204109687246,
      "learning_rate": 3.0147874607656265e-06,
      "loss": 0.511,
      "step": 7541
    },
    {
      "epoch": 0.924717999019127,
      "grad_norm": 1.8796738729025548,
      "learning_rate": 3.0142914725243545e-06,
      "loss": 0.5339,
      "step": 7542
    },
    {
      "epoch": 0.9248406081412457,
      "grad_norm": 1.9646363928620412,
      "learning_rate": 3.013795463144792e-06,
      "loss": 0.4928,
      "step": 7543
    },
    {
      "epoch": 0.9249632172633644,
      "grad_norm": 1.84804730058782,
      "learning_rate": 3.013299432647327e-06,
      "loss": 0.4993,
      "step": 7544
    },
    {
      "epoch": 0.9250858263854831,
      "grad_norm": 1.8777096425442228,
      "learning_rate": 3.012803381052346e-06,
      "loss": 0.5178,
      "step": 7545
    },
    {
      "epoch": 0.9252084355076018,
      "grad_norm": 2.1156170902517997,
      "learning_rate": 3.0123073083802384e-06,
      "loss": 0.5026,
      "step": 7546
    },
    {
      "epoch": 0.9253310446297205,
      "grad_norm": 1.91315444669288,
      "learning_rate": 3.0118112146513945e-06,
      "loss": 0.5449,
      "step": 7547
    },
    {
      "epoch": 0.9254536537518392,
      "grad_norm": 1.8051559677795148,
      "learning_rate": 3.0113150998862016e-06,
      "loss": 0.5379,
      "step": 7548
    },
    {
      "epoch": 0.9255762628739578,
      "grad_norm": 2.032430030042333,
      "learning_rate": 3.0108189641050543e-06,
      "loss": 0.575,
      "step": 7549
    },
    {
      "epoch": 0.9256988719960765,
      "grad_norm": 1.8087124703155875,
      "learning_rate": 3.010322807328343e-06,
      "loss": 0.5588,
      "step": 7550
    },
    {
      "epoch": 0.9258214811181952,
      "grad_norm": 1.9069975702231985,
      "learning_rate": 3.0098266295764613e-06,
      "loss": 0.5332,
      "step": 7551
    },
    {
      "epoch": 0.9259440902403139,
      "grad_norm": 1.8055510650745192,
      "learning_rate": 3.009330430869802e-06,
      "loss": 0.5223,
      "step": 7552
    },
    {
      "epoch": 0.9260666993624326,
      "grad_norm": 1.9375797262940218,
      "learning_rate": 3.00883421122876e-06,
      "loss": 0.5735,
      "step": 7553
    },
    {
      "epoch": 0.9261893084845513,
      "grad_norm": 1.9543155480852556,
      "learning_rate": 3.0083379706737316e-06,
      "loss": 0.5145,
      "step": 7554
    },
    {
      "epoch": 0.92631191760667,
      "grad_norm": 1.9031955242657557,
      "learning_rate": 3.0078417092251124e-06,
      "loss": 0.5203,
      "step": 7555
    },
    {
      "epoch": 0.9264345267287887,
      "grad_norm": 1.9226247249152348,
      "learning_rate": 3.0073454269033e-06,
      "loss": 0.5198,
      "step": 7556
    },
    {
      "epoch": 0.9265571358509073,
      "grad_norm": 1.9039764319131414,
      "learning_rate": 3.0068491237286924e-06,
      "loss": 0.5164,
      "step": 7557
    },
    {
      "epoch": 0.926679744973026,
      "grad_norm": 1.9932239938958893,
      "learning_rate": 3.006352799721688e-06,
      "loss": 0.5521,
      "step": 7558
    },
    {
      "epoch": 0.9268023540951447,
      "grad_norm": 2.169078660626459,
      "learning_rate": 3.005856454902688e-06,
      "loss": 0.5791,
      "step": 7559
    },
    {
      "epoch": 0.9269249632172634,
      "grad_norm": 1.9759511734808968,
      "learning_rate": 3.005360089292092e-06,
      "loss": 0.4839,
      "step": 7560
    },
    {
      "epoch": 0.9270475723393821,
      "grad_norm": 1.9169145336414946,
      "learning_rate": 3.0048637029103004e-06,
      "loss": 0.56,
      "step": 7561
    },
    {
      "epoch": 0.9271701814615008,
      "grad_norm": 1.8745709709820735,
      "learning_rate": 3.0043672957777186e-06,
      "loss": 0.505,
      "step": 7562
    },
    {
      "epoch": 0.9272927905836195,
      "grad_norm": 2.1602500973598926,
      "learning_rate": 3.0038708679147465e-06,
      "loss": 0.5807,
      "step": 7563
    },
    {
      "epoch": 0.927415399705738,
      "grad_norm": 1.8599881012430686,
      "learning_rate": 3.003374419341791e-06,
      "loss": 0.5318,
      "step": 7564
    },
    {
      "epoch": 0.9275380088278568,
      "grad_norm": 1.8962786711174155,
      "learning_rate": 3.0028779500792547e-06,
      "loss": 0.5149,
      "step": 7565
    },
    {
      "epoch": 0.9276606179499755,
      "grad_norm": 2.047250339055101,
      "learning_rate": 3.002381460147545e-06,
      "loss": 0.499,
      "step": 7566
    },
    {
      "epoch": 0.9277832270720942,
      "grad_norm": 2.001529130063175,
      "learning_rate": 3.0018849495670677e-06,
      "loss": 0.5453,
      "step": 7567
    },
    {
      "epoch": 0.9279058361942129,
      "grad_norm": 2.041021978263893,
      "learning_rate": 3.0013884183582304e-06,
      "loss": 0.5183,
      "step": 7568
    },
    {
      "epoch": 0.9280284453163316,
      "grad_norm": 2.0407430793849044,
      "learning_rate": 3.0008918665414415e-06,
      "loss": 0.525,
      "step": 7569
    },
    {
      "epoch": 0.9281510544384503,
      "grad_norm": 1.9686866881009037,
      "learning_rate": 3.0003952941371107e-06,
      "loss": 0.5329,
      "step": 7570
    },
    {
      "epoch": 0.928273663560569,
      "grad_norm": 2.1921153934033386,
      "learning_rate": 2.999898701165646e-06,
      "loss": 0.6017,
      "step": 7571
    },
    {
      "epoch": 0.9283962726826875,
      "grad_norm": 1.9331025397293764,
      "learning_rate": 2.999402087647461e-06,
      "loss": 0.4907,
      "step": 7572
    },
    {
      "epoch": 0.9285188818048062,
      "grad_norm": 1.9753655169606068,
      "learning_rate": 2.9989054536029655e-06,
      "loss": 0.5657,
      "step": 7573
    },
    {
      "epoch": 0.9286414909269249,
      "grad_norm": 1.8663230420431807,
      "learning_rate": 2.998408799052573e-06,
      "loss": 0.5331,
      "step": 7574
    },
    {
      "epoch": 0.9287641000490436,
      "grad_norm": 1.9097704206786423,
      "learning_rate": 2.9979121240166964e-06,
      "loss": 0.5423,
      "step": 7575
    },
    {
      "epoch": 0.9288867091711623,
      "grad_norm": 1.8630026262471013,
      "learning_rate": 2.99741542851575e-06,
      "loss": 0.5024,
      "step": 7576
    },
    {
      "epoch": 0.929009318293281,
      "grad_norm": 1.892559665955021,
      "learning_rate": 2.996918712570148e-06,
      "loss": 0.5459,
      "step": 7577
    },
    {
      "epoch": 0.9291319274153997,
      "grad_norm": 1.855200638643854,
      "learning_rate": 2.9964219762003084e-06,
      "loss": 0.5247,
      "step": 7578
    },
    {
      "epoch": 0.9292545365375184,
      "grad_norm": 2.0500683473488808,
      "learning_rate": 2.9959252194266463e-06,
      "loss": 0.4915,
      "step": 7579
    },
    {
      "epoch": 0.929377145659637,
      "grad_norm": 1.7907689671855536,
      "learning_rate": 2.99542844226958e-06,
      "loss": 0.5239,
      "step": 7580
    },
    {
      "epoch": 0.9294997547817557,
      "grad_norm": 1.8518135848890698,
      "learning_rate": 2.994931644749527e-06,
      "loss": 0.5409,
      "step": 7581
    },
    {
      "epoch": 0.9296223639038744,
      "grad_norm": 1.9794130101339928,
      "learning_rate": 2.994434826886908e-06,
      "loss": 0.571,
      "step": 7582
    },
    {
      "epoch": 0.9297449730259931,
      "grad_norm": 1.8898574370927297,
      "learning_rate": 2.9939379887021407e-06,
      "loss": 0.5121,
      "step": 7583
    },
    {
      "epoch": 0.9298675821481118,
      "grad_norm": 1.870551870917239,
      "learning_rate": 2.993441130215649e-06,
      "loss": 0.5323,
      "step": 7584
    },
    {
      "epoch": 0.9299901912702305,
      "grad_norm": 1.8961138362645715,
      "learning_rate": 2.9929442514478533e-06,
      "loss": 0.5192,
      "step": 7585
    },
    {
      "epoch": 0.9301128003923492,
      "grad_norm": 2.1687522567632898,
      "learning_rate": 2.9924473524191756e-06,
      "loss": 0.5325,
      "step": 7586
    },
    {
      "epoch": 0.9302354095144679,
      "grad_norm": 1.9808524534363376,
      "learning_rate": 2.9919504331500404e-06,
      "loss": 0.5166,
      "step": 7587
    },
    {
      "epoch": 0.9303580186365865,
      "grad_norm": 1.8262820311290242,
      "learning_rate": 2.9914534936608714e-06,
      "loss": 0.551,
      "step": 7588
    },
    {
      "epoch": 0.9304806277587052,
      "grad_norm": 1.9280970809410218,
      "learning_rate": 2.9909565339720936e-06,
      "loss": 0.4825,
      "step": 7589
    },
    {
      "epoch": 0.9306032368808239,
      "grad_norm": 1.9571915726559839,
      "learning_rate": 2.9904595541041326e-06,
      "loss": 0.5896,
      "step": 7590
    },
    {
      "epoch": 0.9307258460029426,
      "grad_norm": 1.9564716194620737,
      "learning_rate": 2.9899625540774163e-06,
      "loss": 0.5086,
      "step": 7591
    },
    {
      "epoch": 0.9308484551250613,
      "grad_norm": 1.7511720622913929,
      "learning_rate": 2.989465533912371e-06,
      "loss": 0.5672,
      "step": 7592
    },
    {
      "epoch": 0.93097106424718,
      "grad_norm": 1.8743027437246762,
      "learning_rate": 2.988968493629427e-06,
      "loss": 0.5309,
      "step": 7593
    },
    {
      "epoch": 0.9310936733692987,
      "grad_norm": 1.8437449302863562,
      "learning_rate": 2.9884714332490106e-06,
      "loss": 0.5162,
      "step": 7594
    },
    {
      "epoch": 0.9312162824914174,
      "grad_norm": 1.919396131250842,
      "learning_rate": 2.987974352791555e-06,
      "loss": 0.5017,
      "step": 7595
    },
    {
      "epoch": 0.931338891613536,
      "grad_norm": 1.74098126528407,
      "learning_rate": 2.9874772522774887e-06,
      "loss": 0.4914,
      "step": 7596
    },
    {
      "epoch": 0.9314615007356547,
      "grad_norm": 1.9281720812117913,
      "learning_rate": 2.986980131727245e-06,
      "loss": 0.5583,
      "step": 7597
    },
    {
      "epoch": 0.9315841098577734,
      "grad_norm": 1.8678441793696123,
      "learning_rate": 2.986482991161255e-06,
      "loss": 0.5758,
      "step": 7598
    },
    {
      "epoch": 0.9317067189798921,
      "grad_norm": 2.080949998217047,
      "learning_rate": 2.9859858305999534e-06,
      "loss": 0.5607,
      "step": 7599
    },
    {
      "epoch": 0.9318293281020108,
      "grad_norm": 1.6340325440497145,
      "learning_rate": 2.9854886500637733e-06,
      "loss": 0.4814,
      "step": 7600
    },
    {
      "epoch": 0.9319519372241295,
      "grad_norm": 2.077599854490973,
      "learning_rate": 2.9849914495731504e-06,
      "loss": 0.55,
      "step": 7601
    },
    {
      "epoch": 0.9320745463462482,
      "grad_norm": 1.8121809082516567,
      "learning_rate": 2.98449422914852e-06,
      "loss": 0.4721,
      "step": 7602
    },
    {
      "epoch": 0.9321971554683669,
      "grad_norm": 1.9378133175014114,
      "learning_rate": 2.98399698881032e-06,
      "loss": 0.5138,
      "step": 7603
    },
    {
      "epoch": 0.9323197645904855,
      "grad_norm": 1.937751506231664,
      "learning_rate": 2.9834997285789857e-06,
      "loss": 0.4934,
      "step": 7604
    },
    {
      "epoch": 0.9324423737126042,
      "grad_norm": 1.8635093268075655,
      "learning_rate": 2.983002448474958e-06,
      "loss": 0.5607,
      "step": 7605
    },
    {
      "epoch": 0.9325649828347229,
      "grad_norm": 1.8089813363638936,
      "learning_rate": 2.982505148518673e-06,
      "loss": 0.5632,
      "step": 7606
    },
    {
      "epoch": 0.9326875919568416,
      "grad_norm": 1.6917893026952204,
      "learning_rate": 2.9820078287305736e-06,
      "loss": 0.4871,
      "step": 7607
    },
    {
      "epoch": 0.9328102010789603,
      "grad_norm": 1.726798303701516,
      "learning_rate": 2.9815104891310987e-06,
      "loss": 0.4547,
      "step": 7608
    },
    {
      "epoch": 0.932932810201079,
      "grad_norm": 1.8716047090761014,
      "learning_rate": 2.9810131297406892e-06,
      "loss": 0.5352,
      "step": 7609
    },
    {
      "epoch": 0.9330554193231977,
      "grad_norm": 2.0159718877077766,
      "learning_rate": 2.98051575057979e-06,
      "loss": 0.543,
      "step": 7610
    },
    {
      "epoch": 0.9331780284453163,
      "grad_norm": 1.8305897731408463,
      "learning_rate": 2.9800183516688414e-06,
      "loss": 0.5337,
      "step": 7611
    },
    {
      "epoch": 0.933300637567435,
      "grad_norm": 1.9916177119396457,
      "learning_rate": 2.9795209330282897e-06,
      "loss": 0.5152,
      "step": 7612
    },
    {
      "epoch": 0.9334232466895537,
      "grad_norm": 1.937753274292212,
      "learning_rate": 2.9790234946785785e-06,
      "loss": 0.549,
      "step": 7613
    },
    {
      "epoch": 0.9335458558116724,
      "grad_norm": 2.057944649411242,
      "learning_rate": 2.9785260366401536e-06,
      "loss": 0.5165,
      "step": 7614
    },
    {
      "epoch": 0.9336684649337911,
      "grad_norm": 1.856202011608517,
      "learning_rate": 2.9780285589334614e-06,
      "loss": 0.5453,
      "step": 7615
    },
    {
      "epoch": 0.9337910740559098,
      "grad_norm": 2.0482566522406978,
      "learning_rate": 2.977531061578949e-06,
      "loss": 0.5421,
      "step": 7616
    },
    {
      "epoch": 0.9339136831780285,
      "grad_norm": 1.8898315333913296,
      "learning_rate": 2.9770335445970644e-06,
      "loss": 0.5048,
      "step": 7617
    },
    {
      "epoch": 0.9340362923001472,
      "grad_norm": 1.9977620390793462,
      "learning_rate": 2.9765360080082577e-06,
      "loss": 0.6005,
      "step": 7618
    },
    {
      "epoch": 0.9341589014222658,
      "grad_norm": 2.208210775027313,
      "learning_rate": 2.976038451832976e-06,
      "loss": 0.5841,
      "step": 7619
    },
    {
      "epoch": 0.9342815105443845,
      "grad_norm": 1.8606752151833186,
      "learning_rate": 2.975540876091672e-06,
      "loss": 0.5141,
      "step": 7620
    },
    {
      "epoch": 0.9344041196665032,
      "grad_norm": 2.100179006412932,
      "learning_rate": 2.975043280804796e-06,
      "loss": 0.5779,
      "step": 7621
    },
    {
      "epoch": 0.9345267287886219,
      "grad_norm": 2.1090304587805773,
      "learning_rate": 2.9745456659928e-06,
      "loss": 0.5375,
      "step": 7622
    },
    {
      "epoch": 0.9346493379107406,
      "grad_norm": 1.807279577413621,
      "learning_rate": 2.9740480316761374e-06,
      "loss": 0.4872,
      "step": 7623
    },
    {
      "epoch": 0.9347719470328593,
      "grad_norm": 1.929018338778756,
      "learning_rate": 2.9735503778752613e-06,
      "loss": 0.5231,
      "step": 7624
    },
    {
      "epoch": 0.934894556154978,
      "grad_norm": 2.0321481622911723,
      "learning_rate": 2.973052704610626e-06,
      "loss": 0.537,
      "step": 7625
    },
    {
      "epoch": 0.9350171652770967,
      "grad_norm": 1.8575106277744584,
      "learning_rate": 2.972555011902687e-06,
      "loss": 0.5319,
      "step": 7626
    },
    {
      "epoch": 0.9351397743992153,
      "grad_norm": 2.050954281118429,
      "learning_rate": 2.9720572997719004e-06,
      "loss": 0.4969,
      "step": 7627
    },
    {
      "epoch": 0.935262383521334,
      "grad_norm": 2.0083355597023784,
      "learning_rate": 2.9715595682387243e-06,
      "loss": 0.5044,
      "step": 7628
    },
    {
      "epoch": 0.9353849926434527,
      "grad_norm": 2.065621820287013,
      "learning_rate": 2.9710618173236133e-06,
      "loss": 0.5095,
      "step": 7629
    },
    {
      "epoch": 0.9355076017655714,
      "grad_norm": 1.9649390194793346,
      "learning_rate": 2.970564047047029e-06,
      "loss": 0.5272,
      "step": 7630
    },
    {
      "epoch": 0.93563021088769,
      "grad_norm": 1.8794233660124908,
      "learning_rate": 2.970066257429428e-06,
      "loss": 0.4801,
      "step": 7631
    },
    {
      "epoch": 0.9357528200098088,
      "grad_norm": 1.9921594010273727,
      "learning_rate": 2.969568448491272e-06,
      "loss": 0.5089,
      "step": 7632
    },
    {
      "epoch": 0.9358754291319274,
      "grad_norm": 1.9069355427245183,
      "learning_rate": 2.969070620253023e-06,
      "loss": 0.5587,
      "step": 7633
    },
    {
      "epoch": 0.9359980382540461,
      "grad_norm": 1.9706550784741348,
      "learning_rate": 2.9685727727351392e-06,
      "loss": 0.5007,
      "step": 7634
    },
    {
      "epoch": 0.9361206473761647,
      "grad_norm": 2.1439903289462015,
      "learning_rate": 2.9680749059580865e-06,
      "loss": 0.5657,
      "step": 7635
    },
    {
      "epoch": 0.9362432564982834,
      "grad_norm": 1.7548341651675836,
      "learning_rate": 2.9675770199423254e-06,
      "loss": 0.5168,
      "step": 7636
    },
    {
      "epoch": 0.9363658656204021,
      "grad_norm": 1.812467926995903,
      "learning_rate": 2.967079114708321e-06,
      "loss": 0.5325,
      "step": 7637
    },
    {
      "epoch": 0.9364884747425208,
      "grad_norm": 2.19301558754932,
      "learning_rate": 2.9665811902765383e-06,
      "loss": 0.5973,
      "step": 7638
    },
    {
      "epoch": 0.9366110838646395,
      "grad_norm": 1.935666458072104,
      "learning_rate": 2.966083246667443e-06,
      "loss": 0.5386,
      "step": 7639
    },
    {
      "epoch": 0.9367336929867582,
      "grad_norm": 2.0097746396739837,
      "learning_rate": 2.965585283901501e-06,
      "loss": 0.5648,
      "step": 7640
    },
    {
      "epoch": 0.9368563021088769,
      "grad_norm": 2.112852601029596,
      "learning_rate": 2.9650873019991793e-06,
      "loss": 0.5408,
      "step": 7641
    },
    {
      "epoch": 0.9369789112309956,
      "grad_norm": 1.9907550947574981,
      "learning_rate": 2.9645893009809464e-06,
      "loss": 0.5688,
      "step": 7642
    },
    {
      "epoch": 0.9371015203531142,
      "grad_norm": 1.7528011605085492,
      "learning_rate": 2.9640912808672705e-06,
      "loss": 0.4895,
      "step": 7643
    },
    {
      "epoch": 0.9372241294752329,
      "grad_norm": 1.947860206663515,
      "learning_rate": 2.963593241678621e-06,
      "loss": 0.5801,
      "step": 7644
    },
    {
      "epoch": 0.9373467385973516,
      "grad_norm": 1.9908759621592647,
      "learning_rate": 2.96309518343547e-06,
      "loss": 0.5313,
      "step": 7645
    },
    {
      "epoch": 0.9374693477194703,
      "grad_norm": 2.057696070399876,
      "learning_rate": 2.962597106158286e-06,
      "loss": 0.5237,
      "step": 7646
    },
    {
      "epoch": 0.937591956841589,
      "grad_norm": 2.097085337231088,
      "learning_rate": 2.962099009867542e-06,
      "loss": 0.5312,
      "step": 7647
    },
    {
      "epoch": 0.9377145659637077,
      "grad_norm": 2.1654188564265437,
      "learning_rate": 2.9616008945837105e-06,
      "loss": 0.5596,
      "step": 7648
    },
    {
      "epoch": 0.9378371750858264,
      "grad_norm": 1.9587308762930042,
      "learning_rate": 2.9611027603272653e-06,
      "loss": 0.5319,
      "step": 7649
    },
    {
      "epoch": 0.9379597842079451,
      "grad_norm": 1.961200944805901,
      "learning_rate": 2.96060460711868e-06,
      "loss": 0.5212,
      "step": 7650
    },
    {
      "epoch": 0.9380823933300637,
      "grad_norm": 1.7557901801505384,
      "learning_rate": 2.9601064349784303e-06,
      "loss": 0.4955,
      "step": 7651
    },
    {
      "epoch": 0.9382050024521824,
      "grad_norm": 1.852519069482199,
      "learning_rate": 2.959608243926991e-06,
      "loss": 0.5038,
      "step": 7652
    },
    {
      "epoch": 0.9383276115743011,
      "grad_norm": 1.8654464489501472,
      "learning_rate": 2.95911003398484e-06,
      "loss": 0.5057,
      "step": 7653
    },
    {
      "epoch": 0.9384502206964198,
      "grad_norm": 1.7188739689849897,
      "learning_rate": 2.9586118051724532e-06,
      "loss": 0.4557,
      "step": 7654
    },
    {
      "epoch": 0.9385728298185385,
      "grad_norm": 1.9431863803246954,
      "learning_rate": 2.95811355751031e-06,
      "loss": 0.55,
      "step": 7655
    },
    {
      "epoch": 0.9386954389406572,
      "grad_norm": 1.9821798019940722,
      "learning_rate": 2.9576152910188884e-06,
      "loss": 0.5115,
      "step": 7656
    },
    {
      "epoch": 0.9388180480627759,
      "grad_norm": 1.9815630758161629,
      "learning_rate": 2.957117005718667e-06,
      "loss": 0.5364,
      "step": 7657
    },
    {
      "epoch": 0.9389406571848946,
      "grad_norm": 1.837664676595739,
      "learning_rate": 2.9566187016301283e-06,
      "loss": 0.5246,
      "step": 7658
    },
    {
      "epoch": 0.9390632663070132,
      "grad_norm": 1.8052991766563202,
      "learning_rate": 2.9561203787737523e-06,
      "loss": 0.4692,
      "step": 7659
    },
    {
      "epoch": 0.9391858754291319,
      "grad_norm": 1.983327983834976,
      "learning_rate": 2.9556220371700223e-06,
      "loss": 0.4903,
      "step": 7660
    },
    {
      "epoch": 0.9393084845512506,
      "grad_norm": 2.053646130984371,
      "learning_rate": 2.9551236768394188e-06,
      "loss": 0.5704,
      "step": 7661
    },
    {
      "epoch": 0.9394310936733693,
      "grad_norm": 1.75958728648451,
      "learning_rate": 2.954625297802427e-06,
      "loss": 0.5348,
      "step": 7662
    },
    {
      "epoch": 0.939553702795488,
      "grad_norm": 1.994597200339554,
      "learning_rate": 2.9541269000795304e-06,
      "loss": 0.5058,
      "step": 7663
    },
    {
      "epoch": 0.9396763119176067,
      "grad_norm": 1.937981903990401,
      "learning_rate": 2.9536284836912145e-06,
      "loss": 0.5636,
      "step": 7664
    },
    {
      "epoch": 0.9397989210397254,
      "grad_norm": 1.878976600810374,
      "learning_rate": 2.953130048657964e-06,
      "loss": 0.4943,
      "step": 7665
    },
    {
      "epoch": 0.939921530161844,
      "grad_norm": 2.1038941396683826,
      "learning_rate": 2.9526315950002675e-06,
      "loss": 0.5649,
      "step": 7666
    },
    {
      "epoch": 0.9400441392839627,
      "grad_norm": 1.8678826992721012,
      "learning_rate": 2.9521331227386095e-06,
      "loss": 0.5105,
      "step": 7667
    },
    {
      "epoch": 0.9401667484060814,
      "grad_norm": 1.9238842291634675,
      "learning_rate": 2.951634631893482e-06,
      "loss": 0.5328,
      "step": 7668
    },
    {
      "epoch": 0.9402893575282001,
      "grad_norm": 1.9745857008829713,
      "learning_rate": 2.9511361224853697e-06,
      "loss": 0.4943,
      "step": 7669
    },
    {
      "epoch": 0.9404119666503188,
      "grad_norm": 2.045586990139437,
      "learning_rate": 2.950637594534765e-06,
      "loss": 0.5544,
      "step": 7670
    },
    {
      "epoch": 0.9405345757724375,
      "grad_norm": 1.691301211422247,
      "learning_rate": 2.950139048062157e-06,
      "loss": 0.5208,
      "step": 7671
    },
    {
      "epoch": 0.9406571848945562,
      "grad_norm": 1.8442780528364648,
      "learning_rate": 2.949640483088037e-06,
      "loss": 0.5272,
      "step": 7672
    },
    {
      "epoch": 0.9407797940166749,
      "grad_norm": 1.9862301954307386,
      "learning_rate": 2.9491418996328974e-06,
      "loss": 0.4715,
      "step": 7673
    },
    {
      "epoch": 0.9409024031387935,
      "grad_norm": 1.6617100872041992,
      "learning_rate": 2.948643297717231e-06,
      "loss": 0.492,
      "step": 7674
    },
    {
      "epoch": 0.9410250122609122,
      "grad_norm": 1.9389948543950866,
      "learning_rate": 2.94814467736153e-06,
      "loss": 0.529,
      "step": 7675
    },
    {
      "epoch": 0.9411476213830309,
      "grad_norm": 1.9283960935884754,
      "learning_rate": 2.9476460385862902e-06,
      "loss": 0.5257,
      "step": 7676
    },
    {
      "epoch": 0.9412702305051496,
      "grad_norm": 1.8905599119485441,
      "learning_rate": 2.947147381412005e-06,
      "loss": 0.5625,
      "step": 7677
    },
    {
      "epoch": 0.9413928396272683,
      "grad_norm": 1.792962505890942,
      "learning_rate": 2.946648705859171e-06,
      "loss": 0.5335,
      "step": 7678
    },
    {
      "epoch": 0.941515448749387,
      "grad_norm": 1.8420524453950762,
      "learning_rate": 2.9461500119482846e-06,
      "loss": 0.4946,
      "step": 7679
    },
    {
      "epoch": 0.9416380578715057,
      "grad_norm": 1.9050072732052472,
      "learning_rate": 2.945651299699843e-06,
      "loss": 0.4669,
      "step": 7680
    },
    {
      "epoch": 0.9417606669936244,
      "grad_norm": 1.96513115010141,
      "learning_rate": 2.945152569134344e-06,
      "loss": 0.5227,
      "step": 7681
    },
    {
      "epoch": 0.941883276115743,
      "grad_norm": 1.9612502127306934,
      "learning_rate": 2.9446538202722857e-06,
      "loss": 0.4612,
      "step": 7682
    },
    {
      "epoch": 0.9420058852378617,
      "grad_norm": 1.8541114363624724,
      "learning_rate": 2.9441550531341688e-06,
      "loss": 0.5058,
      "step": 7683
    },
    {
      "epoch": 0.9421284943599804,
      "grad_norm": 2.0267126284020414,
      "learning_rate": 2.943656267740493e-06,
      "loss": 0.5036,
      "step": 7684
    },
    {
      "epoch": 0.9422511034820991,
      "grad_norm": 1.772777893324593,
      "learning_rate": 2.9431574641117587e-06,
      "loss": 0.5769,
      "step": 7685
    },
    {
      "epoch": 0.9423737126042178,
      "grad_norm": 1.7408995986326232,
      "learning_rate": 2.9426586422684683e-06,
      "loss": 0.464,
      "step": 7686
    },
    {
      "epoch": 0.9424963217263365,
      "grad_norm": 2.0977173506390843,
      "learning_rate": 2.9421598022311236e-06,
      "loss": 0.5383,
      "step": 7687
    },
    {
      "epoch": 0.9426189308484552,
      "grad_norm": 1.7868966232623424,
      "learning_rate": 2.941660944020229e-06,
      "loss": 0.4811,
      "step": 7688
    },
    {
      "epoch": 0.9427415399705739,
      "grad_norm": 1.9913546575848,
      "learning_rate": 2.9411620676562875e-06,
      "loss": 0.5429,
      "step": 7689
    },
    {
      "epoch": 0.9428641490926924,
      "grad_norm": 1.9771267306324005,
      "learning_rate": 2.940663173159804e-06,
      "loss": 0.5662,
      "step": 7690
    },
    {
      "epoch": 0.9429867582148111,
      "grad_norm": 2.1227315062007763,
      "learning_rate": 2.9401642605512844e-06,
      "loss": 0.483,
      "step": 7691
    },
    {
      "epoch": 0.9431093673369298,
      "grad_norm": 1.8152345312521723,
      "learning_rate": 2.939665329851233e-06,
      "loss": 0.5398,
      "step": 7692
    },
    {
      "epoch": 0.9432319764590485,
      "grad_norm": 1.9670796397915025,
      "learning_rate": 2.9391663810801595e-06,
      "loss": 0.5125,
      "step": 7693
    },
    {
      "epoch": 0.9433545855811672,
      "grad_norm": 1.9793848987154132,
      "learning_rate": 2.9386674142585696e-06,
      "loss": 0.5446,
      "step": 7694
    },
    {
      "epoch": 0.943477194703286,
      "grad_norm": 1.817605667711209,
      "learning_rate": 2.938168429406973e-06,
      "loss": 0.5102,
      "step": 7695
    },
    {
      "epoch": 0.9435998038254046,
      "grad_norm": 2.128077756362781,
      "learning_rate": 2.9376694265458777e-06,
      "loss": 0.5163,
      "step": 7696
    },
    {
      "epoch": 0.9437224129475233,
      "grad_norm": 1.8778006412019317,
      "learning_rate": 2.937170405695794e-06,
      "loss": 0.4833,
      "step": 7697
    },
    {
      "epoch": 0.9438450220696419,
      "grad_norm": 1.7276554494681993,
      "learning_rate": 2.936671366877233e-06,
      "loss": 0.531,
      "step": 7698
    },
    {
      "epoch": 0.9439676311917606,
      "grad_norm": 1.860970257202616,
      "learning_rate": 2.936172310110706e-06,
      "loss": 0.4603,
      "step": 7699
    },
    {
      "epoch": 0.9440902403138793,
      "grad_norm": 1.8712674645584328,
      "learning_rate": 2.9356732354167242e-06,
      "loss": 0.5099,
      "step": 7700
    },
    {
      "epoch": 0.944212849435998,
      "grad_norm": 2.044018071892613,
      "learning_rate": 2.935174142815802e-06,
      "loss": 0.4982,
      "step": 7701
    },
    {
      "epoch": 0.9443354585581167,
      "grad_norm": 1.8379774066175887,
      "learning_rate": 2.934675032328451e-06,
      "loss": 0.5668,
      "step": 7702
    },
    {
      "epoch": 0.9444580676802354,
      "grad_norm": 1.8061793135516209,
      "learning_rate": 2.9341759039751875e-06,
      "loss": 0.5046,
      "step": 7703
    },
    {
      "epoch": 0.9445806768023541,
      "grad_norm": 1.9721244351288316,
      "learning_rate": 2.9336767577765258e-06,
      "loss": 0.5301,
      "step": 7704
    },
    {
      "epoch": 0.9447032859244728,
      "grad_norm": 2.0091183851242493,
      "learning_rate": 2.9331775937529806e-06,
      "loss": 0.5604,
      "step": 7705
    },
    {
      "epoch": 0.9448258950465914,
      "grad_norm": 2.076776082921663,
      "learning_rate": 2.9326784119250705e-06,
      "loss": 0.584,
      "step": 7706
    },
    {
      "epoch": 0.9449485041687101,
      "grad_norm": 1.811926531822057,
      "learning_rate": 2.9321792123133108e-06,
      "loss": 0.4727,
      "step": 7707
    },
    {
      "epoch": 0.9450711132908288,
      "grad_norm": 1.6613001867933552,
      "learning_rate": 2.9316799949382214e-06,
      "loss": 0.5364,
      "step": 7708
    },
    {
      "epoch": 0.9451937224129475,
      "grad_norm": 1.8605421841597596,
      "learning_rate": 2.931180759820319e-06,
      "loss": 0.4853,
      "step": 7709
    },
    {
      "epoch": 0.9453163315350662,
      "grad_norm": 1.9456327397193647,
      "learning_rate": 2.930681506980124e-06,
      "loss": 0.5425,
      "step": 7710
    },
    {
      "epoch": 0.9454389406571849,
      "grad_norm": 2.054730593916197,
      "learning_rate": 2.9301822364381573e-06,
      "loss": 0.5519,
      "step": 7711
    },
    {
      "epoch": 0.9455615497793036,
      "grad_norm": 1.687700485390103,
      "learning_rate": 2.9296829482149385e-06,
      "loss": 0.5078,
      "step": 7712
    },
    {
      "epoch": 0.9456841589014222,
      "grad_norm": 2.0702691976445244,
      "learning_rate": 2.9291836423309904e-06,
      "loss": 0.5392,
      "step": 7713
    },
    {
      "epoch": 0.9458067680235409,
      "grad_norm": 1.7148746030932267,
      "learning_rate": 2.928684318806835e-06,
      "loss": 0.5111,
      "step": 7714
    },
    {
      "epoch": 0.9459293771456596,
      "grad_norm": 2.0230004584827577,
      "learning_rate": 2.928184977662994e-06,
      "loss": 0.5475,
      "step": 7715
    },
    {
      "epoch": 0.9460519862677783,
      "grad_norm": 2.0057592495615606,
      "learning_rate": 2.927685618919994e-06,
      "loss": 0.5158,
      "step": 7716
    },
    {
      "epoch": 0.946174595389897,
      "grad_norm": 1.8323928943616965,
      "learning_rate": 2.927186242598356e-06,
      "loss": 0.5069,
      "step": 7717
    },
    {
      "epoch": 0.9462972045120157,
      "grad_norm": 1.907474506673485,
      "learning_rate": 2.9266868487186084e-06,
      "loss": 0.5612,
      "step": 7718
    },
    {
      "epoch": 0.9464198136341344,
      "grad_norm": 1.8143617839641046,
      "learning_rate": 2.926187437301276e-06,
      "loss": 0.5311,
      "step": 7719
    },
    {
      "epoch": 0.9465424227562531,
      "grad_norm": 1.8620629027312776,
      "learning_rate": 2.925688008366885e-06,
      "loss": 0.5512,
      "step": 7720
    },
    {
      "epoch": 0.9466650318783717,
      "grad_norm": 1.878868453674574,
      "learning_rate": 2.9251885619359632e-06,
      "loss": 0.5026,
      "step": 7721
    },
    {
      "epoch": 0.9467876410004904,
      "grad_norm": 2.0635981226542963,
      "learning_rate": 2.9246890980290386e-06,
      "loss": 0.5571,
      "step": 7722
    },
    {
      "epoch": 0.9469102501226091,
      "grad_norm": 1.8191950679143503,
      "learning_rate": 2.9241896166666407e-06,
      "loss": 0.4644,
      "step": 7723
    },
    {
      "epoch": 0.9470328592447278,
      "grad_norm": 1.9944793515011492,
      "learning_rate": 2.923690117869299e-06,
      "loss": 0.4975,
      "step": 7724
    },
    {
      "epoch": 0.9471554683668465,
      "grad_norm": 1.881604967714984,
      "learning_rate": 2.923190601657542e-06,
      "loss": 0.5432,
      "step": 7725
    },
    {
      "epoch": 0.9472780774889652,
      "grad_norm": 1.8877696961880421,
      "learning_rate": 2.9226910680519034e-06,
      "loss": 0.5023,
      "step": 7726
    },
    {
      "epoch": 0.9474006866110839,
      "grad_norm": 1.7773743568735954,
      "learning_rate": 2.922191517072913e-06,
      "loss": 0.5253,
      "step": 7727
    },
    {
      "epoch": 0.9475232957332026,
      "grad_norm": 1.8095118727006165,
      "learning_rate": 2.9216919487411034e-06,
      "loss": 0.4843,
      "step": 7728
    },
    {
      "epoch": 0.9476459048553212,
      "grad_norm": 2.1342148455086565,
      "learning_rate": 2.921192363077009e-06,
      "loss": 0.5291,
      "step": 7729
    },
    {
      "epoch": 0.9477685139774399,
      "grad_norm": 1.9845029115332091,
      "learning_rate": 2.9206927601011614e-06,
      "loss": 0.5542,
      "step": 7730
    },
    {
      "epoch": 0.9478911230995586,
      "grad_norm": 1.8616124534086786,
      "learning_rate": 2.920193139834098e-06,
      "loss": 0.4904,
      "step": 7731
    },
    {
      "epoch": 0.9480137322216773,
      "grad_norm": 2.011626101342908,
      "learning_rate": 2.9196935022963524e-06,
      "loss": 0.524,
      "step": 7732
    },
    {
      "epoch": 0.948136341343796,
      "grad_norm": 1.8599316502904562,
      "learning_rate": 2.919193847508461e-06,
      "loss": 0.5329,
      "step": 7733
    },
    {
      "epoch": 0.9482589504659147,
      "grad_norm": 1.8654828937308248,
      "learning_rate": 2.9186941754909596e-06,
      "loss": 0.5553,
      "step": 7734
    },
    {
      "epoch": 0.9483815595880334,
      "grad_norm": 1.8088356237774414,
      "learning_rate": 2.918194486264387e-06,
      "loss": 0.5074,
      "step": 7735
    },
    {
      "epoch": 0.9485041687101521,
      "grad_norm": 2.0272154371920847,
      "learning_rate": 2.9176947798492804e-06,
      "loss": 0.5256,
      "step": 7736
    },
    {
      "epoch": 0.9486267778322707,
      "grad_norm": 2.02073573201066,
      "learning_rate": 2.917195056266179e-06,
      "loss": 0.4994,
      "step": 7737
    },
    {
      "epoch": 0.9487493869543894,
      "grad_norm": 2.1294012727312652,
      "learning_rate": 2.916695315535622e-06,
      "loss": 0.5483,
      "step": 7738
    },
    {
      "epoch": 0.9488719960765081,
      "grad_norm": 1.8003799221955716,
      "learning_rate": 2.9161955576781503e-06,
      "loss": 0.4928,
      "step": 7739
    },
    {
      "epoch": 0.9489946051986268,
      "grad_norm": 1.903458544439555,
      "learning_rate": 2.9156957827143033e-06,
      "loss": 0.5605,
      "step": 7740
    },
    {
      "epoch": 0.9491172143207455,
      "grad_norm": 1.8949882833643577,
      "learning_rate": 2.9151959906646254e-06,
      "loss": 0.5528,
      "step": 7741
    },
    {
      "epoch": 0.9492398234428642,
      "grad_norm": 2.097834496091531,
      "learning_rate": 2.914696181549656e-06,
      "loss": 0.5424,
      "step": 7742
    },
    {
      "epoch": 0.9493624325649829,
      "grad_norm": 2.0997673556273218,
      "learning_rate": 2.9141963553899392e-06,
      "loss": 0.5379,
      "step": 7743
    },
    {
      "epoch": 0.9494850416871016,
      "grad_norm": 1.9007174010010763,
      "learning_rate": 2.9136965122060194e-06,
      "loss": 0.5112,
      "step": 7744
    },
    {
      "epoch": 0.9496076508092202,
      "grad_norm": 1.9007954479606977,
      "learning_rate": 2.913196652018441e-06,
      "loss": 0.4843,
      "step": 7745
    },
    {
      "epoch": 0.9497302599313389,
      "grad_norm": 1.8537547715493972,
      "learning_rate": 2.912696774847748e-06,
      "loss": 0.5134,
      "step": 7746
    },
    {
      "epoch": 0.9498528690534576,
      "grad_norm": 1.85201025305842,
      "learning_rate": 2.9121968807144866e-06,
      "loss": 0.4952,
      "step": 7747
    },
    {
      "epoch": 0.9499754781755763,
      "grad_norm": 2.008789746978141,
      "learning_rate": 2.9116969696392044e-06,
      "loss": 0.5244,
      "step": 7748
    },
    {
      "epoch": 0.950098087297695,
      "grad_norm": 1.6359011041377733,
      "learning_rate": 2.9111970416424486e-06,
      "loss": 0.4783,
      "step": 7749
    },
    {
      "epoch": 0.9502206964198137,
      "grad_norm": 2.00382171187443,
      "learning_rate": 2.9106970967447647e-06,
      "loss": 0.5174,
      "step": 7750
    },
    {
      "epoch": 0.9503433055419324,
      "grad_norm": 1.6862582155180361,
      "learning_rate": 2.9101971349667035e-06,
      "loss": 0.4765,
      "step": 7751
    },
    {
      "epoch": 0.9504659146640511,
      "grad_norm": 1.8900792269459963,
      "learning_rate": 2.909697156328815e-06,
      "loss": 0.5075,
      "step": 7752
    },
    {
      "epoch": 0.9505885237861696,
      "grad_norm": 2.039879559716901,
      "learning_rate": 2.9091971608516466e-06,
      "loss": 0.5115,
      "step": 7753
    },
    {
      "epoch": 0.9507111329082883,
      "grad_norm": 2.010206490592972,
      "learning_rate": 2.908697148555752e-06,
      "loss": 0.5536,
      "step": 7754
    },
    {
      "epoch": 0.950833742030407,
      "grad_norm": 2.042187045031156,
      "learning_rate": 2.9081971194616793e-06,
      "loss": 0.5544,
      "step": 7755
    },
    {
      "epoch": 0.9509563511525257,
      "grad_norm": 1.8773604365673753,
      "learning_rate": 2.907697073589984e-06,
      "loss": 0.4993,
      "step": 7756
    },
    {
      "epoch": 0.9510789602746444,
      "grad_norm": 1.9942560923574906,
      "learning_rate": 2.907197010961216e-06,
      "loss": 0.5274,
      "step": 7757
    },
    {
      "epoch": 0.9512015693967631,
      "grad_norm": 1.8120751820281087,
      "learning_rate": 2.9066969315959305e-06,
      "loss": 0.4911,
      "step": 7758
    },
    {
      "epoch": 0.9513241785188818,
      "grad_norm": 1.7543496284583346,
      "learning_rate": 2.906196835514681e-06,
      "loss": 0.4903,
      "step": 7759
    },
    {
      "epoch": 0.9514467876410004,
      "grad_norm": 2.021904941500956,
      "learning_rate": 2.905696722738022e-06,
      "loss": 0.5507,
      "step": 7760
    },
    {
      "epoch": 0.9515693967631191,
      "grad_norm": 1.976865128781976,
      "learning_rate": 2.90519659328651e-06,
      "loss": 0.5593,
      "step": 7761
    },
    {
      "epoch": 0.9516920058852378,
      "grad_norm": 1.7436859676312424,
      "learning_rate": 2.9046964471807016e-06,
      "loss": 0.5441,
      "step": 7762
    },
    {
      "epoch": 0.9518146150073565,
      "grad_norm": 2.189609759725413,
      "learning_rate": 2.904196284441151e-06,
      "loss": 0.5928,
      "step": 7763
    },
    {
      "epoch": 0.9519372241294752,
      "grad_norm": 1.797863360035864,
      "learning_rate": 2.903696105088419e-06,
      "loss": 0.5567,
      "step": 7764
    },
    {
      "epoch": 0.9520598332515939,
      "grad_norm": 1.9493156367902353,
      "learning_rate": 2.9031959091430608e-06,
      "loss": 0.5141,
      "step": 7765
    },
    {
      "epoch": 0.9521824423737126,
      "grad_norm": 1.962066589961013,
      "learning_rate": 2.9026956966256383e-06,
      "loss": 0.4991,
      "step": 7766
    },
    {
      "epoch": 0.9523050514958313,
      "grad_norm": 1.8388292877539143,
      "learning_rate": 2.902195467556709e-06,
      "loss": 0.5157,
      "step": 7767
    },
    {
      "epoch": 0.9524276606179499,
      "grad_norm": 1.9158411062308356,
      "learning_rate": 2.9016952219568347e-06,
      "loss": 0.5107,
      "step": 7768
    },
    {
      "epoch": 0.9525502697400686,
      "grad_norm": 1.751614803425431,
      "learning_rate": 2.901194959846575e-06,
      "loss": 0.5295,
      "step": 7769
    },
    {
      "epoch": 0.9526728788621873,
      "grad_norm": 1.9078316497270673,
      "learning_rate": 2.900694681246492e-06,
      "loss": 0.5198,
      "step": 7770
    },
    {
      "epoch": 0.952795487984306,
      "grad_norm": 1.8689177773302224,
      "learning_rate": 2.900194386177149e-06,
      "loss": 0.518,
      "step": 7771
    },
    {
      "epoch": 0.9529180971064247,
      "grad_norm": 2.053743950915292,
      "learning_rate": 2.899694074659108e-06,
      "loss": 0.5543,
      "step": 7772
    },
    {
      "epoch": 0.9530407062285434,
      "grad_norm": 1.7719666857435257,
      "learning_rate": 2.899193746712932e-06,
      "loss": 0.5362,
      "step": 7773
    },
    {
      "epoch": 0.9531633153506621,
      "grad_norm": 1.9148228102957143,
      "learning_rate": 2.898693402359187e-06,
      "loss": 0.5029,
      "step": 7774
    },
    {
      "epoch": 0.9532859244727808,
      "grad_norm": 2.1054049446431917,
      "learning_rate": 2.898193041618437e-06,
      "loss": 0.5578,
      "step": 7775
    },
    {
      "epoch": 0.9534085335948994,
      "grad_norm": 1.7853068897786508,
      "learning_rate": 2.8976926645112486e-06,
      "loss": 0.4751,
      "step": 7776
    },
    {
      "epoch": 0.9535311427170181,
      "grad_norm": 2.1142255369459453,
      "learning_rate": 2.8971922710581878e-06,
      "loss": 0.5086,
      "step": 7777
    },
    {
      "epoch": 0.9536537518391368,
      "grad_norm": 1.936520483989031,
      "learning_rate": 2.8966918612798205e-06,
      "loss": 0.5212,
      "step": 7778
    },
    {
      "epoch": 0.9537763609612555,
      "grad_norm": 1.9612978171287652,
      "learning_rate": 2.896191435196717e-06,
      "loss": 0.5591,
      "step": 7779
    },
    {
      "epoch": 0.9538989700833742,
      "grad_norm": 1.7415989396546114,
      "learning_rate": 2.895690992829443e-06,
      "loss": 0.5219,
      "step": 7780
    },
    {
      "epoch": 0.9540215792054929,
      "grad_norm": 2.019414058215863,
      "learning_rate": 2.8951905341985687e-06,
      "loss": 0.5304,
      "step": 7781
    },
    {
      "epoch": 0.9541441883276116,
      "grad_norm": 1.992801025245801,
      "learning_rate": 2.894690059324664e-06,
      "loss": 0.489,
      "step": 7782
    },
    {
      "epoch": 0.9542667974497303,
      "grad_norm": 2.0041356951224842,
      "learning_rate": 2.8941895682282987e-06,
      "loss": 0.4885,
      "step": 7783
    },
    {
      "epoch": 0.9543894065718489,
      "grad_norm": 1.7768429809896888,
      "learning_rate": 2.893689060930045e-06,
      "loss": 0.4817,
      "step": 7784
    },
    {
      "epoch": 0.9545120156939676,
      "grad_norm": 2.056476006469671,
      "learning_rate": 2.8931885374504732e-06,
      "loss": 0.595,
      "step": 7785
    },
    {
      "epoch": 0.9546346248160863,
      "grad_norm": 1.9447921064437277,
      "learning_rate": 2.8926879978101568e-06,
      "loss": 0.5307,
      "step": 7786
    },
    {
      "epoch": 0.954757233938205,
      "grad_norm": 2.021972805495956,
      "learning_rate": 2.892187442029669e-06,
      "loss": 0.5579,
      "step": 7787
    },
    {
      "epoch": 0.9548798430603237,
      "grad_norm": 1.9556478368594041,
      "learning_rate": 2.891686870129582e-06,
      "loss": 0.5537,
      "step": 7788
    },
    {
      "epoch": 0.9550024521824424,
      "grad_norm": 1.806278929782041,
      "learning_rate": 2.891186282130472e-06,
      "loss": 0.5198,
      "step": 7789
    },
    {
      "epoch": 0.9551250613045611,
      "grad_norm": 1.8315447322489455,
      "learning_rate": 2.890685678052913e-06,
      "loss": 0.464,
      "step": 7790
    },
    {
      "epoch": 0.9552476704266798,
      "grad_norm": 1.9231060320654947,
      "learning_rate": 2.8901850579174813e-06,
      "loss": 0.5272,
      "step": 7791
    },
    {
      "epoch": 0.9553702795487984,
      "grad_norm": 1.760386202536772,
      "learning_rate": 2.889684421744752e-06,
      "loss": 0.4981,
      "step": 7792
    },
    {
      "epoch": 0.9554928886709171,
      "grad_norm": 1.7266997787871607,
      "learning_rate": 2.889183769555304e-06,
      "loss": 0.4641,
      "step": 7793
    },
    {
      "epoch": 0.9556154977930358,
      "grad_norm": 2.0687082035422715,
      "learning_rate": 2.8886831013697138e-06,
      "loss": 0.5482,
      "step": 7794
    },
    {
      "epoch": 0.9557381069151545,
      "grad_norm": 1.8911730650762897,
      "learning_rate": 2.88818241720856e-06,
      "loss": 0.51,
      "step": 7795
    },
    {
      "epoch": 0.9558607160372732,
      "grad_norm": 2.0178762330737334,
      "learning_rate": 2.887681717092421e-06,
      "loss": 0.5448,
      "step": 7796
    },
    {
      "epoch": 0.9559833251593919,
      "grad_norm": 1.7338245972517465,
      "learning_rate": 2.8871810010418782e-06,
      "loss": 0.5541,
      "step": 7797
    },
    {
      "epoch": 0.9561059342815106,
      "grad_norm": 1.8297420468046302,
      "learning_rate": 2.8866802690775096e-06,
      "loss": 0.5127,
      "step": 7798
    },
    {
      "epoch": 0.9562285434036293,
      "grad_norm": 1.754631070443293,
      "learning_rate": 2.8861795212198984e-06,
      "loss": 0.5198,
      "step": 7799
    },
    {
      "epoch": 0.9563511525257479,
      "grad_norm": 1.910658928633811,
      "learning_rate": 2.8856787574896254e-06,
      "loss": 0.5042,
      "step": 7800
    },
    {
      "epoch": 0.9564737616478666,
      "grad_norm": 1.9521109357900697,
      "learning_rate": 2.8851779779072714e-06,
      "loss": 0.5186,
      "step": 7801
    },
    {
      "epoch": 0.9565963707699853,
      "grad_norm": 2.065248751829885,
      "learning_rate": 2.8846771824934222e-06,
      "loss": 0.4743,
      "step": 7802
    },
    {
      "epoch": 0.956718979892104,
      "grad_norm": 2.1547055572477056,
      "learning_rate": 2.8841763712686583e-06,
      "loss": 0.5006,
      "step": 7803
    },
    {
      "epoch": 0.9568415890142227,
      "grad_norm": 2.190423171438083,
      "learning_rate": 2.8836755442535663e-06,
      "loss": 0.5435,
      "step": 7804
    },
    {
      "epoch": 0.9569641981363414,
      "grad_norm": 2.2483107049621567,
      "learning_rate": 2.88317470146873e-06,
      "loss": 0.5175,
      "step": 7805
    },
    {
      "epoch": 0.9570868072584601,
      "grad_norm": 2.020728250989084,
      "learning_rate": 2.882673842934735e-06,
      "loss": 0.5223,
      "step": 7806
    },
    {
      "epoch": 0.9572094163805788,
      "grad_norm": 1.6940060721525265,
      "learning_rate": 2.882172968672168e-06,
      "loss": 0.5262,
      "step": 7807
    },
    {
      "epoch": 0.9573320255026974,
      "grad_norm": 1.8579200276350694,
      "learning_rate": 2.881672078701615e-06,
      "loss": 0.4992,
      "step": 7808
    },
    {
      "epoch": 0.9574546346248161,
      "grad_norm": 1.9093338473774228,
      "learning_rate": 2.8811711730436637e-06,
      "loss": 0.5348,
      "step": 7809
    },
    {
      "epoch": 0.9575772437469348,
      "grad_norm": 1.8460387288520426,
      "learning_rate": 2.8806702517189035e-06,
      "loss": 0.5152,
      "step": 7810
    },
    {
      "epoch": 0.9576998528690535,
      "grad_norm": 1.7120420251619457,
      "learning_rate": 2.88016931474792e-06,
      "loss": 0.5067,
      "step": 7811
    },
    {
      "epoch": 0.9578224619911722,
      "grad_norm": 1.737290773428898,
      "learning_rate": 2.8796683621513067e-06,
      "loss": 0.4973,
      "step": 7812
    },
    {
      "epoch": 0.9579450711132909,
      "grad_norm": 1.8909434369357803,
      "learning_rate": 2.8791673939496494e-06,
      "loss": 0.6015,
      "step": 7813
    },
    {
      "epoch": 0.9580676802354096,
      "grad_norm": 1.8048718475598087,
      "learning_rate": 2.878666410163543e-06,
      "loss": 0.5185,
      "step": 7814
    },
    {
      "epoch": 0.9581902893575281,
      "grad_norm": 1.8344002912716415,
      "learning_rate": 2.878165410813576e-06,
      "loss": 0.5142,
      "step": 7815
    },
    {
      "epoch": 0.9583128984796468,
      "grad_norm": 1.9312785296486423,
      "learning_rate": 2.877664395920341e-06,
      "loss": 0.4991,
      "step": 7816
    },
    {
      "epoch": 0.9584355076017655,
      "grad_norm": 1.8269064607314023,
      "learning_rate": 2.8771633655044307e-06,
      "loss": 0.5012,
      "step": 7817
    },
    {
      "epoch": 0.9585581167238842,
      "grad_norm": 2.0368324512037623,
      "learning_rate": 2.8766623195864383e-06,
      "loss": 0.5194,
      "step": 7818
    },
    {
      "epoch": 0.958680725846003,
      "grad_norm": 1.8247874980815961,
      "learning_rate": 2.876161258186958e-06,
      "loss": 0.4896,
      "step": 7819
    },
    {
      "epoch": 0.9588033349681216,
      "grad_norm": 1.8748198735727084,
      "learning_rate": 2.8756601813265843e-06,
      "loss": 0.4972,
      "step": 7820
    },
    {
      "epoch": 0.9589259440902403,
      "grad_norm": 1.9953981497807673,
      "learning_rate": 2.8751590890259113e-06,
      "loss": 0.5448,
      "step": 7821
    },
    {
      "epoch": 0.959048553212359,
      "grad_norm": 2.0093607789043206,
      "learning_rate": 2.8746579813055363e-06,
      "loss": 0.5593,
      "step": 7822
    },
    {
      "epoch": 0.9591711623344776,
      "grad_norm": 1.804002848567391,
      "learning_rate": 2.874156858186054e-06,
      "loss": 0.5216,
      "step": 7823
    },
    {
      "epoch": 0.9592937714565963,
      "grad_norm": 1.8419401689657553,
      "learning_rate": 2.873655719688063e-06,
      "loss": 0.5099,
      "step": 7824
    },
    {
      "epoch": 0.959416380578715,
      "grad_norm": 1.9001018919402284,
      "learning_rate": 2.8731545658321613e-06,
      "loss": 0.5143,
      "step": 7825
    },
    {
      "epoch": 0.9595389897008337,
      "grad_norm": 1.8847819560069947,
      "learning_rate": 2.872653396638945e-06,
      "loss": 0.4813,
      "step": 7826
    },
    {
      "epoch": 0.9596615988229524,
      "grad_norm": 2.15362144697551,
      "learning_rate": 2.872152212129015e-06,
      "loss": 0.5574,
      "step": 7827
    },
    {
      "epoch": 0.9597842079450711,
      "grad_norm": 2.0486636597052805,
      "learning_rate": 2.871651012322971e-06,
      "loss": 0.5321,
      "step": 7828
    },
    {
      "epoch": 0.9599068170671898,
      "grad_norm": 1.8521719880968772,
      "learning_rate": 2.8711497972414108e-06,
      "loss": 0.5153,
      "step": 7829
    },
    {
      "epoch": 0.9600294261893085,
      "grad_norm": 1.7977232895443587,
      "learning_rate": 2.8706485669049377e-06,
      "loss": 0.5061,
      "step": 7830
    },
    {
      "epoch": 0.9601520353114271,
      "grad_norm": 2.0110666426120734,
      "learning_rate": 2.8701473213341523e-06,
      "loss": 0.591,
      "step": 7831
    },
    {
      "epoch": 0.9602746444335458,
      "grad_norm": 1.7905182892684235,
      "learning_rate": 2.869646060549657e-06,
      "loss": 0.5201,
      "step": 7832
    },
    {
      "epoch": 0.9603972535556645,
      "grad_norm": 1.7341468919958123,
      "learning_rate": 2.869144784572054e-06,
      "loss": 0.4652,
      "step": 7833
    },
    {
      "epoch": 0.9605198626777832,
      "grad_norm": 2.056080720005101,
      "learning_rate": 2.8686434934219466e-06,
      "loss": 0.5248,
      "step": 7834
    },
    {
      "epoch": 0.9606424717999019,
      "grad_norm": 1.9930479591697121,
      "learning_rate": 2.8681421871199394e-06,
      "loss": 0.5186,
      "step": 7835
    },
    {
      "epoch": 0.9607650809220206,
      "grad_norm": 2.00337339256341,
      "learning_rate": 2.8676408656866356e-06,
      "loss": 0.4731,
      "step": 7836
    },
    {
      "epoch": 0.9608876900441393,
      "grad_norm": 2.1685319634735407,
      "learning_rate": 2.8671395291426422e-06,
      "loss": 0.5649,
      "step": 7837
    },
    {
      "epoch": 0.961010299166258,
      "grad_norm": 1.90881107197934,
      "learning_rate": 2.8666381775085646e-06,
      "loss": 0.5248,
      "step": 7838
    },
    {
      "epoch": 0.9611329082883766,
      "grad_norm": 1.8234237224692833,
      "learning_rate": 2.866136810805008e-06,
      "loss": 0.4922,
      "step": 7839
    },
    {
      "epoch": 0.9612555174104953,
      "grad_norm": 1.9387799582437115,
      "learning_rate": 2.8656354290525806e-06,
      "loss": 0.5011,
      "step": 7840
    },
    {
      "epoch": 0.961378126532614,
      "grad_norm": 1.8422652392560592,
      "learning_rate": 2.8651340322718896e-06,
      "loss": 0.5055,
      "step": 7841
    },
    {
      "epoch": 0.9615007356547327,
      "grad_norm": 2.007110748713445,
      "learning_rate": 2.8646326204835434e-06,
      "loss": 0.5475,
      "step": 7842
    },
    {
      "epoch": 0.9616233447768514,
      "grad_norm": 1.9402334694265129,
      "learning_rate": 2.864131193708151e-06,
      "loss": 0.546,
      "step": 7843
    },
    {
      "epoch": 0.9617459538989701,
      "grad_norm": 2.0720449958467446,
      "learning_rate": 2.863629751966322e-06,
      "loss": 0.5066,
      "step": 7844
    },
    {
      "epoch": 0.9618685630210888,
      "grad_norm": 1.8811321390738054,
      "learning_rate": 2.863128295278667e-06,
      "loss": 0.5083,
      "step": 7845
    },
    {
      "epoch": 0.9619911721432075,
      "grad_norm": 1.8843520342782938,
      "learning_rate": 2.862626823665795e-06,
      "loss": 0.5148,
      "step": 7846
    },
    {
      "epoch": 0.9621137812653261,
      "grad_norm": 1.92140380005107,
      "learning_rate": 2.8621253371483193e-06,
      "loss": 0.5013,
      "step": 7847
    },
    {
      "epoch": 0.9622363903874448,
      "grad_norm": 1.8344745620662135,
      "learning_rate": 2.861623835746852e-06,
      "loss": 0.5444,
      "step": 7848
    },
    {
      "epoch": 0.9623589995095635,
      "grad_norm": 1.8660676130037783,
      "learning_rate": 2.8611223194820026e-06,
      "loss": 0.5139,
      "step": 7849
    },
    {
      "epoch": 0.9624816086316822,
      "grad_norm": 1.8267560878463043,
      "learning_rate": 2.8606207883743888e-06,
      "loss": 0.5224,
      "step": 7850
    },
    {
      "epoch": 0.9626042177538009,
      "grad_norm": 1.941083106065315,
      "learning_rate": 2.8601192424446204e-06,
      "loss": 0.487,
      "step": 7851
    },
    {
      "epoch": 0.9627268268759196,
      "grad_norm": 1.907409510998798,
      "learning_rate": 2.8596176817133146e-06,
      "loss": 0.5442,
      "step": 7852
    },
    {
      "epoch": 0.9628494359980383,
      "grad_norm": 1.8544731741281446,
      "learning_rate": 2.8591161062010853e-06,
      "loss": 0.4888,
      "step": 7853
    },
    {
      "epoch": 0.962972045120157,
      "grad_norm": 1.922563277898831,
      "learning_rate": 2.8586145159285483e-06,
      "loss": 0.585,
      "step": 7854
    },
    {
      "epoch": 0.9630946542422756,
      "grad_norm": 1.8769467620804574,
      "learning_rate": 2.8581129109163196e-06,
      "loss": 0.4979,
      "step": 7855
    },
    {
      "epoch": 0.9632172633643943,
      "grad_norm": 1.9434912181743873,
      "learning_rate": 2.857611291185016e-06,
      "loss": 0.478,
      "step": 7856
    },
    {
      "epoch": 0.963339872486513,
      "grad_norm": 1.7997189652015153,
      "learning_rate": 2.8571096567552557e-06,
      "loss": 0.5128,
      "step": 7857
    },
    {
      "epoch": 0.9634624816086317,
      "grad_norm": 2.101732951088321,
      "learning_rate": 2.8566080076476573e-06,
      "loss": 0.5192,
      "step": 7858
    },
    {
      "epoch": 0.9635850907307504,
      "grad_norm": 2.0091542489594585,
      "learning_rate": 2.856106343882837e-06,
      "loss": 0.4931,
      "step": 7859
    },
    {
      "epoch": 0.9637076998528691,
      "grad_norm": 2.033143225921478,
      "learning_rate": 2.8556046654814164e-06,
      "loss": 0.5554,
      "step": 7860
    },
    {
      "epoch": 0.9638303089749878,
      "grad_norm": 2.0513219917142824,
      "learning_rate": 2.8551029724640133e-06,
      "loss": 0.5424,
      "step": 7861
    },
    {
      "epoch": 0.9639529180971064,
      "grad_norm": 1.8698432652190968,
      "learning_rate": 2.8546012648512504e-06,
      "loss": 0.5225,
      "step": 7862
    },
    {
      "epoch": 0.9640755272192251,
      "grad_norm": 1.6334101712934317,
      "learning_rate": 2.8540995426637475e-06,
      "loss": 0.4776,
      "step": 7863
    },
    {
      "epoch": 0.9641981363413438,
      "grad_norm": 1.8288581166786724,
      "learning_rate": 2.8535978059221263e-06,
      "loss": 0.5044,
      "step": 7864
    },
    {
      "epoch": 0.9643207454634625,
      "grad_norm": 1.8643229885508183,
      "learning_rate": 2.8530960546470092e-06,
      "loss": 0.483,
      "step": 7865
    },
    {
      "epoch": 0.9644433545855812,
      "grad_norm": 1.6545502904237097,
      "learning_rate": 2.8525942888590198e-06,
      "loss": 0.4872,
      "step": 7866
    },
    {
      "epoch": 0.9645659637076999,
      "grad_norm": 2.0345971975049726,
      "learning_rate": 2.8520925085787803e-06,
      "loss": 0.4893,
      "step": 7867
    },
    {
      "epoch": 0.9646885728298186,
      "grad_norm": 2.0322503503505143,
      "learning_rate": 2.851590713826916e-06,
      "loss": 0.5112,
      "step": 7868
    },
    {
      "epoch": 0.9648111819519373,
      "grad_norm": 1.9213739865342572,
      "learning_rate": 2.85108890462405e-06,
      "loss": 0.5526,
      "step": 7869
    },
    {
      "epoch": 0.9649337910740559,
      "grad_norm": 1.898606513419943,
      "learning_rate": 2.850587080990809e-06,
      "loss": 0.5259,
      "step": 7870
    },
    {
      "epoch": 0.9650564001961746,
      "grad_norm": 1.9097267474509767,
      "learning_rate": 2.850085242947818e-06,
      "loss": 0.5576,
      "step": 7871
    },
    {
      "epoch": 0.9651790093182933,
      "grad_norm": 1.9769394281075017,
      "learning_rate": 2.849583390515704e-06,
      "loss": 0.4614,
      "step": 7872
    },
    {
      "epoch": 0.965301618440412,
      "grad_norm": 1.9358234494253048,
      "learning_rate": 2.8490815237150944e-06,
      "loss": 0.4823,
      "step": 7873
    },
    {
      "epoch": 0.9654242275625307,
      "grad_norm": 1.8996696653127136,
      "learning_rate": 2.8485796425666144e-06,
      "loss": 0.5527,
      "step": 7874
    },
    {
      "epoch": 0.9655468366846494,
      "grad_norm": 1.9512373862801038,
      "learning_rate": 2.848077747090896e-06,
      "loss": 0.5286,
      "step": 7875
    },
    {
      "epoch": 0.9656694458067681,
      "grad_norm": 1.8631064227598573,
      "learning_rate": 2.8475758373085648e-06,
      "loss": 0.5225,
      "step": 7876
    },
    {
      "epoch": 0.9657920549288868,
      "grad_norm": 1.7332737710262833,
      "learning_rate": 2.847073913240252e-06,
      "loss": 0.4837,
      "step": 7877
    },
    {
      "epoch": 0.9659146640510053,
      "grad_norm": 1.9578801108898072,
      "learning_rate": 2.8465719749065863e-06,
      "loss": 0.5211,
      "step": 7878
    },
    {
      "epoch": 0.966037273173124,
      "grad_norm": 1.9635503769270446,
      "learning_rate": 2.846070022328199e-06,
      "loss": 0.4971,
      "step": 7879
    },
    {
      "epoch": 0.9661598822952427,
      "grad_norm": 1.9492826841942568,
      "learning_rate": 2.8455680555257213e-06,
      "loss": 0.521,
      "step": 7880
    },
    {
      "epoch": 0.9662824914173614,
      "grad_norm": 2.0311519389292427,
      "learning_rate": 2.8450660745197844e-06,
      "loss": 0.5397,
      "step": 7881
    },
    {
      "epoch": 0.9664051005394801,
      "grad_norm": 1.7831486999863257,
      "learning_rate": 2.8445640793310214e-06,
      "loss": 0.482,
      "step": 7882
    },
    {
      "epoch": 0.9665277096615988,
      "grad_norm": 2.006901348309996,
      "learning_rate": 2.844062069980065e-06,
      "loss": 0.5263,
      "step": 7883
    },
    {
      "epoch": 0.9666503187837175,
      "grad_norm": 1.8621213838423936,
      "learning_rate": 2.8435600464875474e-06,
      "loss": 0.5128,
      "step": 7884
    },
    {
      "epoch": 0.9667729279058362,
      "grad_norm": 1.974611727148908,
      "learning_rate": 2.8430580088741046e-06,
      "loss": 0.5251,
      "step": 7885
    },
    {
      "epoch": 0.9668955370279548,
      "grad_norm": 1.6659496069218893,
      "learning_rate": 2.84255595716037e-06,
      "loss": 0.5007,
      "step": 7886
    },
    {
      "epoch": 0.9670181461500735,
      "grad_norm": 2.2525604741821565,
      "learning_rate": 2.8420538913669787e-06,
      "loss": 0.5454,
      "step": 7887
    },
    {
      "epoch": 0.9671407552721922,
      "grad_norm": 1.7887232335744019,
      "learning_rate": 2.8415518115145673e-06,
      "loss": 0.5058,
      "step": 7888
    },
    {
      "epoch": 0.9672633643943109,
      "grad_norm": 2.1021915704484817,
      "learning_rate": 2.841049717623772e-06,
      "loss": 0.5535,
      "step": 7889
    },
    {
      "epoch": 0.9673859735164296,
      "grad_norm": 2.0964555526551334,
      "learning_rate": 2.840547609715229e-06,
      "loss": 0.5292,
      "step": 7890
    },
    {
      "epoch": 0.9675085826385483,
      "grad_norm": 1.9041680364894484,
      "learning_rate": 2.840045487809576e-06,
      "loss": 0.5619,
      "step": 7891
    },
    {
      "epoch": 0.967631191760667,
      "grad_norm": 1.9629977578413256,
      "learning_rate": 2.8395433519274523e-06,
      "loss": 0.5523,
      "step": 7892
    },
    {
      "epoch": 0.9677538008827857,
      "grad_norm": 2.1298721648128236,
      "learning_rate": 2.8390412020894962e-06,
      "loss": 0.506,
      "step": 7893
    },
    {
      "epoch": 0.9678764100049043,
      "grad_norm": 1.9454166818205296,
      "learning_rate": 2.838539038316345e-06,
      "loss": 0.5336,
      "step": 7894
    },
    {
      "epoch": 0.967999019127023,
      "grad_norm": 2.0010900611856304,
      "learning_rate": 2.838036860628641e-06,
      "loss": 0.5636,
      "step": 7895
    },
    {
      "epoch": 0.9681216282491417,
      "grad_norm": 1.95702868402314,
      "learning_rate": 2.8375346690470233e-06,
      "loss": 0.5518,
      "step": 7896
    },
    {
      "epoch": 0.9682442373712604,
      "grad_norm": 2.050706153423048,
      "learning_rate": 2.837032463592133e-06,
      "loss": 0.4943,
      "step": 7897
    },
    {
      "epoch": 0.9683668464933791,
      "grad_norm": 2.116101199935814,
      "learning_rate": 2.8365302442846123e-06,
      "loss": 0.5056,
      "step": 7898
    },
    {
      "epoch": 0.9684894556154978,
      "grad_norm": 2.175832108105917,
      "learning_rate": 2.8360280111451015e-06,
      "loss": 0.548,
      "step": 7899
    },
    {
      "epoch": 0.9686120647376165,
      "grad_norm": 2.000617104691372,
      "learning_rate": 2.8355257641942463e-06,
      "loss": 0.5341,
      "step": 7900
    },
    {
      "epoch": 0.9687346738597352,
      "grad_norm": 2.0292445678214714,
      "learning_rate": 2.8350235034526872e-06,
      "loss": 0.5559,
      "step": 7901
    },
    {
      "epoch": 0.9688572829818538,
      "grad_norm": 2.0937595320625855,
      "learning_rate": 2.834521228941069e-06,
      "loss": 0.5526,
      "step": 7902
    },
    {
      "epoch": 0.9689798921039725,
      "grad_norm": 1.9057933343645752,
      "learning_rate": 2.834018940680036e-06,
      "loss": 0.5341,
      "step": 7903
    },
    {
      "epoch": 0.9691025012260912,
      "grad_norm": 2.043477135420991,
      "learning_rate": 2.8335166386902337e-06,
      "loss": 0.5051,
      "step": 7904
    },
    {
      "epoch": 0.9692251103482099,
      "grad_norm": 1.8822540602769318,
      "learning_rate": 2.833014322992307e-06,
      "loss": 0.5415,
      "step": 7905
    },
    {
      "epoch": 0.9693477194703286,
      "grad_norm": 2.0192375184860674,
      "learning_rate": 2.832511993606902e-06,
      "loss": 0.5287,
      "step": 7906
    },
    {
      "epoch": 0.9694703285924473,
      "grad_norm": 1.8738897195482072,
      "learning_rate": 2.8320096505546645e-06,
      "loss": 0.4944,
      "step": 7907
    },
    {
      "epoch": 0.969592937714566,
      "grad_norm": 1.871750621035998,
      "learning_rate": 2.8315072938562444e-06,
      "loss": 0.5093,
      "step": 7908
    },
    {
      "epoch": 0.9697155468366846,
      "grad_norm": 1.935283448537769,
      "learning_rate": 2.8310049235322855e-06,
      "loss": 0.4868,
      "step": 7909
    },
    {
      "epoch": 0.9698381559588033,
      "grad_norm": 1.9376562504651191,
      "learning_rate": 2.83050253960344e-06,
      "loss": 0.5603,
      "step": 7910
    },
    {
      "epoch": 0.969960765080922,
      "grad_norm": 1.7683511297647736,
      "learning_rate": 2.8300001420903546e-06,
      "loss": 0.4736,
      "step": 7911
    },
    {
      "epoch": 0.9700833742030407,
      "grad_norm": 1.922124928208028,
      "learning_rate": 2.829497731013679e-06,
      "loss": 0.5995,
      "step": 7912
    },
    {
      "epoch": 0.9702059833251594,
      "grad_norm": 1.8515497383400976,
      "learning_rate": 2.8289953063940627e-06,
      "loss": 0.5377,
      "step": 7913
    },
    {
      "epoch": 0.9703285924472781,
      "grad_norm": 2.087625101819315,
      "learning_rate": 2.828492868252157e-06,
      "loss": 0.5341,
      "step": 7914
    },
    {
      "epoch": 0.9704512015693968,
      "grad_norm": 1.6947879371173116,
      "learning_rate": 2.8279904166086137e-06,
      "loss": 0.5209,
      "step": 7915
    },
    {
      "epoch": 0.9705738106915155,
      "grad_norm": 1.928319670385988,
      "learning_rate": 2.827487951484083e-06,
      "loss": 0.4873,
      "step": 7916
    },
    {
      "epoch": 0.9706964198136341,
      "grad_norm": 1.9055111986469542,
      "learning_rate": 2.826985472899218e-06,
      "loss": 0.4922,
      "step": 7917
    },
    {
      "epoch": 0.9708190289357528,
      "grad_norm": 1.6594422228786267,
      "learning_rate": 2.8264829808746717e-06,
      "loss": 0.5192,
      "step": 7918
    },
    {
      "epoch": 0.9709416380578715,
      "grad_norm": 2.004434099938684,
      "learning_rate": 2.8259804754310954e-06,
      "loss": 0.5354,
      "step": 7919
    },
    {
      "epoch": 0.9710642471799902,
      "grad_norm": 1.880687671997614,
      "learning_rate": 2.8254779565891457e-06,
      "loss": 0.5413,
      "step": 7920
    },
    {
      "epoch": 0.9711868563021089,
      "grad_norm": 1.8748984223546246,
      "learning_rate": 2.8249754243694756e-06,
      "loss": 0.5409,
      "step": 7921
    },
    {
      "epoch": 0.9713094654242276,
      "grad_norm": 1.9238060118879172,
      "learning_rate": 2.8244728787927393e-06,
      "loss": 0.533,
      "step": 7922
    },
    {
      "epoch": 0.9714320745463463,
      "grad_norm": 1.9051277774111728,
      "learning_rate": 2.823970319879595e-06,
      "loss": 0.5202,
      "step": 7923
    },
    {
      "epoch": 0.971554683668465,
      "grad_norm": 1.8498755277600438,
      "learning_rate": 2.8234677476506955e-06,
      "loss": 0.526,
      "step": 7924
    },
    {
      "epoch": 0.9716772927905836,
      "grad_norm": 1.8307367803327945,
      "learning_rate": 2.8229651621266996e-06,
      "loss": 0.5219,
      "step": 7925
    },
    {
      "epoch": 0.9717999019127023,
      "grad_norm": 1.9436969549119896,
      "learning_rate": 2.8224625633282633e-06,
      "loss": 0.5465,
      "step": 7926
    },
    {
      "epoch": 0.971922511034821,
      "grad_norm": 1.888163787278684,
      "learning_rate": 2.8219599512760454e-06,
      "loss": 0.5216,
      "step": 7927
    },
    {
      "epoch": 0.9720451201569397,
      "grad_norm": 2.0526260026607153,
      "learning_rate": 2.821457325990703e-06,
      "loss": 0.5161,
      "step": 7928
    },
    {
      "epoch": 0.9721677292790584,
      "grad_norm": 1.9530361735730593,
      "learning_rate": 2.8209546874928955e-06,
      "loss": 0.5363,
      "step": 7929
    },
    {
      "epoch": 0.9722903384011771,
      "grad_norm": 1.8803115774667047,
      "learning_rate": 2.820452035803282e-06,
      "loss": 0.5555,
      "step": 7930
    },
    {
      "epoch": 0.9724129475232958,
      "grad_norm": 2.008866507457302,
      "learning_rate": 2.8199493709425237e-06,
      "loss": 0.5153,
      "step": 7931
    },
    {
      "epoch": 0.9725355566454145,
      "grad_norm": 1.8236858457384921,
      "learning_rate": 2.819446692931278e-06,
      "loss": 0.5023,
      "step": 7932
    },
    {
      "epoch": 0.9726581657675331,
      "grad_norm": 2.063108250459791,
      "learning_rate": 2.8189440017902094e-06,
      "loss": 0.5505,
      "step": 7933
    },
    {
      "epoch": 0.9727807748896518,
      "grad_norm": 1.548181412684248,
      "learning_rate": 2.818441297539976e-06,
      "loss": 0.5088,
      "step": 7934
    },
    {
      "epoch": 0.9729033840117705,
      "grad_norm": 1.9867538356012817,
      "learning_rate": 2.8179385802012427e-06,
      "loss": 0.4918,
      "step": 7935
    },
    {
      "epoch": 0.9730259931338892,
      "grad_norm": 2.016543535198485,
      "learning_rate": 2.81743584979467e-06,
      "loss": 0.4883,
      "step": 7936
    },
    {
      "epoch": 0.9731486022560079,
      "grad_norm": 1.7963912391519183,
      "learning_rate": 2.816933106340923e-06,
      "loss": 0.5564,
      "step": 7937
    },
    {
      "epoch": 0.9732712113781266,
      "grad_norm": 1.8279817608192535,
      "learning_rate": 2.816430349860663e-06,
      "loss": 0.487,
      "step": 7938
    },
    {
      "epoch": 0.9733938205002453,
      "grad_norm": 1.8630105250135591,
      "learning_rate": 2.815927580374556e-06,
      "loss": 0.5058,
      "step": 7939
    },
    {
      "epoch": 0.973516429622364,
      "grad_norm": 2.00222323835072,
      "learning_rate": 2.815424797903267e-06,
      "loss": 0.5215,
      "step": 7940
    },
    {
      "epoch": 0.9736390387444825,
      "grad_norm": 2.0017240829751803,
      "learning_rate": 2.8149220024674596e-06,
      "loss": 0.5228,
      "step": 7941
    },
    {
      "epoch": 0.9737616478666012,
      "grad_norm": 1.978638752650963,
      "learning_rate": 2.8144191940877997e-06,
      "loss": 0.5662,
      "step": 7942
    },
    {
      "epoch": 0.9738842569887199,
      "grad_norm": 2.012835333076155,
      "learning_rate": 2.813916372784955e-06,
      "loss": 0.495,
      "step": 7943
    },
    {
      "epoch": 0.9740068661108386,
      "grad_norm": 1.94818400289981,
      "learning_rate": 2.813413538579592e-06,
      "loss": 0.4999,
      "step": 7944
    },
    {
      "epoch": 0.9741294752329573,
      "grad_norm": 1.9522993695153399,
      "learning_rate": 2.812910691492377e-06,
      "loss": 0.4612,
      "step": 7945
    },
    {
      "epoch": 0.974252084355076,
      "grad_norm": 1.8013208060223103,
      "learning_rate": 2.8124078315439795e-06,
      "loss": 0.5007,
      "step": 7946
    },
    {
      "epoch": 0.9743746934771947,
      "grad_norm": 1.9289255691300697,
      "learning_rate": 2.8119049587550656e-06,
      "loss": 0.5222,
      "step": 7947
    },
    {
      "epoch": 0.9744973025993134,
      "grad_norm": 2.0023256060692978,
      "learning_rate": 2.8114020731463073e-06,
      "loss": 0.506,
      "step": 7948
    },
    {
      "epoch": 0.974619911721432,
      "grad_norm": 1.85337283182125,
      "learning_rate": 2.810899174738372e-06,
      "loss": 0.5001,
      "step": 7949
    },
    {
      "epoch": 0.9747425208435507,
      "grad_norm": 1.9903201804077852,
      "learning_rate": 2.81039626355193e-06,
      "loss": 0.5332,
      "step": 7950
    },
    {
      "epoch": 0.9748651299656694,
      "grad_norm": 2.0661852487609993,
      "learning_rate": 2.8098933396076518e-06,
      "loss": 0.5691,
      "step": 7951
    },
    {
      "epoch": 0.9749877390877881,
      "grad_norm": 2.0199718161308855,
      "learning_rate": 2.8093904029262088e-06,
      "loss": 0.5326,
      "step": 7952
    },
    {
      "epoch": 0.9751103482099068,
      "grad_norm": 1.969430808368183,
      "learning_rate": 2.808887453528273e-06,
      "loss": 0.5403,
      "step": 7953
    },
    {
      "epoch": 0.9752329573320255,
      "grad_norm": 1.8615337495479207,
      "learning_rate": 2.8083844914345155e-06,
      "loss": 0.4847,
      "step": 7954
    },
    {
      "epoch": 0.9753555664541442,
      "grad_norm": 1.8607114283220496,
      "learning_rate": 2.807881516665609e-06,
      "loss": 0.4695,
      "step": 7955
    },
    {
      "epoch": 0.9754781755762629,
      "grad_norm": 1.958613242931107,
      "learning_rate": 2.807378529242229e-06,
      "loss": 0.554,
      "step": 7956
    },
    {
      "epoch": 0.9756007846983815,
      "grad_norm": 1.991643366142807,
      "learning_rate": 2.806875529185045e-06,
      "loss": 0.5227,
      "step": 7957
    },
    {
      "epoch": 0.9757233938205002,
      "grad_norm": 1.9557791309839099,
      "learning_rate": 2.8063725165147353e-06,
      "loss": 0.5588,
      "step": 7958
    },
    {
      "epoch": 0.9758460029426189,
      "grad_norm": 1.8034841087662776,
      "learning_rate": 2.8058694912519716e-06,
      "loss": 0.4967,
      "step": 7959
    },
    {
      "epoch": 0.9759686120647376,
      "grad_norm": 1.9247879503924166,
      "learning_rate": 2.8053664534174306e-06,
      "loss": 0.5526,
      "step": 7960
    },
    {
      "epoch": 0.9760912211868563,
      "grad_norm": 2.0113637106768967,
      "learning_rate": 2.8048634030317882e-06,
      "loss": 0.5421,
      "step": 7961
    },
    {
      "epoch": 0.976213830308975,
      "grad_norm": 2.0349936562961046,
      "learning_rate": 2.80436034011572e-06,
      "loss": 0.5401,
      "step": 7962
    },
    {
      "epoch": 0.9763364394310937,
      "grad_norm": 1.9116537420825657,
      "learning_rate": 2.8038572646899028e-06,
      "loss": 0.4954,
      "step": 7963
    },
    {
      "epoch": 0.9764590485532123,
      "grad_norm": 1.741201335186972,
      "learning_rate": 2.803354176775014e-06,
      "loss": 0.4822,
      "step": 7964
    },
    {
      "epoch": 0.976581657675331,
      "grad_norm": 2.0101745830644524,
      "learning_rate": 2.8028510763917314e-06,
      "loss": 0.511,
      "step": 7965
    },
    {
      "epoch": 0.9767042667974497,
      "grad_norm": 1.7689447858851466,
      "learning_rate": 2.8023479635607344e-06,
      "loss": 0.5139,
      "step": 7966
    },
    {
      "epoch": 0.9768268759195684,
      "grad_norm": 1.999579451616356,
      "learning_rate": 2.8018448383026993e-06,
      "loss": 0.5321,
      "step": 7967
    },
    {
      "epoch": 0.9769494850416871,
      "grad_norm": 1.8597190730309539,
      "learning_rate": 2.8013417006383078e-06,
      "loss": 0.586,
      "step": 7968
    },
    {
      "epoch": 0.9770720941638058,
      "grad_norm": 1.761042085129122,
      "learning_rate": 2.8008385505882396e-06,
      "loss": 0.4932,
      "step": 7969
    },
    {
      "epoch": 0.9771947032859245,
      "grad_norm": 1.9475448698249607,
      "learning_rate": 2.8003353881731726e-06,
      "loss": 0.4892,
      "step": 7970
    },
    {
      "epoch": 0.9773173124080432,
      "grad_norm": 1.96191859847461,
      "learning_rate": 2.799832213413791e-06,
      "loss": 0.5334,
      "step": 7971
    },
    {
      "epoch": 0.9774399215301618,
      "grad_norm": 2.076872928348778,
      "learning_rate": 2.7993290263307736e-06,
      "loss": 0.5873,
      "step": 7972
    },
    {
      "epoch": 0.9775625306522805,
      "grad_norm": 1.8161154024844608,
      "learning_rate": 2.7988258269448043e-06,
      "loss": 0.4917,
      "step": 7973
    },
    {
      "epoch": 0.9776851397743992,
      "grad_norm": 1.782346175885848,
      "learning_rate": 2.7983226152765642e-06,
      "loss": 0.4998,
      "step": 7974
    },
    {
      "epoch": 0.9778077488965179,
      "grad_norm": 1.9225265738266586,
      "learning_rate": 2.797819391346736e-06,
      "loss": 0.4783,
      "step": 7975
    },
    {
      "epoch": 0.9779303580186366,
      "grad_norm": 2.1223353849148454,
      "learning_rate": 2.7973161551760034e-06,
      "loss": 0.5465,
      "step": 7976
    },
    {
      "epoch": 0.9780529671407553,
      "grad_norm": 1.9505413498858295,
      "learning_rate": 2.7968129067850514e-06,
      "loss": 0.4871,
      "step": 7977
    },
    {
      "epoch": 0.978175576262874,
      "grad_norm": 2.1551556731724677,
      "learning_rate": 2.7963096461945625e-06,
      "loss": 0.5246,
      "step": 7978
    },
    {
      "epoch": 0.9782981853849927,
      "grad_norm": 2.0435763764669614,
      "learning_rate": 2.795806373425224e-06,
      "loss": 0.5229,
      "step": 7979
    },
    {
      "epoch": 0.9784207945071113,
      "grad_norm": 1.964302971734176,
      "learning_rate": 2.795303088497718e-06,
      "loss": 0.4981,
      "step": 7980
    },
    {
      "epoch": 0.97854340362923,
      "grad_norm": 1.7366196700288912,
      "learning_rate": 2.794799791432734e-06,
      "loss": 0.4751,
      "step": 7981
    },
    {
      "epoch": 0.9786660127513487,
      "grad_norm": 1.9003031293006,
      "learning_rate": 2.794296482250955e-06,
      "loss": 0.5321,
      "step": 7982
    },
    {
      "epoch": 0.9787886218734674,
      "grad_norm": 2.1284512285648782,
      "learning_rate": 2.793793160973071e-06,
      "loss": 0.5032,
      "step": 7983
    },
    {
      "epoch": 0.9789112309955861,
      "grad_norm": 1.7121519545017274,
      "learning_rate": 2.7932898276197673e-06,
      "loss": 0.5628,
      "step": 7984
    },
    {
      "epoch": 0.9790338401177048,
      "grad_norm": 2.0164365833228906,
      "learning_rate": 2.7927864822117325e-06,
      "loss": 0.505,
      "step": 7985
    },
    {
      "epoch": 0.9791564492398235,
      "grad_norm": 2.0823746837984274,
      "learning_rate": 2.7922831247696545e-06,
      "loss": 0.5317,
      "step": 7986
    },
    {
      "epoch": 0.9792790583619422,
      "grad_norm": 1.9811424448223207,
      "learning_rate": 2.7917797553142234e-06,
      "loss": 0.4847,
      "step": 7987
    },
    {
      "epoch": 0.9794016674840608,
      "grad_norm": 1.9022374923739667,
      "learning_rate": 2.7912763738661276e-06,
      "loss": 0.5608,
      "step": 7988
    },
    {
      "epoch": 0.9795242766061795,
      "grad_norm": 1.7906601696367777,
      "learning_rate": 2.7907729804460576e-06,
      "loss": 0.4638,
      "step": 7989
    },
    {
      "epoch": 0.9796468857282982,
      "grad_norm": 2.0288641683714137,
      "learning_rate": 2.790269575074702e-06,
      "loss": 0.4702,
      "step": 7990
    },
    {
      "epoch": 0.9797694948504169,
      "grad_norm": 1.8216109431853365,
      "learning_rate": 2.7897661577727536e-06,
      "loss": 0.5132,
      "step": 7991
    },
    {
      "epoch": 0.9798921039725356,
      "grad_norm": 2.001981297786489,
      "learning_rate": 2.7892627285609035e-06,
      "loss": 0.5112,
      "step": 7992
    },
    {
      "epoch": 0.9800147130946543,
      "grad_norm": 2.016564770153404,
      "learning_rate": 2.788759287459843e-06,
      "loss": 0.5496,
      "step": 7993
    },
    {
      "epoch": 0.980137322216773,
      "grad_norm": 1.991142948972551,
      "learning_rate": 2.788255834490265e-06,
      "loss": 0.566,
      "step": 7994
    },
    {
      "epoch": 0.9802599313388917,
      "grad_norm": 1.8272755727359993,
      "learning_rate": 2.787752369672861e-06,
      "loss": 0.5276,
      "step": 7995
    },
    {
      "epoch": 0.9803825404610103,
      "grad_norm": 1.814662785428967,
      "learning_rate": 2.787248893028326e-06,
      "loss": 0.5545,
      "step": 7996
    },
    {
      "epoch": 0.980505149583129,
      "grad_norm": 1.8230505956140883,
      "learning_rate": 2.7867454045773524e-06,
      "loss": 0.5127,
      "step": 7997
    },
    {
      "epoch": 0.9806277587052477,
      "grad_norm": 1.991323538138398,
      "learning_rate": 2.7862419043406352e-06,
      "loss": 0.5502,
      "step": 7998
    },
    {
      "epoch": 0.9807503678273664,
      "grad_norm": 2.1398791299166677,
      "learning_rate": 2.785738392338869e-06,
      "loss": 0.4878,
      "step": 7999
    },
    {
      "epoch": 0.980872976949485,
      "grad_norm": 1.8122749830249785,
      "learning_rate": 2.785234868592749e-06,
      "loss": 0.4973,
      "step": 8000
    },
    {
      "epoch": 0.9809955860716038,
      "grad_norm": 2.0423245989521663,
      "learning_rate": 2.784731333122971e-06,
      "loss": 0.533,
      "step": 8001
    },
    {
      "epoch": 0.9811181951937225,
      "grad_norm": 1.7747526853740585,
      "learning_rate": 2.784227785950231e-06,
      "loss": 0.5124,
      "step": 8002
    },
    {
      "epoch": 0.9812408043158412,
      "grad_norm": 1.9626692083055715,
      "learning_rate": 2.783724227095225e-06,
      "loss": 0.5023,
      "step": 8003
    },
    {
      "epoch": 0.9813634134379597,
      "grad_norm": 1.8255763496484851,
      "learning_rate": 2.783220656578653e-06,
      "loss": 0.4732,
      "step": 8004
    },
    {
      "epoch": 0.9814860225600784,
      "grad_norm": 1.8677972755526149,
      "learning_rate": 2.7827170744212083e-06,
      "loss": 0.5046,
      "step": 8005
    },
    {
      "epoch": 0.9816086316821971,
      "grad_norm": 2.0032175305750344,
      "learning_rate": 2.7822134806435934e-06,
      "loss": 0.5179,
      "step": 8006
    },
    {
      "epoch": 0.9817312408043158,
      "grad_norm": 1.9268689180800567,
      "learning_rate": 2.781709875266504e-06,
      "loss": 0.5114,
      "step": 8007
    },
    {
      "epoch": 0.9818538499264345,
      "grad_norm": 2.0253534809755904,
      "learning_rate": 2.78120625831064e-06,
      "loss": 0.5265,
      "step": 8008
    },
    {
      "epoch": 0.9819764590485532,
      "grad_norm": 2.026044195867057,
      "learning_rate": 2.7807026297967006e-06,
      "loss": 0.5375,
      "step": 8009
    },
    {
      "epoch": 0.9820990681706719,
      "grad_norm": 2.089209322279373,
      "learning_rate": 2.780198989745386e-06,
      "loss": 0.5057,
      "step": 8010
    },
    {
      "epoch": 0.9822216772927905,
      "grad_norm": 1.852343771199745,
      "learning_rate": 2.779695338177398e-06,
      "loss": 0.4765,
      "step": 8011
    },
    {
      "epoch": 0.9823442864149092,
      "grad_norm": 1.8085374879769829,
      "learning_rate": 2.779191675113435e-06,
      "loss": 0.5049,
      "step": 8012
    },
    {
      "epoch": 0.9824668955370279,
      "grad_norm": 1.930659455474603,
      "learning_rate": 2.7786880005742005e-06,
      "loss": 0.5439,
      "step": 8013
    },
    {
      "epoch": 0.9825895046591466,
      "grad_norm": 2.062955650268834,
      "learning_rate": 2.778184314580396e-06,
      "loss": 0.5414,
      "step": 8014
    },
    {
      "epoch": 0.9827121137812653,
      "grad_norm": 1.9847056136227201,
      "learning_rate": 2.777680617152724e-06,
      "loss": 0.5308,
      "step": 8015
    },
    {
      "epoch": 0.982834722903384,
      "grad_norm": 2.175830376959384,
      "learning_rate": 2.777176908311886e-06,
      "loss": 0.5677,
      "step": 8016
    },
    {
      "epoch": 0.9829573320255027,
      "grad_norm": 1.8249868320899427,
      "learning_rate": 2.776673188078588e-06,
      "loss": 0.49,
      "step": 8017
    },
    {
      "epoch": 0.9830799411476214,
      "grad_norm": 1.9532436185509374,
      "learning_rate": 2.7761694564735303e-06,
      "loss": 0.5031,
      "step": 8018
    },
    {
      "epoch": 0.98320255026974,
      "grad_norm": 1.7816166077505817,
      "learning_rate": 2.7756657135174202e-06,
      "loss": 0.4936,
      "step": 8019
    },
    {
      "epoch": 0.9833251593918587,
      "grad_norm": 1.988539066746372,
      "learning_rate": 2.7751619592309604e-06,
      "loss": 0.6182,
      "step": 8020
    },
    {
      "epoch": 0.9834477685139774,
      "grad_norm": 1.9395296383233238,
      "learning_rate": 2.774658193634858e-06,
      "loss": 0.5685,
      "step": 8021
    },
    {
      "epoch": 0.9835703776360961,
      "grad_norm": 2.0705602462257557,
      "learning_rate": 2.7741544167498167e-06,
      "loss": 0.5288,
      "step": 8022
    },
    {
      "epoch": 0.9836929867582148,
      "grad_norm": 1.8520165972484541,
      "learning_rate": 2.7736506285965444e-06,
      "loss": 0.4825,
      "step": 8023
    },
    {
      "epoch": 0.9838155958803335,
      "grad_norm": 1.8416026519563151,
      "learning_rate": 2.7731468291957465e-06,
      "loss": 0.4987,
      "step": 8024
    },
    {
      "epoch": 0.9839382050024522,
      "grad_norm": 1.916296821255967,
      "learning_rate": 2.7726430185681295e-06,
      "loss": 0.5917,
      "step": 8025
    },
    {
      "epoch": 0.9840608141245709,
      "grad_norm": 1.9402924218784865,
      "learning_rate": 2.7721391967344025e-06,
      "loss": 0.5044,
      "step": 8026
    },
    {
      "epoch": 0.9841834232466895,
      "grad_norm": 1.8492743793548687,
      "learning_rate": 2.771635363715273e-06,
      "loss": 0.5449,
      "step": 8027
    },
    {
      "epoch": 0.9843060323688082,
      "grad_norm": 2.1093702563136727,
      "learning_rate": 2.771131519531448e-06,
      "loss": 0.5285,
      "step": 8028
    },
    {
      "epoch": 0.9844286414909269,
      "grad_norm": 2.0182556504201083,
      "learning_rate": 2.7706276642036388e-06,
      "loss": 0.5543,
      "step": 8029
    },
    {
      "epoch": 0.9845512506130456,
      "grad_norm": 1.7390359461559188,
      "learning_rate": 2.770123797752553e-06,
      "loss": 0.4802,
      "step": 8030
    },
    {
      "epoch": 0.9846738597351643,
      "grad_norm": 1.8695241217235616,
      "learning_rate": 2.769619920198901e-06,
      "loss": 0.484,
      "step": 8031
    },
    {
      "epoch": 0.984796468857283,
      "grad_norm": 1.8191772995464828,
      "learning_rate": 2.769116031563393e-06,
      "loss": 0.5127,
      "step": 8032
    },
    {
      "epoch": 0.9849190779794017,
      "grad_norm": 1.8828346034142893,
      "learning_rate": 2.7686121318667397e-06,
      "loss": 0.4772,
      "step": 8033
    },
    {
      "epoch": 0.9850416871015204,
      "grad_norm": 1.8470633085338666,
      "learning_rate": 2.768108221129652e-06,
      "loss": 0.5218,
      "step": 8034
    },
    {
      "epoch": 0.985164296223639,
      "grad_norm": 1.6971399748464846,
      "learning_rate": 2.767604299372842e-06,
      "loss": 0.4876,
      "step": 8035
    },
    {
      "epoch": 0.9852869053457577,
      "grad_norm": 2.0442915357181675,
      "learning_rate": 2.767100366617021e-06,
      "loss": 0.5291,
      "step": 8036
    },
    {
      "epoch": 0.9854095144678764,
      "grad_norm": 1.8802719728206911,
      "learning_rate": 2.766596422882903e-06,
      "loss": 0.5172,
      "step": 8037
    },
    {
      "epoch": 0.9855321235899951,
      "grad_norm": 2.0187333236883163,
      "learning_rate": 2.766092468191199e-06,
      "loss": 0.5129,
      "step": 8038
    },
    {
      "epoch": 0.9856547327121138,
      "grad_norm": 1.9603221829712665,
      "learning_rate": 2.765588502562624e-06,
      "loss": 0.5096,
      "step": 8039
    },
    {
      "epoch": 0.9857773418342325,
      "grad_norm": 1.7940154752607662,
      "learning_rate": 2.765084526017891e-06,
      "loss": 0.5259,
      "step": 8040
    },
    {
      "epoch": 0.9858999509563512,
      "grad_norm": 2.2556280091267196,
      "learning_rate": 2.764580538577715e-06,
      "loss": 0.5588,
      "step": 8041
    },
    {
      "epoch": 0.9860225600784699,
      "grad_norm": 2.126681437719389,
      "learning_rate": 2.764076540262811e-06,
      "loss": 0.5909,
      "step": 8042
    },
    {
      "epoch": 0.9861451692005885,
      "grad_norm": 1.9392251020153917,
      "learning_rate": 2.763572531093893e-06,
      "loss": 0.4936,
      "step": 8043
    },
    {
      "epoch": 0.9862677783227072,
      "grad_norm": 1.9206276941689178,
      "learning_rate": 2.763068511091678e-06,
      "loss": 0.4757,
      "step": 8044
    },
    {
      "epoch": 0.9863903874448259,
      "grad_norm": 1.8121228039638109,
      "learning_rate": 2.762564480276881e-06,
      "loss": 0.5495,
      "step": 8045
    },
    {
      "epoch": 0.9865129965669446,
      "grad_norm": 2.056464413725736,
      "learning_rate": 2.7620604386702187e-06,
      "loss": 0.5324,
      "step": 8046
    },
    {
      "epoch": 0.9866356056890633,
      "grad_norm": 1.8606713854604964,
      "learning_rate": 2.761556386292409e-06,
      "loss": 0.5132,
      "step": 8047
    },
    {
      "epoch": 0.986758214811182,
      "grad_norm": 2.0628218003417205,
      "learning_rate": 2.761052323164168e-06,
      "loss": 0.5861,
      "step": 8048
    },
    {
      "epoch": 0.9868808239333007,
      "grad_norm": 1.9708635727115407,
      "learning_rate": 2.760548249306215e-06,
      "loss": 0.48,
      "step": 8049
    },
    {
      "epoch": 0.9870034330554194,
      "grad_norm": 1.8778490202565596,
      "learning_rate": 2.7600441647392674e-06,
      "loss": 0.4899,
      "step": 8050
    },
    {
      "epoch": 0.987126042177538,
      "grad_norm": 1.9773452533978684,
      "learning_rate": 2.7595400694840446e-06,
      "loss": 0.5071,
      "step": 8051
    },
    {
      "epoch": 0.9872486512996567,
      "grad_norm": 1.9119347379738714,
      "learning_rate": 2.7590359635612655e-06,
      "loss": 0.4993,
      "step": 8052
    },
    {
      "epoch": 0.9873712604217754,
      "grad_norm": 2.0241351855272143,
      "learning_rate": 2.758531846991649e-06,
      "loss": 0.5189,
      "step": 8053
    },
    {
      "epoch": 0.9874938695438941,
      "grad_norm": 1.7449648481287081,
      "learning_rate": 2.758027719795917e-06,
      "loss": 0.4972,
      "step": 8054
    },
    {
      "epoch": 0.9876164786660128,
      "grad_norm": 2.103962919541911,
      "learning_rate": 2.7575235819947883e-06,
      "loss": 0.5347,
      "step": 8055
    },
    {
      "epoch": 0.9877390877881315,
      "grad_norm": 1.9700292492760034,
      "learning_rate": 2.7570194336089846e-06,
      "loss": 0.4905,
      "step": 8056
    },
    {
      "epoch": 0.9878616969102502,
      "grad_norm": 2.0252086523882236,
      "learning_rate": 2.7565152746592266e-06,
      "loss": 0.5311,
      "step": 8057
    },
    {
      "epoch": 0.9879843060323688,
      "grad_norm": 2.01843454643652,
      "learning_rate": 2.7560111051662373e-06,
      "loss": 0.5636,
      "step": 8058
    },
    {
      "epoch": 0.9881069151544875,
      "grad_norm": 1.8998166015186784,
      "learning_rate": 2.755506925150738e-06,
      "loss": 0.5254,
      "step": 8059
    },
    {
      "epoch": 0.9882295242766062,
      "grad_norm": 2.1780318535889056,
      "learning_rate": 2.7550027346334517e-06,
      "loss": 0.4992,
      "step": 8060
    },
    {
      "epoch": 0.9883521333987249,
      "grad_norm": 2.0404517407916356,
      "learning_rate": 2.754498533635102e-06,
      "loss": 0.4559,
      "step": 8061
    },
    {
      "epoch": 0.9884747425208436,
      "grad_norm": 2.025506085406711,
      "learning_rate": 2.753994322176412e-06,
      "loss": 0.5725,
      "step": 8062
    },
    {
      "epoch": 0.9885973516429623,
      "grad_norm": 2.143813247514371,
      "learning_rate": 2.753490100278105e-06,
      "loss": 0.5891,
      "step": 8063
    },
    {
      "epoch": 0.988719960765081,
      "grad_norm": 1.8519507712568217,
      "learning_rate": 2.7529858679609066e-06,
      "loss": 0.487,
      "step": 8064
    },
    {
      "epoch": 0.9888425698871997,
      "grad_norm": 1.9321175222515765,
      "learning_rate": 2.7524816252455418e-06,
      "loss": 0.5376,
      "step": 8065
    },
    {
      "epoch": 0.9889651790093182,
      "grad_norm": 2.033004215454913,
      "learning_rate": 2.7519773721527338e-06,
      "loss": 0.5157,
      "step": 8066
    },
    {
      "epoch": 0.9890877881314369,
      "grad_norm": 1.9257209925228675,
      "learning_rate": 2.751473108703211e-06,
      "loss": 0.5499,
      "step": 8067
    },
    {
      "epoch": 0.9892103972535556,
      "grad_norm": 1.8770070131906267,
      "learning_rate": 2.7509688349176973e-06,
      "loss": 0.5177,
      "step": 8068
    },
    {
      "epoch": 0.9893330063756743,
      "grad_norm": 1.8282874088046306,
      "learning_rate": 2.750464550816921e-06,
      "loss": 0.4815,
      "step": 8069
    },
    {
      "epoch": 0.989455615497793,
      "grad_norm": 2.0428655684606216,
      "learning_rate": 2.749960256421608e-06,
      "loss": 0.5601,
      "step": 8070
    },
    {
      "epoch": 0.9895782246199117,
      "grad_norm": 1.965905425961823,
      "learning_rate": 2.7494559517524855e-06,
      "loss": 0.5249,
      "step": 8071
    },
    {
      "epoch": 0.9897008337420304,
      "grad_norm": 1.8122213768434268,
      "learning_rate": 2.748951636830282e-06,
      "loss": 0.544,
      "step": 8072
    },
    {
      "epoch": 0.9898234428641491,
      "grad_norm": 1.684090142945781,
      "learning_rate": 2.7484473116757256e-06,
      "loss": 0.5093,
      "step": 8073
    },
    {
      "epoch": 0.9899460519862677,
      "grad_norm": 2.0284520924030316,
      "learning_rate": 2.7479429763095444e-06,
      "loss": 0.5149,
      "step": 8074
    },
    {
      "epoch": 0.9900686611083864,
      "grad_norm": 2.034069294989306,
      "learning_rate": 2.7474386307524692e-06,
      "loss": 0.5554,
      "step": 8075
    },
    {
      "epoch": 0.9901912702305051,
      "grad_norm": 1.9014582184438125,
      "learning_rate": 2.7469342750252266e-06,
      "loss": 0.5366,
      "step": 8076
    },
    {
      "epoch": 0.9903138793526238,
      "grad_norm": 2.042464525495782,
      "learning_rate": 2.7464299091485493e-06,
      "loss": 0.4873,
      "step": 8077
    },
    {
      "epoch": 0.9904364884747425,
      "grad_norm": 1.8028748450718721,
      "learning_rate": 2.7459255331431655e-06,
      "loss": 0.4968,
      "step": 8078
    },
    {
      "epoch": 0.9905590975968612,
      "grad_norm": 1.6840909151920749,
      "learning_rate": 2.745421147029808e-06,
      "loss": 0.4907,
      "step": 8079
    },
    {
      "epoch": 0.9906817067189799,
      "grad_norm": 1.793639199778033,
      "learning_rate": 2.7449167508292065e-06,
      "loss": 0.4657,
      "step": 8080
    },
    {
      "epoch": 0.9908043158410986,
      "grad_norm": 1.7887498564092283,
      "learning_rate": 2.7444123445620926e-06,
      "loss": 0.5336,
      "step": 8081
    },
    {
      "epoch": 0.9909269249632172,
      "grad_norm": 2.080252306026551,
      "learning_rate": 2.743907928249199e-06,
      "loss": 0.4741,
      "step": 8082
    },
    {
      "epoch": 0.9910495340853359,
      "grad_norm": 1.9864021474070463,
      "learning_rate": 2.743403501911257e-06,
      "loss": 0.4768,
      "step": 8083
    },
    {
      "epoch": 0.9911721432074546,
      "grad_norm": 2.193335501713311,
      "learning_rate": 2.7428990655690007e-06,
      "loss": 0.4732,
      "step": 8084
    },
    {
      "epoch": 0.9912947523295733,
      "grad_norm": 1.863649543153775,
      "learning_rate": 2.742394619243163e-06,
      "loss": 0.4704,
      "step": 8085
    },
    {
      "epoch": 0.991417361451692,
      "grad_norm": 2.09941981314631,
      "learning_rate": 2.741890162954477e-06,
      "loss": 0.5965,
      "step": 8086
    },
    {
      "epoch": 0.9915399705738107,
      "grad_norm": 1.9637065782355527,
      "learning_rate": 2.741385696723677e-06,
      "loss": 0.5135,
      "step": 8087
    },
    {
      "epoch": 0.9916625796959294,
      "grad_norm": 1.7339700933217466,
      "learning_rate": 2.7408812205714973e-06,
      "loss": 0.5004,
      "step": 8088
    },
    {
      "epoch": 0.9917851888180481,
      "grad_norm": 2.003726620313308,
      "learning_rate": 2.740376734518674e-06,
      "loss": 0.534,
      "step": 8089
    },
    {
      "epoch": 0.9919077979401667,
      "grad_norm": 2.0274452665122897,
      "learning_rate": 2.739872238585941e-06,
      "loss": 0.4883,
      "step": 8090
    },
    {
      "epoch": 0.9920304070622854,
      "grad_norm": 1.7464327343153254,
      "learning_rate": 2.739367732794033e-06,
      "loss": 0.557,
      "step": 8091
    },
    {
      "epoch": 0.9921530161844041,
      "grad_norm": 1.911055143685131,
      "learning_rate": 2.738863217163689e-06,
      "loss": 0.5067,
      "step": 8092
    },
    {
      "epoch": 0.9922756253065228,
      "grad_norm": 1.9384616027489188,
      "learning_rate": 2.7383586917156434e-06,
      "loss": 0.5262,
      "step": 8093
    },
    {
      "epoch": 0.9923982344286415,
      "grad_norm": 1.9404175916934354,
      "learning_rate": 2.737854156470634e-06,
      "loss": 0.4962,
      "step": 8094
    },
    {
      "epoch": 0.9925208435507602,
      "grad_norm": 2.1916550344851378,
      "learning_rate": 2.737349611449397e-06,
      "loss": 0.5124,
      "step": 8095
    },
    {
      "epoch": 0.9926434526728789,
      "grad_norm": 1.9586964221009207,
      "learning_rate": 2.7368450566726714e-06,
      "loss": 0.5026,
      "step": 8096
    },
    {
      "epoch": 0.9927660617949976,
      "grad_norm": 1.9023295991311573,
      "learning_rate": 2.736340492161194e-06,
      "loss": 0.5104,
      "step": 8097
    },
    {
      "epoch": 0.9928886709171162,
      "grad_norm": 1.9468428156328081,
      "learning_rate": 2.735835917935704e-06,
      "loss": 0.5435,
      "step": 8098
    },
    {
      "epoch": 0.9930112800392349,
      "grad_norm": 1.8999925223741012,
      "learning_rate": 2.735331334016941e-06,
      "loss": 0.5409,
      "step": 8099
    },
    {
      "epoch": 0.9931338891613536,
      "grad_norm": 2.090971373114266,
      "learning_rate": 2.7348267404256434e-06,
      "loss": 0.5991,
      "step": 8100
    },
    {
      "epoch": 0.9932564982834723,
      "grad_norm": 1.8802760235702316,
      "learning_rate": 2.73432213718255e-06,
      "loss": 0.5609,
      "step": 8101
    },
    {
      "epoch": 0.993379107405591,
      "grad_norm": 1.9132673084435468,
      "learning_rate": 2.733817524308403e-06,
      "loss": 0.5392,
      "step": 8102
    },
    {
      "epoch": 0.9935017165277097,
      "grad_norm": 1.8642123769023171,
      "learning_rate": 2.733312901823942e-06,
      "loss": 0.5294,
      "step": 8103
    },
    {
      "epoch": 0.9936243256498284,
      "grad_norm": 1.7603831675926545,
      "learning_rate": 2.732808269749907e-06,
      "loss": 0.5071,
      "step": 8104
    },
    {
      "epoch": 0.9937469347719471,
      "grad_norm": 1.7965713875752178,
      "learning_rate": 2.7323036281070397e-06,
      "loss": 0.5353,
      "step": 8105
    },
    {
      "epoch": 0.9938695438940657,
      "grad_norm": 1.8182091569902494,
      "learning_rate": 2.7317989769160826e-06,
      "loss": 0.4998,
      "step": 8106
    },
    {
      "epoch": 0.9939921530161844,
      "grad_norm": 2.031577206440928,
      "learning_rate": 2.731294316197777e-06,
      "loss": 0.5052,
      "step": 8107
    },
    {
      "epoch": 0.9941147621383031,
      "grad_norm": 1.9000940139790103,
      "learning_rate": 2.7307896459728654e-06,
      "loss": 0.4726,
      "step": 8108
    },
    {
      "epoch": 0.9942373712604218,
      "grad_norm": 1.8759574670758672,
      "learning_rate": 2.7302849662620904e-06,
      "loss": 0.5163,
      "step": 8109
    },
    {
      "epoch": 0.9943599803825405,
      "grad_norm": 1.7179911175297262,
      "learning_rate": 2.7297802770861962e-06,
      "loss": 0.4725,
      "step": 8110
    },
    {
      "epoch": 0.9944825895046592,
      "grad_norm": 2.1895040876506293,
      "learning_rate": 2.7292755784659257e-06,
      "loss": 0.5474,
      "step": 8111
    },
    {
      "epoch": 0.9946051986267779,
      "grad_norm": 1.972129583764225,
      "learning_rate": 2.728770870422023e-06,
      "loss": 0.5441,
      "step": 8112
    },
    {
      "epoch": 0.9947278077488965,
      "grad_norm": 2.49681475564504,
      "learning_rate": 2.7282661529752337e-06,
      "loss": 0.5416,
      "step": 8113
    },
    {
      "epoch": 0.9948504168710152,
      "grad_norm": 1.9141359819562613,
      "learning_rate": 2.727761426146299e-06,
      "loss": 0.5148,
      "step": 8114
    },
    {
      "epoch": 0.9949730259931339,
      "grad_norm": 1.7174187216780405,
      "learning_rate": 2.727256689955968e-06,
      "loss": 0.555,
      "step": 8115
    },
    {
      "epoch": 0.9950956351152526,
      "grad_norm": 1.8893275419283646,
      "learning_rate": 2.7267519444249844e-06,
      "loss": 0.5168,
      "step": 8116
    },
    {
      "epoch": 0.9952182442373713,
      "grad_norm": 1.9451600179634805,
      "learning_rate": 2.726247189574095e-06,
      "loss": 0.515,
      "step": 8117
    },
    {
      "epoch": 0.99534085335949,
      "grad_norm": 1.6753660728230158,
      "learning_rate": 2.725742425424045e-06,
      "loss": 0.4807,
      "step": 8118
    },
    {
      "epoch": 0.9954634624816087,
      "grad_norm": 1.8900010985864393,
      "learning_rate": 2.725237651995582e-06,
      "loss": 0.5191,
      "step": 8119
    },
    {
      "epoch": 0.9955860716037274,
      "grad_norm": 1.8687764296139975,
      "learning_rate": 2.724732869309452e-06,
      "loss": 0.5332,
      "step": 8120
    },
    {
      "epoch": 0.995708680725846,
      "grad_norm": 1.9119208687356133,
      "learning_rate": 2.724228077386404e-06,
      "loss": 0.5368,
      "step": 8121
    },
    {
      "epoch": 0.9958312898479647,
      "grad_norm": 2.0679355668187553,
      "learning_rate": 2.7237232762471846e-06,
      "loss": 0.5555,
      "step": 8122
    },
    {
      "epoch": 0.9959538989700834,
      "grad_norm": 1.8734758886370881,
      "learning_rate": 2.7232184659125433e-06,
      "loss": 0.5181,
      "step": 8123
    },
    {
      "epoch": 0.996076508092202,
      "grad_norm": 1.851804302733365,
      "learning_rate": 2.7227136464032266e-06,
      "loss": 0.4721,
      "step": 8124
    },
    {
      "epoch": 0.9961991172143208,
      "grad_norm": 2.013657844303393,
      "learning_rate": 2.722208817739986e-06,
      "loss": 0.5642,
      "step": 8125
    },
    {
      "epoch": 0.9963217263364395,
      "grad_norm": 1.8659942072411677,
      "learning_rate": 2.721703979943568e-06,
      "loss": 0.5197,
      "step": 8126
    },
    {
      "epoch": 0.9964443354585582,
      "grad_norm": 1.9748798635835638,
      "learning_rate": 2.7211991330347254e-06,
      "loss": 0.5524,
      "step": 8127
    },
    {
      "epoch": 0.9965669445806769,
      "grad_norm": 1.8728851931733608,
      "learning_rate": 2.7206942770342065e-06,
      "loss": 0.563,
      "step": 8128
    },
    {
      "epoch": 0.9966895537027954,
      "grad_norm": 1.9420244662603434,
      "learning_rate": 2.7201894119627614e-06,
      "loss": 0.5325,
      "step": 8129
    },
    {
      "epoch": 0.9968121628249141,
      "grad_norm": 1.8650339663731843,
      "learning_rate": 2.7196845378411424e-06,
      "loss": 0.5221,
      "step": 8130
    },
    {
      "epoch": 0.9969347719470328,
      "grad_norm": 2.052937543153604,
      "learning_rate": 2.7191796546900996e-06,
      "loss": 0.5091,
      "step": 8131
    },
    {
      "epoch": 0.9970573810691515,
      "grad_norm": 2.138471502898907,
      "learning_rate": 2.718674762530385e-06,
      "loss": 0.5835,
      "step": 8132
    },
    {
      "epoch": 0.9971799901912702,
      "grad_norm": 1.9447899984674353,
      "learning_rate": 2.7181698613827512e-06,
      "loss": 0.5237,
      "step": 8133
    },
    {
      "epoch": 0.9973025993133889,
      "grad_norm": 1.9272030805002442,
      "learning_rate": 2.7176649512679486e-06,
      "loss": 0.5245,
      "step": 8134
    },
    {
      "epoch": 0.9974252084355076,
      "grad_norm": 1.924104864096299,
      "learning_rate": 2.7171600322067322e-06,
      "loss": 0.5234,
      "step": 8135
    },
    {
      "epoch": 0.9975478175576263,
      "grad_norm": 2.012353019881667,
      "learning_rate": 2.7166551042198537e-06,
      "loss": 0.5282,
      "step": 8136
    },
    {
      "epoch": 0.9976704266797449,
      "grad_norm": 1.9508742806636654,
      "learning_rate": 2.7161501673280666e-06,
      "loss": 0.4986,
      "step": 8137
    },
    {
      "epoch": 0.9977930358018636,
      "grad_norm": 2.018367954989952,
      "learning_rate": 2.7156452215521256e-06,
      "loss": 0.4988,
      "step": 8138
    },
    {
      "epoch": 0.9979156449239823,
      "grad_norm": 1.9843010038442175,
      "learning_rate": 2.7151402669127837e-06,
      "loss": 0.5096,
      "step": 8139
    },
    {
      "epoch": 0.998038254046101,
      "grad_norm": 1.9304556726270432,
      "learning_rate": 2.7146353034307964e-06,
      "loss": 0.4832,
      "step": 8140
    },
    {
      "epoch": 0.9981608631682197,
      "grad_norm": 1.9040209447714358,
      "learning_rate": 2.7141303311269173e-06,
      "loss": 0.4958,
      "step": 8141
    },
    {
      "epoch": 0.9982834722903384,
      "grad_norm": 1.757480774752008,
      "learning_rate": 2.7136253500219034e-06,
      "loss": 0.4714,
      "step": 8142
    },
    {
      "epoch": 0.9984060814124571,
      "grad_norm": 1.9517625322148984,
      "learning_rate": 2.713120360136509e-06,
      "loss": 0.5043,
      "step": 8143
    },
    {
      "epoch": 0.9985286905345758,
      "grad_norm": 2.0784637497196212,
      "learning_rate": 2.7126153614914907e-06,
      "loss": 0.5533,
      "step": 8144
    },
    {
      "epoch": 0.9986512996566944,
      "grad_norm": 1.9155772326250546,
      "learning_rate": 2.7121103541076044e-06,
      "loss": 0.5156,
      "step": 8145
    },
    {
      "epoch": 0.9987739087788131,
      "grad_norm": 1.9191926017825822,
      "learning_rate": 2.7116053380056066e-06,
      "loss": 0.5715,
      "step": 8146
    },
    {
      "epoch": 0.9988965179009318,
      "grad_norm": 1.9907464381193845,
      "learning_rate": 2.711100313206256e-06,
      "loss": 0.5886,
      "step": 8147
    },
    {
      "epoch": 0.9990191270230505,
      "grad_norm": 2.1180220313163556,
      "learning_rate": 2.7105952797303083e-06,
      "loss": 0.4732,
      "step": 8148
    },
    {
      "epoch": 0.9991417361451692,
      "grad_norm": 1.9361686032790646,
      "learning_rate": 2.7100902375985213e-06,
      "loss": 0.5501,
      "step": 8149
    },
    {
      "epoch": 0.9992643452672879,
      "grad_norm": 1.914011345690789,
      "learning_rate": 2.7095851868316543e-06,
      "loss": 0.5089,
      "step": 8150
    },
    {
      "epoch": 0.9993869543894066,
      "grad_norm": 1.9379578179682122,
      "learning_rate": 2.709080127450464e-06,
      "loss": 0.4703,
      "step": 8151
    },
    {
      "epoch": 0.9995095635115253,
      "grad_norm": 2.1943399776449652,
      "learning_rate": 2.708575059475711e-06,
      "loss": 0.5176,
      "step": 8152
    },
    {
      "epoch": 0.9996321726336439,
      "grad_norm": 1.9484711180869143,
      "learning_rate": 2.708069982928154e-06,
      "loss": 0.4783,
      "step": 8153
    },
    {
      "epoch": 0.9997547817557626,
      "grad_norm": 1.9689061589720132,
      "learning_rate": 2.7075648978285517e-06,
      "loss": 0.5189,
      "step": 8154
    },
    {
      "epoch": 0.9998773908778813,
      "grad_norm": 1.944030701973654,
      "learning_rate": 2.7070598041976655e-06,
      "loss": 0.4887,
      "step": 8155
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1549974719613183,
      "learning_rate": 2.7065547020562543e-06,
      "loss": 0.501,
      "step": 8156
    },
    {
      "epoch": 1.0001226091221187,
      "grad_norm": 1.7197165772315004,
      "learning_rate": 2.706049591425079e-06,
      "loss": 0.451,
      "step": 8157
    },
    {
      "epoch": 1.0002452182442374,
      "grad_norm": 1.7391629737434928,
      "learning_rate": 2.705544472324901e-06,
      "loss": 0.3998,
      "step": 8158
    },
    {
      "epoch": 1.000367827366356,
      "grad_norm": 1.8055928123100609,
      "learning_rate": 2.705039344776481e-06,
      "loss": 0.4691,
      "step": 8159
    },
    {
      "epoch": 1.0004904364884748,
      "grad_norm": 1.7171705635512653,
      "learning_rate": 2.704534208800581e-06,
      "loss": 0.4495,
      "step": 8160
    },
    {
      "epoch": 1.0006130456105935,
      "grad_norm": 1.63200532792853,
      "learning_rate": 2.704029064417964e-06,
      "loss": 0.4335,
      "step": 8161
    },
    {
      "epoch": 1.0007356547327122,
      "grad_norm": 1.791184056214997,
      "learning_rate": 2.7035239116493893e-06,
      "loss": 0.463,
      "step": 8162
    },
    {
      "epoch": 1.000858263854831,
      "grad_norm": 1.7680586363048831,
      "learning_rate": 2.703018750515623e-06,
      "loss": 0.4907,
      "step": 8163
    },
    {
      "epoch": 1.0009808729769494,
      "grad_norm": 1.8529752292048014,
      "learning_rate": 2.7025135810374257e-06,
      "loss": 0.4245,
      "step": 8164
    },
    {
      "epoch": 1.001103482099068,
      "grad_norm": 1.9562407656860028,
      "learning_rate": 2.7020084032355626e-06,
      "loss": 0.4949,
      "step": 8165
    },
    {
      "epoch": 1.0012260912211868,
      "grad_norm": 1.6984952001388354,
      "learning_rate": 2.7015032171307963e-06,
      "loss": 0.4313,
      "step": 8166
    },
    {
      "epoch": 1.0013487003433055,
      "grad_norm": 1.8001875528809899,
      "learning_rate": 2.700998022743891e-06,
      "loss": 0.4568,
      "step": 8167
    },
    {
      "epoch": 1.0014713094654242,
      "grad_norm": 2.0335942084351246,
      "learning_rate": 2.7004928200956106e-06,
      "loss": 0.4944,
      "step": 8168
    },
    {
      "epoch": 1.0015939185875429,
      "grad_norm": 1.846733274392651,
      "learning_rate": 2.6999876092067208e-06,
      "loss": 0.4756,
      "step": 8169
    },
    {
      "epoch": 1.0017165277096616,
      "grad_norm": 1.8728021436295559,
      "learning_rate": 2.699482390097986e-06,
      "loss": 0.4407,
      "step": 8170
    },
    {
      "epoch": 1.0018391368317803,
      "grad_norm": 1.7867104661862503,
      "learning_rate": 2.6989771627901723e-06,
      "loss": 0.4558,
      "step": 8171
    },
    {
      "epoch": 1.001961745953899,
      "grad_norm": 1.9176114622012388,
      "learning_rate": 2.6984719273040437e-06,
      "loss": 0.4427,
      "step": 8172
    },
    {
      "epoch": 1.0020843550760177,
      "grad_norm": 2.071994727384818,
      "learning_rate": 2.697966683660369e-06,
      "loss": 0.4208,
      "step": 8173
    },
    {
      "epoch": 1.0022069641981364,
      "grad_norm": 1.9167995916761869,
      "learning_rate": 2.6974614318799118e-06,
      "loss": 0.4382,
      "step": 8174
    },
    {
      "epoch": 1.002329573320255,
      "grad_norm": 2.1087633773444474,
      "learning_rate": 2.6969561719834414e-06,
      "loss": 0.4726,
      "step": 8175
    },
    {
      "epoch": 1.0024521824423738,
      "grad_norm": 1.9443179026756778,
      "learning_rate": 2.696450903991723e-06,
      "loss": 0.4512,
      "step": 8176
    },
    {
      "epoch": 1.0025747915644925,
      "grad_norm": 1.8436807734988863,
      "learning_rate": 2.6959456279255253e-06,
      "loss": 0.4481,
      "step": 8177
    },
    {
      "epoch": 1.0026974006866112,
      "grad_norm": 1.6612898519536996,
      "learning_rate": 2.6954403438056147e-06,
      "loss": 0.399,
      "step": 8178
    },
    {
      "epoch": 1.0028200098087299,
      "grad_norm": 1.733837056027666,
      "learning_rate": 2.69493505165276e-06,
      "loss": 0.4504,
      "step": 8179
    },
    {
      "epoch": 1.0029426189308484,
      "grad_norm": 1.6772599951400857,
      "learning_rate": 2.69442975148773e-06,
      "loss": 0.4227,
      "step": 8180
    },
    {
      "epoch": 1.003065228052967,
      "grad_norm": 1.9558427085662562,
      "learning_rate": 2.693924443331293e-06,
      "loss": 0.4703,
      "step": 8181
    },
    {
      "epoch": 1.0031878371750858,
      "grad_norm": 2.0601938145033443,
      "learning_rate": 2.693419127204217e-06,
      "loss": 0.4299,
      "step": 8182
    },
    {
      "epoch": 1.0033104462972045,
      "grad_norm": 1.8389539356921536,
      "learning_rate": 2.6929138031272733e-06,
      "loss": 0.4508,
      "step": 8183
    },
    {
      "epoch": 1.0034330554193231,
      "grad_norm": 1.6467414230702662,
      "learning_rate": 2.6924084711212305e-06,
      "loss": 0.44,
      "step": 8184
    },
    {
      "epoch": 1.0035556645414418,
      "grad_norm": 1.8131840131197303,
      "learning_rate": 2.6919031312068595e-06,
      "loss": 0.4412,
      "step": 8185
    },
    {
      "epoch": 1.0036782736635605,
      "grad_norm": 1.8905126434774044,
      "learning_rate": 2.69139778340493e-06,
      "loss": 0.455,
      "step": 8186
    },
    {
      "epoch": 1.0038008827856792,
      "grad_norm": 1.938335102804717,
      "learning_rate": 2.690892427736212e-06,
      "loss": 0.4066,
      "step": 8187
    },
    {
      "epoch": 1.003923491907798,
      "grad_norm": 1.8011093063310613,
      "learning_rate": 2.690387064221478e-06,
      "loss": 0.4433,
      "step": 8188
    },
    {
      "epoch": 1.0040461010299166,
      "grad_norm": 1.9533437631674642,
      "learning_rate": 2.6898816928814987e-06,
      "loss": 0.4552,
      "step": 8189
    },
    {
      "epoch": 1.0041687101520353,
      "grad_norm": 1.8532504707437603,
      "learning_rate": 2.689376313737045e-06,
      "loss": 0.4265,
      "step": 8190
    },
    {
      "epoch": 1.004291319274154,
      "grad_norm": 1.9890776865126703,
      "learning_rate": 2.6888709268088898e-06,
      "loss": 0.4338,
      "step": 8191
    },
    {
      "epoch": 1.0044139283962727,
      "grad_norm": 1.853316468694908,
      "learning_rate": 2.6883655321178048e-06,
      "loss": 0.3855,
      "step": 8192
    },
    {
      "epoch": 1.0045365375183914,
      "grad_norm": 1.7559720191184311,
      "learning_rate": 2.6878601296845634e-06,
      "loss": 0.4764,
      "step": 8193
    },
    {
      "epoch": 1.0046591466405101,
      "grad_norm": 1.8897325354872732,
      "learning_rate": 2.687354719529938e-06,
      "loss": 0.4449,
      "step": 8194
    },
    {
      "epoch": 1.0047817557626288,
      "grad_norm": 1.9231337021704793,
      "learning_rate": 2.6868493016747022e-06,
      "loss": 0.4275,
      "step": 8195
    },
    {
      "epoch": 1.0049043648847473,
      "grad_norm": 1.8519727744587289,
      "learning_rate": 2.6863438761396295e-06,
      "loss": 0.4914,
      "step": 8196
    },
    {
      "epoch": 1.005026974006866,
      "grad_norm": 2.013493778408758,
      "learning_rate": 2.6858384429454926e-06,
      "loss": 0.4609,
      "step": 8197
    },
    {
      "epoch": 1.0051495831289847,
      "grad_norm": 1.9064740156999018,
      "learning_rate": 2.6853330021130675e-06,
      "loss": 0.4621,
      "step": 8198
    },
    {
      "epoch": 1.0052721922511034,
      "grad_norm": 1.9954441927402056,
      "learning_rate": 2.6848275536631273e-06,
      "loss": 0.4201,
      "step": 8199
    },
    {
      "epoch": 1.0053948013732221,
      "grad_norm": 1.870610742147225,
      "learning_rate": 2.684322097616448e-06,
      "loss": 0.4262,
      "step": 8200
    },
    {
      "epoch": 1.0055174104953408,
      "grad_norm": 1.7849550177396025,
      "learning_rate": 2.6838166339938036e-06,
      "loss": 0.508,
      "step": 8201
    },
    {
      "epoch": 1.0056400196174595,
      "grad_norm": 1.8638738934522106,
      "learning_rate": 2.683311162815971e-06,
      "loss": 0.4474,
      "step": 8202
    },
    {
      "epoch": 1.0057626287395782,
      "grad_norm": 1.806137390304744,
      "learning_rate": 2.682805684103724e-06,
      "loss": 0.449,
      "step": 8203
    },
    {
      "epoch": 1.005885237861697,
      "grad_norm": 1.8764726634616198,
      "learning_rate": 2.6823001978778407e-06,
      "loss": 0.438,
      "step": 8204
    },
    {
      "epoch": 1.0060078469838156,
      "grad_norm": 1.8710856297639504,
      "learning_rate": 2.681794704159096e-06,
      "loss": 0.4254,
      "step": 8205
    },
    {
      "epoch": 1.0061304561059343,
      "grad_norm": 1.8957206897117225,
      "learning_rate": 2.6812892029682667e-06,
      "loss": 0.4269,
      "step": 8206
    },
    {
      "epoch": 1.006253065228053,
      "grad_norm": 1.913325339158325,
      "learning_rate": 2.680783694326131e-06,
      "loss": 0.4146,
      "step": 8207
    },
    {
      "epoch": 1.0063756743501717,
      "grad_norm": 1.7888550278319093,
      "learning_rate": 2.6802781782534655e-06,
      "loss": 0.3968,
      "step": 8208
    },
    {
      "epoch": 1.0064982834722904,
      "grad_norm": 1.755707367682818,
      "learning_rate": 2.679772654771048e-06,
      "loss": 0.3754,
      "step": 8209
    },
    {
      "epoch": 1.0066208925944091,
      "grad_norm": 1.8779578462837108,
      "learning_rate": 2.6792671238996546e-06,
      "loss": 0.4093,
      "step": 8210
    },
    {
      "epoch": 1.0067435017165276,
      "grad_norm": 1.6280242023695033,
      "learning_rate": 2.678761585660067e-06,
      "loss": 0.4138,
      "step": 8211
    },
    {
      "epoch": 1.0068661108386463,
      "grad_norm": 1.7780036510677615,
      "learning_rate": 2.6782560400730596e-06,
      "loss": 0.4335,
      "step": 8212
    },
    {
      "epoch": 1.006988719960765,
      "grad_norm": 1.8854582074110715,
      "learning_rate": 2.677750487159415e-06,
      "loss": 0.4201,
      "step": 8213
    },
    {
      "epoch": 1.0071113290828837,
      "grad_norm": 1.7958935056041545,
      "learning_rate": 2.67724492693991e-06,
      "loss": 0.447,
      "step": 8214
    },
    {
      "epoch": 1.0072339382050024,
      "grad_norm": 2.1010513503899104,
      "learning_rate": 2.6767393594353253e-06,
      "loss": 0.495,
      "step": 8215
    },
    {
      "epoch": 1.007356547327121,
      "grad_norm": 1.7120687854366483,
      "learning_rate": 2.6762337846664392e-06,
      "loss": 0.4521,
      "step": 8216
    },
    {
      "epoch": 1.0074791564492398,
      "grad_norm": 1.9053471574693135,
      "learning_rate": 2.675728202654033e-06,
      "loss": 0.4452,
      "step": 8217
    },
    {
      "epoch": 1.0076017655713585,
      "grad_norm": 1.882544394958051,
      "learning_rate": 2.675222613418887e-06,
      "loss": 0.4125,
      "step": 8218
    },
    {
      "epoch": 1.0077243746934772,
      "grad_norm": 1.7075645060760871,
      "learning_rate": 2.674717016981781e-06,
      "loss": 0.3862,
      "step": 8219
    },
    {
      "epoch": 1.007846983815596,
      "grad_norm": 1.6379464601305371,
      "learning_rate": 2.6742114133634957e-06,
      "loss": 0.4463,
      "step": 8220
    },
    {
      "epoch": 1.0079695929377146,
      "grad_norm": 1.957155696141854,
      "learning_rate": 2.673705802584814e-06,
      "loss": 0.4445,
      "step": 8221
    },
    {
      "epoch": 1.0080922020598333,
      "grad_norm": 1.9223042150836025,
      "learning_rate": 2.673200184666515e-06,
      "loss": 0.4459,
      "step": 8222
    },
    {
      "epoch": 1.008214811181952,
      "grad_norm": 1.7717313052095764,
      "learning_rate": 2.672694559629383e-06,
      "loss": 0.398,
      "step": 8223
    },
    {
      "epoch": 1.0083374203040707,
      "grad_norm": 1.9438418561286486,
      "learning_rate": 2.6721889274941986e-06,
      "loss": 0.453,
      "step": 8224
    },
    {
      "epoch": 1.0084600294261894,
      "grad_norm": 2.0568511824786433,
      "learning_rate": 2.671683288281744e-06,
      "loss": 0.4361,
      "step": 8225
    },
    {
      "epoch": 1.008582638548308,
      "grad_norm": 1.9140314048938203,
      "learning_rate": 2.671177642012803e-06,
      "loss": 0.4281,
      "step": 8226
    },
    {
      "epoch": 1.0087052476704266,
      "grad_norm": 1.8832084908075377,
      "learning_rate": 2.6706719887081565e-06,
      "loss": 0.419,
      "step": 8227
    },
    {
      "epoch": 1.0088278567925453,
      "grad_norm": 1.6852739457912091,
      "learning_rate": 2.6701663283885905e-06,
      "loss": 0.4981,
      "step": 8228
    },
    {
      "epoch": 1.008950465914664,
      "grad_norm": 1.982381240175228,
      "learning_rate": 2.669660661074887e-06,
      "loss": 0.4886,
      "step": 8229
    },
    {
      "epoch": 1.0090730750367827,
      "grad_norm": 1.8872796802528071,
      "learning_rate": 2.6691549867878293e-06,
      "loss": 0.4553,
      "step": 8230
    },
    {
      "epoch": 1.0091956841589014,
      "grad_norm": 1.9915685869771738,
      "learning_rate": 2.668649305548202e-06,
      "loss": 0.4568,
      "step": 8231
    },
    {
      "epoch": 1.00931829328102,
      "grad_norm": 1.815435318840524,
      "learning_rate": 2.6681436173767904e-06,
      "loss": 0.4457,
      "step": 8232
    },
    {
      "epoch": 1.0094409024031388,
      "grad_norm": 2.02197406198331,
      "learning_rate": 2.667637922294378e-06,
      "loss": 0.4465,
      "step": 8233
    },
    {
      "epoch": 1.0095635115252575,
      "grad_norm": 1.8394104290578295,
      "learning_rate": 2.667132220321751e-06,
      "loss": 0.4288,
      "step": 8234
    },
    {
      "epoch": 1.0096861206473762,
      "grad_norm": 1.706367647655106,
      "learning_rate": 2.666626511479693e-06,
      "loss": 0.4144,
      "step": 8235
    },
    {
      "epoch": 1.0098087297694949,
      "grad_norm": 1.9685078634790614,
      "learning_rate": 2.6661207957889908e-06,
      "loss": 0.4379,
      "step": 8236
    },
    {
      "epoch": 1.0099313388916136,
      "grad_norm": 2.0021728591589603,
      "learning_rate": 2.66561507327043e-06,
      "loss": 0.4536,
      "step": 8237
    },
    {
      "epoch": 1.0100539480137323,
      "grad_norm": 1.845159591831336,
      "learning_rate": 2.665109343944796e-06,
      "loss": 0.3873,
      "step": 8238
    },
    {
      "epoch": 1.010176557135851,
      "grad_norm": 1.941350727101169,
      "learning_rate": 2.6646036078328756e-06,
      "loss": 0.442,
      "step": 8239
    },
    {
      "epoch": 1.0102991662579697,
      "grad_norm": 1.9337325964959848,
      "learning_rate": 2.6640978649554556e-06,
      "loss": 0.4361,
      "step": 8240
    },
    {
      "epoch": 1.0104217753800884,
      "grad_norm": 2.018797500079748,
      "learning_rate": 2.6635921153333232e-06,
      "loss": 0.4787,
      "step": 8241
    },
    {
      "epoch": 1.010544384502207,
      "grad_norm": 1.8015656325929972,
      "learning_rate": 2.6630863589872646e-06,
      "loss": 0.4128,
      "step": 8242
    },
    {
      "epoch": 1.0106669936243255,
      "grad_norm": 2.0446422590091493,
      "learning_rate": 2.6625805959380684e-06,
      "loss": 0.4123,
      "step": 8243
    },
    {
      "epoch": 1.0107896027464442,
      "grad_norm": 1.9479180533190006,
      "learning_rate": 2.6620748262065226e-06,
      "loss": 0.4531,
      "step": 8244
    },
    {
      "epoch": 1.010912211868563,
      "grad_norm": 1.8501400056010087,
      "learning_rate": 2.661569049813413e-06,
      "loss": 0.4439,
      "step": 8245
    },
    {
      "epoch": 1.0110348209906816,
      "grad_norm": 1.8844715529099567,
      "learning_rate": 2.6610632667795305e-06,
      "loss": 0.4418,
      "step": 8246
    },
    {
      "epoch": 1.0111574301128003,
      "grad_norm": 1.928357440649855,
      "learning_rate": 2.6605574771256626e-06,
      "loss": 0.4133,
      "step": 8247
    },
    {
      "epoch": 1.011280039234919,
      "grad_norm": 1.8264697986874354,
      "learning_rate": 2.6600516808725975e-06,
      "loss": 0.4245,
      "step": 8248
    },
    {
      "epoch": 1.0114026483570377,
      "grad_norm": 1.9708648524932073,
      "learning_rate": 2.6595458780411255e-06,
      "loss": 0.4451,
      "step": 8249
    },
    {
      "epoch": 1.0115252574791564,
      "grad_norm": 2.1022502888335906,
      "learning_rate": 2.6590400686520353e-06,
      "loss": 0.4298,
      "step": 8250
    },
    {
      "epoch": 1.0116478666012751,
      "grad_norm": 1.780736193362539,
      "learning_rate": 2.6585342527261167e-06,
      "loss": 0.4314,
      "step": 8251
    },
    {
      "epoch": 1.0117704757233938,
      "grad_norm": 2.0674380351408845,
      "learning_rate": 2.658028430284159e-06,
      "loss": 0.4687,
      "step": 8252
    },
    {
      "epoch": 1.0118930848455125,
      "grad_norm": 1.9483589173517963,
      "learning_rate": 2.6575226013469537e-06,
      "loss": 0.4792,
      "step": 8253
    },
    {
      "epoch": 1.0120156939676312,
      "grad_norm": 2.0099810901753363,
      "learning_rate": 2.6570167659352907e-06,
      "loss": 0.4396,
      "step": 8254
    },
    {
      "epoch": 1.01213830308975,
      "grad_norm": 2.018218954232198,
      "learning_rate": 2.6565109240699604e-06,
      "loss": 0.4871,
      "step": 8255
    },
    {
      "epoch": 1.0122609122118686,
      "grad_norm": 1.8041897252263166,
      "learning_rate": 2.656005075771754e-06,
      "loss": 0.4506,
      "step": 8256
    },
    {
      "epoch": 1.0123835213339873,
      "grad_norm": 1.9610511893168439,
      "learning_rate": 2.655499221061463e-06,
      "loss": 0.427,
      "step": 8257
    },
    {
      "epoch": 1.012506130456106,
      "grad_norm": 2.0631683003983565,
      "learning_rate": 2.6549933599598786e-06,
      "loss": 0.425,
      "step": 8258
    },
    {
      "epoch": 1.0126287395782245,
      "grad_norm": 1.8922243404000862,
      "learning_rate": 2.6544874924877933e-06,
      "loss": 0.4179,
      "step": 8259
    },
    {
      "epoch": 1.0127513487003432,
      "grad_norm": 2.0543938704107583,
      "learning_rate": 2.6539816186659974e-06,
      "loss": 0.4352,
      "step": 8260
    },
    {
      "epoch": 1.012873957822462,
      "grad_norm": 1.7401338968552578,
      "learning_rate": 2.6534757385152853e-06,
      "loss": 0.408,
      "step": 8261
    },
    {
      "epoch": 1.0129965669445806,
      "grad_norm": 1.863459793361999,
      "learning_rate": 2.6529698520564486e-06,
      "loss": 0.3988,
      "step": 8262
    },
    {
      "epoch": 1.0131191760666993,
      "grad_norm": 2.0522279481410273,
      "learning_rate": 2.65246395931028e-06,
      "loss": 0.4447,
      "step": 8263
    },
    {
      "epoch": 1.013241785188818,
      "grad_norm": 1.8210637003167303,
      "learning_rate": 2.6519580602975725e-06,
      "loss": 0.4361,
      "step": 8264
    },
    {
      "epoch": 1.0133643943109367,
      "grad_norm": 1.9827952880680682,
      "learning_rate": 2.65145215503912e-06,
      "loss": 0.4369,
      "step": 8265
    },
    {
      "epoch": 1.0134870034330554,
      "grad_norm": 1.8383240615013596,
      "learning_rate": 2.6509462435557155e-06,
      "loss": 0.4396,
      "step": 8266
    },
    {
      "epoch": 1.0136096125551741,
      "grad_norm": 1.9375243195511118,
      "learning_rate": 2.650440325868154e-06,
      "loss": 0.4466,
      "step": 8267
    },
    {
      "epoch": 1.0137322216772928,
      "grad_norm": 1.906045050497804,
      "learning_rate": 2.649934401997227e-06,
      "loss": 0.4445,
      "step": 8268
    },
    {
      "epoch": 1.0138548307994115,
      "grad_norm": 1.8042522177532594,
      "learning_rate": 2.6494284719637325e-06,
      "loss": 0.4586,
      "step": 8269
    },
    {
      "epoch": 1.0139774399215302,
      "grad_norm": 2.0202177348516845,
      "learning_rate": 2.6489225357884617e-06,
      "loss": 0.4916,
      "step": 8270
    },
    {
      "epoch": 1.014100049043649,
      "grad_norm": 1.8741350094198583,
      "learning_rate": 2.6484165934922124e-06,
      "loss": 0.4485,
      "step": 8271
    },
    {
      "epoch": 1.0142226581657676,
      "grad_norm": 1.8352347624615175,
      "learning_rate": 2.6479106450957783e-06,
      "loss": 0.4536,
      "step": 8272
    },
    {
      "epoch": 1.0143452672878863,
      "grad_norm": 1.849506362677989,
      "learning_rate": 2.647404690619954e-06,
      "loss": 0.4329,
      "step": 8273
    },
    {
      "epoch": 1.0144678764100048,
      "grad_norm": 1.9305254865367065,
      "learning_rate": 2.6468987300855358e-06,
      "loss": 0.417,
      "step": 8274
    },
    {
      "epoch": 1.0145904855321235,
      "grad_norm": 1.8854297535420967,
      "learning_rate": 2.646392763513321e-06,
      "loss": 0.4783,
      "step": 8275
    },
    {
      "epoch": 1.0147130946542422,
      "grad_norm": 1.8979882151857086,
      "learning_rate": 2.6458867909241033e-06,
      "loss": 0.453,
      "step": 8276
    },
    {
      "epoch": 1.014835703776361,
      "grad_norm": 1.9245530261507355,
      "learning_rate": 2.645380812338681e-06,
      "loss": 0.412,
      "step": 8277
    },
    {
      "epoch": 1.0149583128984796,
      "grad_norm": 1.9056735388737902,
      "learning_rate": 2.644874827777849e-06,
      "loss": 0.4529,
      "step": 8278
    },
    {
      "epoch": 1.0150809220205983,
      "grad_norm": 1.9640574798948895,
      "learning_rate": 2.6443688372624053e-06,
      "loss": 0.3952,
      "step": 8279
    },
    {
      "epoch": 1.015203531142717,
      "grad_norm": 1.8510769529347417,
      "learning_rate": 2.643862840813147e-06,
      "loss": 0.3963,
      "step": 8280
    },
    {
      "epoch": 1.0153261402648357,
      "grad_norm": 1.9938351583633993,
      "learning_rate": 2.6433568384508717e-06,
      "loss": 0.4677,
      "step": 8281
    },
    {
      "epoch": 1.0154487493869544,
      "grad_norm": 1.9164642603765552,
      "learning_rate": 2.642850830196377e-06,
      "loss": 0.4508,
      "step": 8282
    },
    {
      "epoch": 1.015571358509073,
      "grad_norm": 1.8449526944781351,
      "learning_rate": 2.6423448160704584e-06,
      "loss": 0.4272,
      "step": 8283
    },
    {
      "epoch": 1.0156939676311918,
      "grad_norm": 1.757833026111606,
      "learning_rate": 2.6418387960939173e-06,
      "loss": 0.4364,
      "step": 8284
    },
    {
      "epoch": 1.0158165767533105,
      "grad_norm": 1.8081773090229132,
      "learning_rate": 2.64133277028755e-06,
      "loss": 0.4315,
      "step": 8285
    },
    {
      "epoch": 1.0159391858754292,
      "grad_norm": 1.9756938818160534,
      "learning_rate": 2.6408267386721566e-06,
      "loss": 0.4425,
      "step": 8286
    },
    {
      "epoch": 1.016061794997548,
      "grad_norm": 1.7698777347533612,
      "learning_rate": 2.640320701268534e-06,
      "loss": 0.3967,
      "step": 8287
    },
    {
      "epoch": 1.0161844041196666,
      "grad_norm": 2.0084284677966275,
      "learning_rate": 2.639814658097482e-06,
      "loss": 0.4718,
      "step": 8288
    },
    {
      "epoch": 1.0163070132417853,
      "grad_norm": 1.8530723869938446,
      "learning_rate": 2.6393086091798e-06,
      "loss": 0.4624,
      "step": 8289
    },
    {
      "epoch": 1.0164296223639038,
      "grad_norm": 1.852734587433749,
      "learning_rate": 2.638802554536288e-06,
      "loss": 0.451,
      "step": 8290
    },
    {
      "epoch": 1.0165522314860225,
      "grad_norm": 1.9077916342167738,
      "learning_rate": 2.6382964941877453e-06,
      "loss": 0.4798,
      "step": 8291
    },
    {
      "epoch": 1.0166748406081412,
      "grad_norm": 2.0186708309059607,
      "learning_rate": 2.6377904281549727e-06,
      "loss": 0.4461,
      "step": 8292
    },
    {
      "epoch": 1.0167974497302599,
      "grad_norm": 1.9436284388887197,
      "learning_rate": 2.637284356458768e-06,
      "loss": 0.409,
      "step": 8293
    },
    {
      "epoch": 1.0169200588523786,
      "grad_norm": 1.6653833090824988,
      "learning_rate": 2.6367782791199343e-06,
      "loss": 0.4149,
      "step": 8294
    },
    {
      "epoch": 1.0170426679744973,
      "grad_norm": 2.078162684024652,
      "learning_rate": 2.6362721961592706e-06,
      "loss": 0.4418,
      "step": 8295
    },
    {
      "epoch": 1.017165277096616,
      "grad_norm": 1.8856603442423854,
      "learning_rate": 2.635766107597578e-06,
      "loss": 0.4604,
      "step": 8296
    },
    {
      "epoch": 1.0172878862187347,
      "grad_norm": 1.9075415441065469,
      "learning_rate": 2.635260013455659e-06,
      "loss": 0.4252,
      "step": 8297
    },
    {
      "epoch": 1.0174104953408534,
      "grad_norm": 1.836422863839654,
      "learning_rate": 2.634753913754313e-06,
      "loss": 0.4067,
      "step": 8298
    },
    {
      "epoch": 1.017533104462972,
      "grad_norm": 1.8667978154516576,
      "learning_rate": 2.6342478085143437e-06,
      "loss": 0.4277,
      "step": 8299
    },
    {
      "epoch": 1.0176557135850908,
      "grad_norm": 1.7993321796051904,
      "learning_rate": 2.633741697756551e-06,
      "loss": 0.4506,
      "step": 8300
    },
    {
      "epoch": 1.0177783227072095,
      "grad_norm": 1.8783421948099797,
      "learning_rate": 2.633235581501738e-06,
      "loss": 0.3992,
      "step": 8301
    },
    {
      "epoch": 1.0179009318293282,
      "grad_norm": 1.8077169606623789,
      "learning_rate": 2.6327294597707064e-06,
      "loss": 0.4159,
      "step": 8302
    },
    {
      "epoch": 1.0180235409514469,
      "grad_norm": 1.782187167620932,
      "learning_rate": 2.6322233325842595e-06,
      "loss": 0.4173,
      "step": 8303
    },
    {
      "epoch": 1.0181461500735656,
      "grad_norm": 1.8412470143746058,
      "learning_rate": 2.6317171999631992e-06,
      "loss": 0.4248,
      "step": 8304
    },
    {
      "epoch": 1.018268759195684,
      "grad_norm": 2.0561985362412605,
      "learning_rate": 2.631211061928329e-06,
      "loss": 0.402,
      "step": 8305
    },
    {
      "epoch": 1.0183913683178027,
      "grad_norm": 2.075346561229774,
      "learning_rate": 2.630704918500451e-06,
      "loss": 0.4568,
      "step": 8306
    },
    {
      "epoch": 1.0185139774399214,
      "grad_norm": 1.9670247777652397,
      "learning_rate": 2.6301987697003712e-06,
      "loss": 0.4561,
      "step": 8307
    },
    {
      "epoch": 1.0186365865620401,
      "grad_norm": 1.981630381150851,
      "learning_rate": 2.62969261554889e-06,
      "loss": 0.4588,
      "step": 8308
    },
    {
      "epoch": 1.0187591956841588,
      "grad_norm": 1.737438030891404,
      "learning_rate": 2.6291864560668133e-06,
      "loss": 0.4315,
      "step": 8309
    },
    {
      "epoch": 1.0188818048062775,
      "grad_norm": 1.766315335566819,
      "learning_rate": 2.628680291274945e-06,
      "loss": 0.4621,
      "step": 8310
    },
    {
      "epoch": 1.0190044139283962,
      "grad_norm": 2.0329475672700377,
      "learning_rate": 2.628174121194088e-06,
      "loss": 0.4657,
      "step": 8311
    },
    {
      "epoch": 1.019127023050515,
      "grad_norm": 1.9084131921422907,
      "learning_rate": 2.6276679458450483e-06,
      "loss": 0.4,
      "step": 8312
    },
    {
      "epoch": 1.0192496321726336,
      "grad_norm": 1.8500595567173181,
      "learning_rate": 2.6271617652486297e-06,
      "loss": 0.4491,
      "step": 8313
    },
    {
      "epoch": 1.0193722412947523,
      "grad_norm": 1.8687893646519056,
      "learning_rate": 2.626655579425637e-06,
      "loss": 0.4729,
      "step": 8314
    },
    {
      "epoch": 1.019494850416871,
      "grad_norm": 1.9482857817921997,
      "learning_rate": 2.626149388396877e-06,
      "loss": 0.4062,
      "step": 8315
    },
    {
      "epoch": 1.0196174595389897,
      "grad_norm": 2.231013618725635,
      "learning_rate": 2.6256431921831525e-06,
      "loss": 0.4202,
      "step": 8316
    },
    {
      "epoch": 1.0197400686611084,
      "grad_norm": 2.001533625816762,
      "learning_rate": 2.6251369908052715e-06,
      "loss": 0.4915,
      "step": 8317
    },
    {
      "epoch": 1.0198626777832271,
      "grad_norm": 1.8122540828060685,
      "learning_rate": 2.6246307842840374e-06,
      "loss": 0.4482,
      "step": 8318
    },
    {
      "epoch": 1.0199852869053458,
      "grad_norm": 2.206056862146982,
      "learning_rate": 2.624124572640259e-06,
      "loss": 0.4749,
      "step": 8319
    },
    {
      "epoch": 1.0201078960274645,
      "grad_norm": 1.7178242485359427,
      "learning_rate": 2.62361835589474e-06,
      "loss": 0.4066,
      "step": 8320
    },
    {
      "epoch": 1.020230505149583,
      "grad_norm": 1.9981576162385313,
      "learning_rate": 2.6231121340682875e-06,
      "loss": 0.4529,
      "step": 8321
    },
    {
      "epoch": 1.0203531142717017,
      "grad_norm": 1.859620162352937,
      "learning_rate": 2.6226059071817085e-06,
      "loss": 0.4486,
      "step": 8322
    },
    {
      "epoch": 1.0204757233938204,
      "grad_norm": 1.9381989935376787,
      "learning_rate": 2.62209967525581e-06,
      "loss": 0.4662,
      "step": 8323
    },
    {
      "epoch": 1.0205983325159391,
      "grad_norm": 1.7385099686861372,
      "learning_rate": 2.6215934383113983e-06,
      "loss": 0.4323,
      "step": 8324
    },
    {
      "epoch": 1.0207209416380578,
      "grad_norm": 1.9523936521439629,
      "learning_rate": 2.621087196369281e-06,
      "loss": 0.4565,
      "step": 8325
    },
    {
      "epoch": 1.0208435507601765,
      "grad_norm": 2.042225938050626,
      "learning_rate": 2.620580949450266e-06,
      "loss": 0.411,
      "step": 8326
    },
    {
      "epoch": 1.0209661598822952,
      "grad_norm": 1.8556535306731847,
      "learning_rate": 2.62007469757516e-06,
      "loss": 0.4058,
      "step": 8327
    },
    {
      "epoch": 1.021088769004414,
      "grad_norm": 1.8309867127856936,
      "learning_rate": 2.6195684407647714e-06,
      "loss": 0.4447,
      "step": 8328
    },
    {
      "epoch": 1.0212113781265326,
      "grad_norm": 1.9019738816635812,
      "learning_rate": 2.6190621790399086e-06,
      "loss": 0.3806,
      "step": 8329
    },
    {
      "epoch": 1.0213339872486513,
      "grad_norm": 1.9323818337166037,
      "learning_rate": 2.6185559124213794e-06,
      "loss": 0.4652,
      "step": 8330
    },
    {
      "epoch": 1.02145659637077,
      "grad_norm": 1.91205244169635,
      "learning_rate": 2.6180496409299916e-06,
      "loss": 0.4446,
      "step": 8331
    },
    {
      "epoch": 1.0215792054928887,
      "grad_norm": 1.950746455999389,
      "learning_rate": 2.6175433645865556e-06,
      "loss": 0.4268,
      "step": 8332
    },
    {
      "epoch": 1.0217018146150074,
      "grad_norm": 2.0956679561773774,
      "learning_rate": 2.617037083411879e-06,
      "loss": 0.4299,
      "step": 8333
    },
    {
      "epoch": 1.0218244237371261,
      "grad_norm": 1.8504969293863602,
      "learning_rate": 2.6165307974267704e-06,
      "loss": 0.4728,
      "step": 8334
    },
    {
      "epoch": 1.0219470328592448,
      "grad_norm": 1.8291991071466598,
      "learning_rate": 2.6160245066520402e-06,
      "loss": 0.4177,
      "step": 8335
    },
    {
      "epoch": 1.0220696419813635,
      "grad_norm": 1.9162729314846174,
      "learning_rate": 2.6155182111084976e-06,
      "loss": 0.4193,
      "step": 8336
    },
    {
      "epoch": 1.022192251103482,
      "grad_norm": 1.887746507580215,
      "learning_rate": 2.6150119108169516e-06,
      "loss": 0.4756,
      "step": 8337
    },
    {
      "epoch": 1.0223148602256007,
      "grad_norm": 1.9187408689553285,
      "learning_rate": 2.6145056057982125e-06,
      "loss": 0.4323,
      "step": 8338
    },
    {
      "epoch": 1.0224374693477194,
      "grad_norm": 1.8832338092027887,
      "learning_rate": 2.6139992960730903e-06,
      "loss": 0.4074,
      "step": 8339
    },
    {
      "epoch": 1.022560078469838,
      "grad_norm": 1.793820181143303,
      "learning_rate": 2.6134929816623953e-06,
      "loss": 0.4023,
      "step": 8340
    },
    {
      "epoch": 1.0226826875919568,
      "grad_norm": 1.9251975216703558,
      "learning_rate": 2.612986662586938e-06,
      "loss": 0.4682,
      "step": 8341
    },
    {
      "epoch": 1.0228052967140755,
      "grad_norm": 1.8247604951533611,
      "learning_rate": 2.612480338867529e-06,
      "loss": 0.4326,
      "step": 8342
    },
    {
      "epoch": 1.0229279058361942,
      "grad_norm": 1.8710898546777466,
      "learning_rate": 2.6119740105249784e-06,
      "loss": 0.4671,
      "step": 8343
    },
    {
      "epoch": 1.023050514958313,
      "grad_norm": 1.9747155602642172,
      "learning_rate": 2.611467677580098e-06,
      "loss": 0.457,
      "step": 8344
    },
    {
      "epoch": 1.0231731240804316,
      "grad_norm": 1.9985720613636897,
      "learning_rate": 2.6109613400536985e-06,
      "loss": 0.4523,
      "step": 8345
    },
    {
      "epoch": 1.0232957332025503,
      "grad_norm": 1.888140936982718,
      "learning_rate": 2.6104549979665916e-06,
      "loss": 0.4292,
      "step": 8346
    },
    {
      "epoch": 1.023418342324669,
      "grad_norm": 2.012684464121353,
      "learning_rate": 2.609948651339589e-06,
      "loss": 0.467,
      "step": 8347
    },
    {
      "epoch": 1.0235409514467877,
      "grad_norm": 2.0112212618897773,
      "learning_rate": 2.6094423001935014e-06,
      "loss": 0.4575,
      "step": 8348
    },
    {
      "epoch": 1.0236635605689064,
      "grad_norm": 1.9206015458411783,
      "learning_rate": 2.6089359445491425e-06,
      "loss": 0.4117,
      "step": 8349
    },
    {
      "epoch": 1.023786169691025,
      "grad_norm": 1.813736709578755,
      "learning_rate": 2.6084295844273227e-06,
      "loss": 0.4493,
      "step": 8350
    },
    {
      "epoch": 1.0239087788131438,
      "grad_norm": 1.8226721742551253,
      "learning_rate": 2.607923219848855e-06,
      "loss": 0.4139,
      "step": 8351
    },
    {
      "epoch": 1.0240313879352625,
      "grad_norm": 1.9528001256204595,
      "learning_rate": 2.607416850834552e-06,
      "loss": 0.423,
      "step": 8352
    },
    {
      "epoch": 1.024153997057381,
      "grad_norm": 1.9186993511017831,
      "learning_rate": 2.606910477405227e-06,
      "loss": 0.4208,
      "step": 8353
    },
    {
      "epoch": 1.0242766061794997,
      "grad_norm": 2.049396045386891,
      "learning_rate": 2.6064040995816913e-06,
      "loss": 0.4244,
      "step": 8354
    },
    {
      "epoch": 1.0243992153016184,
      "grad_norm": 1.806814124820717,
      "learning_rate": 2.605897717384759e-06,
      "loss": 0.4524,
      "step": 8355
    },
    {
      "epoch": 1.024521824423737,
      "grad_norm": 1.7940966567889562,
      "learning_rate": 2.605391330835243e-06,
      "loss": 0.4269,
      "step": 8356
    },
    {
      "epoch": 1.0246444335458558,
      "grad_norm": 2.2079598257280084,
      "learning_rate": 2.6048849399539572e-06,
      "loss": 0.4282,
      "step": 8357
    },
    {
      "epoch": 1.0247670426679745,
      "grad_norm": 1.8927470651683274,
      "learning_rate": 2.6043785447617144e-06,
      "loss": 0.4409,
      "step": 8358
    },
    {
      "epoch": 1.0248896517900932,
      "grad_norm": 1.9961614558240557,
      "learning_rate": 2.603872145279328e-06,
      "loss": 0.4336,
      "step": 8359
    },
    {
      "epoch": 1.0250122609122119,
      "grad_norm": 1.8724253777032962,
      "learning_rate": 2.603365741527613e-06,
      "loss": 0.4656,
      "step": 8360
    },
    {
      "epoch": 1.0251348700343306,
      "grad_norm": 1.8764404415069365,
      "learning_rate": 2.6028593335273833e-06,
      "loss": 0.4503,
      "step": 8361
    },
    {
      "epoch": 1.0252574791564493,
      "grad_norm": 1.8776998250250763,
      "learning_rate": 2.6023529212994525e-06,
      "loss": 0.4338,
      "step": 8362
    },
    {
      "epoch": 1.025380088278568,
      "grad_norm": 1.886129632593518,
      "learning_rate": 2.6018465048646363e-06,
      "loss": 0.4239,
      "step": 8363
    },
    {
      "epoch": 1.0255026974006867,
      "grad_norm": 2.084607988963995,
      "learning_rate": 2.601340084243747e-06,
      "loss": 0.4164,
      "step": 8364
    },
    {
      "epoch": 1.0256253065228054,
      "grad_norm": 1.967185253208482,
      "learning_rate": 2.6008336594576023e-06,
      "loss": 0.4908,
      "step": 8365
    },
    {
      "epoch": 1.025747915644924,
      "grad_norm": 1.8735168033642613,
      "learning_rate": 2.600327230527014e-06,
      "loss": 0.4299,
      "step": 8366
    },
    {
      "epoch": 1.0258705247670428,
      "grad_norm": 1.801353227312572,
      "learning_rate": 2.5998207974728003e-06,
      "loss": 0.4076,
      "step": 8367
    },
    {
      "epoch": 1.0259931338891612,
      "grad_norm": 2.0000821863620146,
      "learning_rate": 2.5993143603157746e-06,
      "loss": 0.4295,
      "step": 8368
    },
    {
      "epoch": 1.02611574301128,
      "grad_norm": 1.9453398997567601,
      "learning_rate": 2.598807919076753e-06,
      "loss": 0.4545,
      "step": 8369
    },
    {
      "epoch": 1.0262383521333986,
      "grad_norm": 1.935878147361311,
      "learning_rate": 2.5983014737765512e-06,
      "loss": 0.4382,
      "step": 8370
    },
    {
      "epoch": 1.0263609612555173,
      "grad_norm": 1.9806066482984128,
      "learning_rate": 2.597795024435984e-06,
      "loss": 0.4349,
      "step": 8371
    },
    {
      "epoch": 1.026483570377636,
      "grad_norm": 1.7960509659367043,
      "learning_rate": 2.597288571075869e-06,
      "loss": 0.4375,
      "step": 8372
    },
    {
      "epoch": 1.0266061794997547,
      "grad_norm": 1.8542749623165844,
      "learning_rate": 2.5967821137170207e-06,
      "loss": 0.4366,
      "step": 8373
    },
    {
      "epoch": 1.0267287886218734,
      "grad_norm": 1.937915970821709,
      "learning_rate": 2.5962756523802566e-06,
      "loss": 0.4467,
      "step": 8374
    },
    {
      "epoch": 1.0268513977439921,
      "grad_norm": 2.0371837886506188,
      "learning_rate": 2.5957691870863926e-06,
      "loss": 0.4702,
      "step": 8375
    },
    {
      "epoch": 1.0269740068661108,
      "grad_norm": 2.1060906608142025,
      "learning_rate": 2.5952627178562463e-06,
      "loss": 0.4667,
      "step": 8376
    },
    {
      "epoch": 1.0270966159882295,
      "grad_norm": 1.7860372881495612,
      "learning_rate": 2.594756244710633e-06,
      "loss": 0.4688,
      "step": 8377
    },
    {
      "epoch": 1.0272192251103482,
      "grad_norm": 2.0690570573859834,
      "learning_rate": 2.5942497676703705e-06,
      "loss": 0.4354,
      "step": 8378
    },
    {
      "epoch": 1.027341834232467,
      "grad_norm": 1.8569188754350938,
      "learning_rate": 2.5937432867562746e-06,
      "loss": 0.4257,
      "step": 8379
    },
    {
      "epoch": 1.0274644433545856,
      "grad_norm": 1.9529302689113475,
      "learning_rate": 2.593236801989165e-06,
      "loss": 0.4046,
      "step": 8380
    },
    {
      "epoch": 1.0275870524767043,
      "grad_norm": 2.0244001950373396,
      "learning_rate": 2.5927303133898574e-06,
      "loss": 0.44,
      "step": 8381
    },
    {
      "epoch": 1.027709661598823,
      "grad_norm": 1.7236882780505893,
      "learning_rate": 2.5922238209791696e-06,
      "loss": 0.4184,
      "step": 8382
    },
    {
      "epoch": 1.0278322707209417,
      "grad_norm": 1.9901955757186527,
      "learning_rate": 2.59171732477792e-06,
      "loss": 0.4544,
      "step": 8383
    },
    {
      "epoch": 1.0279548798430602,
      "grad_norm": 1.8583459091364336,
      "learning_rate": 2.5912108248069252e-06,
      "loss": 0.4699,
      "step": 8384
    },
    {
      "epoch": 1.028077488965179,
      "grad_norm": 1.9180388158695265,
      "learning_rate": 2.590704321087004e-06,
      "loss": 0.4276,
      "step": 8385
    },
    {
      "epoch": 1.0282000980872976,
      "grad_norm": 1.9601015750414759,
      "learning_rate": 2.5901978136389748e-06,
      "loss": 0.4438,
      "step": 8386
    },
    {
      "epoch": 1.0283227072094163,
      "grad_norm": 1.7558206235704397,
      "learning_rate": 2.589691302483656e-06,
      "loss": 0.4449,
      "step": 8387
    },
    {
      "epoch": 1.028445316331535,
      "grad_norm": 2.0837243539111316,
      "learning_rate": 2.589184787641867e-06,
      "loss": 0.4219,
      "step": 8388
    },
    {
      "epoch": 1.0285679254536537,
      "grad_norm": 1.8264189807960016,
      "learning_rate": 2.588678269134423e-06,
      "loss": 0.3993,
      "step": 8389
    },
    {
      "epoch": 1.0286905345757724,
      "grad_norm": 1.889756966755849,
      "learning_rate": 2.5881717469821473e-06,
      "loss": 0.4573,
      "step": 8390
    },
    {
      "epoch": 1.0288131436978911,
      "grad_norm": 2.010358618902388,
      "learning_rate": 2.5876652212058554e-06,
      "loss": 0.436,
      "step": 8391
    },
    {
      "epoch": 1.0289357528200098,
      "grad_norm": 2.039310630143338,
      "learning_rate": 2.587158691826368e-06,
      "loss": 0.4084,
      "step": 8392
    },
    {
      "epoch": 1.0290583619421285,
      "grad_norm": 1.8789588917155604,
      "learning_rate": 2.5866521588645043e-06,
      "loss": 0.4289,
      "step": 8393
    },
    {
      "epoch": 1.0291809710642472,
      "grad_norm": 1.936431866207638,
      "learning_rate": 2.586145622341083e-06,
      "loss": 0.4174,
      "step": 8394
    },
    {
      "epoch": 1.029303580186366,
      "grad_norm": 2.004133450522836,
      "learning_rate": 2.5856390822769247e-06,
      "loss": 0.4736,
      "step": 8395
    },
    {
      "epoch": 1.0294261893084846,
      "grad_norm": 1.8741461571636941,
      "learning_rate": 2.5851325386928477e-06,
      "loss": 0.4455,
      "step": 8396
    },
    {
      "epoch": 1.0295487984306033,
      "grad_norm": 1.9363835787608827,
      "learning_rate": 2.5846259916096733e-06,
      "loss": 0.4242,
      "step": 8397
    },
    {
      "epoch": 1.029671407552722,
      "grad_norm": 1.906236335889903,
      "learning_rate": 2.584119441048221e-06,
      "loss": 0.4647,
      "step": 8398
    },
    {
      "epoch": 1.0297940166748405,
      "grad_norm": 2.022607186860242,
      "learning_rate": 2.58361288702931e-06,
      "loss": 0.3945,
      "step": 8399
    },
    {
      "epoch": 1.0299166257969592,
      "grad_norm": 2.0616157679099367,
      "learning_rate": 2.5831063295737618e-06,
      "loss": 0.4352,
      "step": 8400
    },
    {
      "epoch": 1.030039234919078,
      "grad_norm": 2.076026521518469,
      "learning_rate": 2.5825997687023966e-06,
      "loss": 0.4381,
      "step": 8401
    },
    {
      "epoch": 1.0301618440411966,
      "grad_norm": 2.1690305763414224,
      "learning_rate": 2.5820932044360336e-06,
      "loss": 0.4495,
      "step": 8402
    },
    {
      "epoch": 1.0302844531633153,
      "grad_norm": 1.9981375882630814,
      "learning_rate": 2.5815866367954963e-06,
      "loss": 0.4158,
      "step": 8403
    },
    {
      "epoch": 1.030407062285434,
      "grad_norm": 1.8378888234422581,
      "learning_rate": 2.5810800658016016e-06,
      "loss": 0.3796,
      "step": 8404
    },
    {
      "epoch": 1.0305296714075527,
      "grad_norm": 1.9342638631591555,
      "learning_rate": 2.5805734914751745e-06,
      "loss": 0.413,
      "step": 8405
    },
    {
      "epoch": 1.0306522805296714,
      "grad_norm": 1.8494163546272442,
      "learning_rate": 2.580066913837034e-06,
      "loss": 0.4175,
      "step": 8406
    },
    {
      "epoch": 1.03077488965179,
      "grad_norm": 1.9072084621283139,
      "learning_rate": 2.579560332908001e-06,
      "loss": 0.4386,
      "step": 8407
    },
    {
      "epoch": 1.0308974987739088,
      "grad_norm": 2.1840605110780875,
      "learning_rate": 2.5790537487088975e-06,
      "loss": 0.4709,
      "step": 8408
    },
    {
      "epoch": 1.0310201078960275,
      "grad_norm": 2.2784170461927316,
      "learning_rate": 2.578547161260545e-06,
      "loss": 0.431,
      "step": 8409
    },
    {
      "epoch": 1.0311427170181462,
      "grad_norm": 1.91771799997041,
      "learning_rate": 2.578040570583765e-06,
      "loss": 0.4035,
      "step": 8410
    },
    {
      "epoch": 1.031265326140265,
      "grad_norm": 1.9634132455861393,
      "learning_rate": 2.57753397669938e-06,
      "loss": 0.4794,
      "step": 8411
    },
    {
      "epoch": 1.0313879352623836,
      "grad_norm": 1.8725398550088392,
      "learning_rate": 2.577027379628211e-06,
      "loss": 0.3908,
      "step": 8412
    },
    {
      "epoch": 1.0315105443845023,
      "grad_norm": 1.9448691493322283,
      "learning_rate": 2.5765207793910805e-06,
      "loss": 0.443,
      "step": 8413
    },
    {
      "epoch": 1.031633153506621,
      "grad_norm": 1.8660348780762344,
      "learning_rate": 2.5760141760088097e-06,
      "loss": 0.4418,
      "step": 8414
    },
    {
      "epoch": 1.0317557626287397,
      "grad_norm": 2.1013842867868444,
      "learning_rate": 2.5755075695022223e-06,
      "loss": 0.4589,
      "step": 8415
    },
    {
      "epoch": 1.0318783717508582,
      "grad_norm": 2.031670307052663,
      "learning_rate": 2.5750009598921405e-06,
      "loss": 0.4582,
      "step": 8416
    },
    {
      "epoch": 1.0320009808729769,
      "grad_norm": 1.9430672439870282,
      "learning_rate": 2.574494347199386e-06,
      "loss": 0.4842,
      "step": 8417
    },
    {
      "epoch": 1.0321235899950956,
      "grad_norm": 2.0230618930767648,
      "learning_rate": 2.573987731444782e-06,
      "loss": 0.5091,
      "step": 8418
    },
    {
      "epoch": 1.0322461991172143,
      "grad_norm": 1.902603539582664,
      "learning_rate": 2.573481112649151e-06,
      "loss": 0.4506,
      "step": 8419
    },
    {
      "epoch": 1.032368808239333,
      "grad_norm": 2.0500287643251087,
      "learning_rate": 2.5729744908333167e-06,
      "loss": 0.512,
      "step": 8420
    },
    {
      "epoch": 1.0324914173614517,
      "grad_norm": 1.9655876706905953,
      "learning_rate": 2.572467866018101e-06,
      "loss": 0.3988,
      "step": 8421
    },
    {
      "epoch": 1.0326140264835704,
      "grad_norm": 1.8934214196152754,
      "learning_rate": 2.571961238224328e-06,
      "loss": 0.4382,
      "step": 8422
    },
    {
      "epoch": 1.032736635605689,
      "grad_norm": 1.749447890530152,
      "learning_rate": 2.571454607472821e-06,
      "loss": 0.4299,
      "step": 8423
    },
    {
      "epoch": 1.0328592447278078,
      "grad_norm": 1.993211940931957,
      "learning_rate": 2.570947973784403e-06,
      "loss": 0.3867,
      "step": 8424
    },
    {
      "epoch": 1.0329818538499265,
      "grad_norm": 1.9107979362098335,
      "learning_rate": 2.5704413371798974e-06,
      "loss": 0.4228,
      "step": 8425
    },
    {
      "epoch": 1.0331044629720452,
      "grad_norm": 1.833014512849135,
      "learning_rate": 2.5699346976801286e-06,
      "loss": 0.4155,
      "step": 8426
    },
    {
      "epoch": 1.0332270720941639,
      "grad_norm": 1.9548613788098559,
      "learning_rate": 2.5694280553059192e-06,
      "loss": 0.4533,
      "step": 8427
    },
    {
      "epoch": 1.0333496812162826,
      "grad_norm": 2.188546182795085,
      "learning_rate": 2.5689214100780948e-06,
      "loss": 0.4756,
      "step": 8428
    },
    {
      "epoch": 1.0334722903384013,
      "grad_norm": 2.282242537979536,
      "learning_rate": 2.5684147620174775e-06,
      "loss": 0.4114,
      "step": 8429
    },
    {
      "epoch": 1.03359489946052,
      "grad_norm": 1.8211829768647145,
      "learning_rate": 2.567908111144893e-06,
      "loss": 0.4154,
      "step": 8430
    },
    {
      "epoch": 1.0337175085826384,
      "grad_norm": 1.9711460694773242,
      "learning_rate": 2.5674014574811645e-06,
      "loss": 0.4454,
      "step": 8431
    },
    {
      "epoch": 1.0338401177047571,
      "grad_norm": 2.0405085468344226,
      "learning_rate": 2.5668948010471167e-06,
      "loss": 0.4724,
      "step": 8432
    },
    {
      "epoch": 1.0339627268268758,
      "grad_norm": 1.8332735782176093,
      "learning_rate": 2.5663881418635744e-06,
      "loss": 0.4143,
      "step": 8433
    },
    {
      "epoch": 1.0340853359489945,
      "grad_norm": 1.94151310737704,
      "learning_rate": 2.565881479951362e-06,
      "loss": 0.4418,
      "step": 8434
    },
    {
      "epoch": 1.0342079450711132,
      "grad_norm": 1.838055578493876,
      "learning_rate": 2.565374815331304e-06,
      "loss": 0.3981,
      "step": 8435
    },
    {
      "epoch": 1.034330554193232,
      "grad_norm": 2.0193583365815804,
      "learning_rate": 2.564868148024226e-06,
      "loss": 0.4299,
      "step": 8436
    },
    {
      "epoch": 1.0344531633153506,
      "grad_norm": 2.106925193903687,
      "learning_rate": 2.5643614780509513e-06,
      "loss": 0.448,
      "step": 8437
    },
    {
      "epoch": 1.0345757724374693,
      "grad_norm": 1.7561774261300003,
      "learning_rate": 2.5638548054323065e-06,
      "loss": 0.4404,
      "step": 8438
    },
    {
      "epoch": 1.034698381559588,
      "grad_norm": 2.0030292295465917,
      "learning_rate": 2.563348130189116e-06,
      "loss": 0.4957,
      "step": 8439
    },
    {
      "epoch": 1.0348209906817067,
      "grad_norm": 1.8760464864117576,
      "learning_rate": 2.562841452342205e-06,
      "loss": 0.4399,
      "step": 8440
    },
    {
      "epoch": 1.0349435998038254,
      "grad_norm": 2.072695044050512,
      "learning_rate": 2.5623347719123994e-06,
      "loss": 0.4354,
      "step": 8441
    },
    {
      "epoch": 1.0350662089259441,
      "grad_norm": 1.7789755433109562,
      "learning_rate": 2.5618280889205237e-06,
      "loss": 0.4561,
      "step": 8442
    },
    {
      "epoch": 1.0351888180480628,
      "grad_norm": 1.9719521722221593,
      "learning_rate": 2.5613214033874047e-06,
      "loss": 0.4207,
      "step": 8443
    },
    {
      "epoch": 1.0353114271701815,
      "grad_norm": 1.9728281788791338,
      "learning_rate": 2.5608147153338676e-06,
      "loss": 0.4613,
      "step": 8444
    },
    {
      "epoch": 1.0354340362923002,
      "grad_norm": 1.767475160957616,
      "learning_rate": 2.560308024780737e-06,
      "loss": 0.4254,
      "step": 8445
    },
    {
      "epoch": 1.035556645414419,
      "grad_norm": 1.8561375798351734,
      "learning_rate": 2.5598013317488405e-06,
      "loss": 0.4205,
      "step": 8446
    },
    {
      "epoch": 1.0356792545365374,
      "grad_norm": 1.7986647940948828,
      "learning_rate": 2.559294636259004e-06,
      "loss": 0.4153,
      "step": 8447
    },
    {
      "epoch": 1.0358018636586561,
      "grad_norm": 2.190209483325509,
      "learning_rate": 2.5587879383320518e-06,
      "loss": 0.4325,
      "step": 8448
    },
    {
      "epoch": 1.0359244727807748,
      "grad_norm": 1.9899304219267697,
      "learning_rate": 2.558281237988813e-06,
      "loss": 0.4523,
      "step": 8449
    },
    {
      "epoch": 1.0360470819028935,
      "grad_norm": 2.102161929012264,
      "learning_rate": 2.5577745352501097e-06,
      "loss": 0.5094,
      "step": 8450
    },
    {
      "epoch": 1.0361696910250122,
      "grad_norm": 1.7723132844897929,
      "learning_rate": 2.5572678301367726e-06,
      "loss": 0.4307,
      "step": 8451
    },
    {
      "epoch": 1.036292300147131,
      "grad_norm": 1.8339080689401515,
      "learning_rate": 2.5567611226696253e-06,
      "loss": 0.4513,
      "step": 8452
    },
    {
      "epoch": 1.0364149092692496,
      "grad_norm": 1.9242809695365837,
      "learning_rate": 2.556254412869496e-06,
      "loss": 0.4539,
      "step": 8453
    },
    {
      "epoch": 1.0365375183913683,
      "grad_norm": 1.8547856434845993,
      "learning_rate": 2.5557477007572108e-06,
      "loss": 0.4232,
      "step": 8454
    },
    {
      "epoch": 1.036660127513487,
      "grad_norm": 1.8492411706256922,
      "learning_rate": 2.555240986353596e-06,
      "loss": 0.3889,
      "step": 8455
    },
    {
      "epoch": 1.0367827366356057,
      "grad_norm": 1.726347238199031,
      "learning_rate": 2.5547342696794792e-06,
      "loss": 0.4435,
      "step": 8456
    },
    {
      "epoch": 1.0369053457577244,
      "grad_norm": 1.8402286779011068,
      "learning_rate": 2.554227550755686e-06,
      "loss": 0.4815,
      "step": 8457
    },
    {
      "epoch": 1.0370279548798431,
      "grad_norm": 1.9293917510199847,
      "learning_rate": 2.553720829603046e-06,
      "loss": 0.4562,
      "step": 8458
    },
    {
      "epoch": 1.0371505640019618,
      "grad_norm": 2.00747888912504,
      "learning_rate": 2.5532141062423845e-06,
      "loss": 0.4259,
      "step": 8459
    },
    {
      "epoch": 1.0372731731240805,
      "grad_norm": 1.766937406440932,
      "learning_rate": 2.552707380694528e-06,
      "loss": 0.3855,
      "step": 8460
    },
    {
      "epoch": 1.0373957822461992,
      "grad_norm": 1.9238925134075826,
      "learning_rate": 2.5522006529803057e-06,
      "loss": 0.4526,
      "step": 8461
    },
    {
      "epoch": 1.0375183913683177,
      "grad_norm": 1.821514185873969,
      "learning_rate": 2.5516939231205435e-06,
      "loss": 0.4769,
      "step": 8462
    },
    {
      "epoch": 1.0376410004904364,
      "grad_norm": 2.0744192889032456,
      "learning_rate": 2.5511871911360704e-06,
      "loss": 0.4731,
      "step": 8463
    },
    {
      "epoch": 1.037763609612555,
      "grad_norm": 2.113217314616311,
      "learning_rate": 2.5506804570477122e-06,
      "loss": 0.4075,
      "step": 8464
    },
    {
      "epoch": 1.0378862187346738,
      "grad_norm": 2.140193183581597,
      "learning_rate": 2.5501737208762985e-06,
      "loss": 0.4769,
      "step": 8465
    },
    {
      "epoch": 1.0380088278567925,
      "grad_norm": 2.0171707453071637,
      "learning_rate": 2.549666982642655e-06,
      "loss": 0.4751,
      "step": 8466
    },
    {
      "epoch": 1.0381314369789112,
      "grad_norm": 1.9166099624618287,
      "learning_rate": 2.549160242367611e-06,
      "loss": 0.4163,
      "step": 8467
    },
    {
      "epoch": 1.03825404610103,
      "grad_norm": 2.008159183652645,
      "learning_rate": 2.548653500071994e-06,
      "loss": 0.4351,
      "step": 8468
    },
    {
      "epoch": 1.0383766552231486,
      "grad_norm": 1.9469340000168383,
      "learning_rate": 2.548146755776632e-06,
      "loss": 0.4355,
      "step": 8469
    },
    {
      "epoch": 1.0384992643452673,
      "grad_norm": 1.7574054885411123,
      "learning_rate": 2.5476400095023534e-06,
      "loss": 0.4261,
      "step": 8470
    },
    {
      "epoch": 1.038621873467386,
      "grad_norm": 1.9232741000582831,
      "learning_rate": 2.547133261269986e-06,
      "loss": 0.4016,
      "step": 8471
    },
    {
      "epoch": 1.0387444825895047,
      "grad_norm": 1.9905713351604788,
      "learning_rate": 2.5466265111003578e-06,
      "loss": 0.4224,
      "step": 8472
    },
    {
      "epoch": 1.0388670917116234,
      "grad_norm": 2.1075887924697247,
      "learning_rate": 2.5461197590142978e-06,
      "loss": 0.4729,
      "step": 8473
    },
    {
      "epoch": 1.038989700833742,
      "grad_norm": 1.965795322966975,
      "learning_rate": 2.545613005032634e-06,
      "loss": 0.4595,
      "step": 8474
    },
    {
      "epoch": 1.0391123099558608,
      "grad_norm": 1.8436467948549837,
      "learning_rate": 2.545106249176194e-06,
      "loss": 0.4448,
      "step": 8475
    },
    {
      "epoch": 1.0392349190779795,
      "grad_norm": 2.0156550232812096,
      "learning_rate": 2.544599491465809e-06,
      "loss": 0.4593,
      "step": 8476
    },
    {
      "epoch": 1.0393575282000982,
      "grad_norm": 2.0443770395173817,
      "learning_rate": 2.5440927319223053e-06,
      "loss": 0.4628,
      "step": 8477
    },
    {
      "epoch": 1.0394801373222167,
      "grad_norm": 1.6644626324341425,
      "learning_rate": 2.5435859705665115e-06,
      "loss": 0.4238,
      "step": 8478
    },
    {
      "epoch": 1.0396027464443354,
      "grad_norm": 1.8576434664815875,
      "learning_rate": 2.543079207419258e-06,
      "loss": 0.4254,
      "step": 8479
    },
    {
      "epoch": 1.039725355566454,
      "grad_norm": 1.902592179203468,
      "learning_rate": 2.542572442501372e-06,
      "loss": 0.4089,
      "step": 8480
    },
    {
      "epoch": 1.0398479646885728,
      "grad_norm": 1.944731267716488,
      "learning_rate": 2.5420656758336844e-06,
      "loss": 0.4667,
      "step": 8481
    },
    {
      "epoch": 1.0399705738106915,
      "grad_norm": 2.036146968980654,
      "learning_rate": 2.5415589074370216e-06,
      "loss": 0.4889,
      "step": 8482
    },
    {
      "epoch": 1.0400931829328102,
      "grad_norm": 1.9551141444053388,
      "learning_rate": 2.541052137332215e-06,
      "loss": 0.4322,
      "step": 8483
    },
    {
      "epoch": 1.0402157920549289,
      "grad_norm": 1.9010437974507606,
      "learning_rate": 2.5405453655400934e-06,
      "loss": 0.486,
      "step": 8484
    },
    {
      "epoch": 1.0403384011770476,
      "grad_norm": 1.9164025429791018,
      "learning_rate": 2.5400385920814846e-06,
      "loss": 0.4295,
      "step": 8485
    },
    {
      "epoch": 1.0404610102991663,
      "grad_norm": 1.936919395826018,
      "learning_rate": 2.539531816977219e-06,
      "loss": 0.4693,
      "step": 8486
    },
    {
      "epoch": 1.040583619421285,
      "grad_norm": 1.8459382872847392,
      "learning_rate": 2.539025040248126e-06,
      "loss": 0.4462,
      "step": 8487
    },
    {
      "epoch": 1.0407062285434037,
      "grad_norm": 2.0582858979464826,
      "learning_rate": 2.5385182619150344e-06,
      "loss": 0.4398,
      "step": 8488
    },
    {
      "epoch": 1.0408288376655224,
      "grad_norm": 1.8537986596714824,
      "learning_rate": 2.5380114819987744e-06,
      "loss": 0.4355,
      "step": 8489
    },
    {
      "epoch": 1.040951446787641,
      "grad_norm": 1.9703806286571588,
      "learning_rate": 2.5375047005201747e-06,
      "loss": 0.4431,
      "step": 8490
    },
    {
      "epoch": 1.0410740559097598,
      "grad_norm": 1.7965855205055317,
      "learning_rate": 2.5369979175000658e-06,
      "loss": 0.4119,
      "step": 8491
    },
    {
      "epoch": 1.0411966650318785,
      "grad_norm": 1.9165244250333786,
      "learning_rate": 2.5364911329592773e-06,
      "loss": 0.4329,
      "step": 8492
    },
    {
      "epoch": 1.041319274153997,
      "grad_norm": 1.7845162110992774,
      "learning_rate": 2.535984346918638e-06,
      "loss": 0.4471,
      "step": 8493
    },
    {
      "epoch": 1.0414418832761156,
      "grad_norm": 2.020913561408177,
      "learning_rate": 2.5354775593989785e-06,
      "loss": 0.3897,
      "step": 8494
    },
    {
      "epoch": 1.0415644923982343,
      "grad_norm": 2.1044792456006127,
      "learning_rate": 2.5349707704211286e-06,
      "loss": 0.4653,
      "step": 8495
    },
    {
      "epoch": 1.041687101520353,
      "grad_norm": 1.9073661992089124,
      "learning_rate": 2.534463980005918e-06,
      "loss": 0.4233,
      "step": 8496
    },
    {
      "epoch": 1.0418097106424717,
      "grad_norm": 1.9271871543039552,
      "learning_rate": 2.5339571881741774e-06,
      "loss": 0.3812,
      "step": 8497
    },
    {
      "epoch": 1.0419323197645904,
      "grad_norm": 2.006692264056423,
      "learning_rate": 2.5334503949467353e-06,
      "loss": 0.4085,
      "step": 8498
    },
    {
      "epoch": 1.0420549288867091,
      "grad_norm": 1.829634454771557,
      "learning_rate": 2.5329436003444236e-06,
      "loss": 0.4403,
      "step": 8499
    },
    {
      "epoch": 1.0421775380088278,
      "grad_norm": 1.8323224378772585,
      "learning_rate": 2.5324368043880705e-06,
      "loss": 0.4568,
      "step": 8500
    },
    {
      "epoch": 1.0423001471309465,
      "grad_norm": 1.858698637434466,
      "learning_rate": 2.5319300070985086e-06,
      "loss": 0.4318,
      "step": 8501
    },
    {
      "epoch": 1.0424227562530652,
      "grad_norm": 1.9553238413132619,
      "learning_rate": 2.5314232084965662e-06,
      "loss": 0.4486,
      "step": 8502
    },
    {
      "epoch": 1.042545365375184,
      "grad_norm": 1.8116376855041385,
      "learning_rate": 2.5309164086030748e-06,
      "loss": 0.4538,
      "step": 8503
    },
    {
      "epoch": 1.0426679744973026,
      "grad_norm": 1.8594990745471904,
      "learning_rate": 2.5304096074388636e-06,
      "loss": 0.4312,
      "step": 8504
    },
    {
      "epoch": 1.0427905836194213,
      "grad_norm": 1.8810703767860284,
      "learning_rate": 2.529902805024764e-06,
      "loss": 0.445,
      "step": 8505
    },
    {
      "epoch": 1.04291319274154,
      "grad_norm": 1.9838498727423381,
      "learning_rate": 2.5293960013816062e-06,
      "loss": 0.4016,
      "step": 8506
    },
    {
      "epoch": 1.0430358018636587,
      "grad_norm": 1.9004221652103928,
      "learning_rate": 2.528889196530221e-06,
      "loss": 0.4929,
      "step": 8507
    },
    {
      "epoch": 1.0431584109857774,
      "grad_norm": 1.9951982308719969,
      "learning_rate": 2.528382390491438e-06,
      "loss": 0.4141,
      "step": 8508
    },
    {
      "epoch": 1.0432810201078961,
      "grad_norm": 1.8197395641385548,
      "learning_rate": 2.527875583286089e-06,
      "loss": 0.4198,
      "step": 8509
    },
    {
      "epoch": 1.0434036292300146,
      "grad_norm": 1.6937218681173558,
      "learning_rate": 2.5273687749350034e-06,
      "loss": 0.4698,
      "step": 8510
    },
    {
      "epoch": 1.0435262383521333,
      "grad_norm": 1.9307597647934132,
      "learning_rate": 2.526861965459014e-06,
      "loss": 0.4831,
      "step": 8511
    },
    {
      "epoch": 1.043648847474252,
      "grad_norm": 1.8337355268209015,
      "learning_rate": 2.5263551548789494e-06,
      "loss": 0.4228,
      "step": 8512
    },
    {
      "epoch": 1.0437714565963707,
      "grad_norm": 2.0038109824789965,
      "learning_rate": 2.5258483432156416e-06,
      "loss": 0.4174,
      "step": 8513
    },
    {
      "epoch": 1.0438940657184894,
      "grad_norm": 1.910277751864967,
      "learning_rate": 2.5253415304899208e-06,
      "loss": 0.4398,
      "step": 8514
    },
    {
      "epoch": 1.0440166748406081,
      "grad_norm": 1.793499382705494,
      "learning_rate": 2.5248347167226183e-06,
      "loss": 0.4464,
      "step": 8515
    },
    {
      "epoch": 1.0441392839627268,
      "grad_norm": 1.8654321685465183,
      "learning_rate": 2.524327901934565e-06,
      "loss": 0.4609,
      "step": 8516
    },
    {
      "epoch": 1.0442618930848455,
      "grad_norm": 1.7923539844550611,
      "learning_rate": 2.5238210861465917e-06,
      "loss": 0.4413,
      "step": 8517
    },
    {
      "epoch": 1.0443845022069642,
      "grad_norm": 1.96247069886538,
      "learning_rate": 2.5233142693795297e-06,
      "loss": 0.4563,
      "step": 8518
    },
    {
      "epoch": 1.044507111329083,
      "grad_norm": 1.9168617818482385,
      "learning_rate": 2.5228074516542104e-06,
      "loss": 0.4413,
      "step": 8519
    },
    {
      "epoch": 1.0446297204512016,
      "grad_norm": 1.9693913752564505,
      "learning_rate": 2.522300632991464e-06,
      "loss": 0.4,
      "step": 8520
    },
    {
      "epoch": 1.0447523295733203,
      "grad_norm": 1.7703390031339856,
      "learning_rate": 2.5217938134121222e-06,
      "loss": 0.4629,
      "step": 8521
    },
    {
      "epoch": 1.044874938695439,
      "grad_norm": 1.9197968351206938,
      "learning_rate": 2.5212869929370172e-06,
      "loss": 0.4374,
      "step": 8522
    },
    {
      "epoch": 1.0449975478175577,
      "grad_norm": 1.6231952412612398,
      "learning_rate": 2.520780171586977e-06,
      "loss": 0.3857,
      "step": 8523
    },
    {
      "epoch": 1.0451201569396764,
      "grad_norm": 2.1007958841588015,
      "learning_rate": 2.520273349382837e-06,
      "loss": 0.3968,
      "step": 8524
    },
    {
      "epoch": 1.045242766061795,
      "grad_norm": 1.834855154798367,
      "learning_rate": 2.519766526345425e-06,
      "loss": 0.4352,
      "step": 8525
    },
    {
      "epoch": 1.0453653751839136,
      "grad_norm": 1.8363202874518034,
      "learning_rate": 2.5192597024955743e-06,
      "loss": 0.4077,
      "step": 8526
    },
    {
      "epoch": 1.0454879843060323,
      "grad_norm": 1.775201907708358,
      "learning_rate": 2.5187528778541154e-06,
      "loss": 0.4441,
      "step": 8527
    },
    {
      "epoch": 1.045610593428151,
      "grad_norm": 2.0431042177288954,
      "learning_rate": 2.5182460524418806e-06,
      "loss": 0.492,
      "step": 8528
    },
    {
      "epoch": 1.0457332025502697,
      "grad_norm": 2.1066648100963925,
      "learning_rate": 2.5177392262797003e-06,
      "loss": 0.4216,
      "step": 8529
    },
    {
      "epoch": 1.0458558116723884,
      "grad_norm": 1.8207572489417445,
      "learning_rate": 2.5172323993884062e-06,
      "loss": 0.4155,
      "step": 8530
    },
    {
      "epoch": 1.045978420794507,
      "grad_norm": 1.8431459443249831,
      "learning_rate": 2.5167255717888304e-06,
      "loss": 0.4011,
      "step": 8531
    },
    {
      "epoch": 1.0461010299166258,
      "grad_norm": 1.9152533778351184,
      "learning_rate": 2.5162187435018044e-06,
      "loss": 0.4044,
      "step": 8532
    },
    {
      "epoch": 1.0462236390387445,
      "grad_norm": 1.9534028606035188,
      "learning_rate": 2.5157119145481584e-06,
      "loss": 0.4087,
      "step": 8533
    },
    {
      "epoch": 1.0463462481608632,
      "grad_norm": 1.930638429692496,
      "learning_rate": 2.515205084948726e-06,
      "loss": 0.3861,
      "step": 8534
    },
    {
      "epoch": 1.046468857282982,
      "grad_norm": 1.8034992627340665,
      "learning_rate": 2.5146982547243365e-06,
      "loss": 0.4024,
      "step": 8535
    },
    {
      "epoch": 1.0465914664051006,
      "grad_norm": 1.814651477606857,
      "learning_rate": 2.5141914238958232e-06,
      "loss": 0.4186,
      "step": 8536
    },
    {
      "epoch": 1.0467140755272193,
      "grad_norm": 1.9826986301653262,
      "learning_rate": 2.5136845924840177e-06,
      "loss": 0.4382,
      "step": 8537
    },
    {
      "epoch": 1.046836684649338,
      "grad_norm": 1.9654882464436636,
      "learning_rate": 2.51317776050975e-06,
      "loss": 0.43,
      "step": 8538
    },
    {
      "epoch": 1.0469592937714567,
      "grad_norm": 2.0086646690417775,
      "learning_rate": 2.5126709279938544e-06,
      "loss": 0.4727,
      "step": 8539
    },
    {
      "epoch": 1.0470819028935754,
      "grad_norm": 2.136640000572644,
      "learning_rate": 2.51216409495716e-06,
      "loss": 0.417,
      "step": 8540
    },
    {
      "epoch": 1.0472045120156939,
      "grad_norm": 2.220903578356891,
      "learning_rate": 2.511657261420501e-06,
      "loss": 0.4696,
      "step": 8541
    },
    {
      "epoch": 1.0473271211378126,
      "grad_norm": 2.152995242068523,
      "learning_rate": 2.5111504274047073e-06,
      "loss": 0.4589,
      "step": 8542
    },
    {
      "epoch": 1.0474497302599313,
      "grad_norm": 1.8394727267616746,
      "learning_rate": 2.5106435929306113e-06,
      "loss": 0.4642,
      "step": 8543
    },
    {
      "epoch": 1.04757233938205,
      "grad_norm": 2.0365250345699213,
      "learning_rate": 2.5101367580190443e-06,
      "loss": 0.4277,
      "step": 8544
    },
    {
      "epoch": 1.0476949485041687,
      "grad_norm": 1.8931981533550781,
      "learning_rate": 2.50962992269084e-06,
      "loss": 0.4326,
      "step": 8545
    },
    {
      "epoch": 1.0478175576262874,
      "grad_norm": 1.8703929154101142,
      "learning_rate": 2.509123086966827e-06,
      "loss": 0.3984,
      "step": 8546
    },
    {
      "epoch": 1.047940166748406,
      "grad_norm": 1.8298199559846398,
      "learning_rate": 2.50861625086784e-06,
      "loss": 0.4304,
      "step": 8547
    },
    {
      "epoch": 1.0480627758705248,
      "grad_norm": 1.8415376872312537,
      "learning_rate": 2.5081094144147093e-06,
      "loss": 0.4193,
      "step": 8548
    },
    {
      "epoch": 1.0481853849926435,
      "grad_norm": 2.092165257252546,
      "learning_rate": 2.5076025776282688e-06,
      "loss": 0.4044,
      "step": 8549
    },
    {
      "epoch": 1.0483079941147622,
      "grad_norm": 1.7649007912029895,
      "learning_rate": 2.5070957405293476e-06,
      "loss": 0.3876,
      "step": 8550
    },
    {
      "epoch": 1.0484306032368809,
      "grad_norm": 1.932240199157542,
      "learning_rate": 2.506588903138779e-06,
      "loss": 0.3897,
      "step": 8551
    },
    {
      "epoch": 1.0485532123589996,
      "grad_norm": 1.7342152525038808,
      "learning_rate": 2.506082065477395e-06,
      "loss": 0.396,
      "step": 8552
    },
    {
      "epoch": 1.0486758214811183,
      "grad_norm": 2.059497641453751,
      "learning_rate": 2.505575227566027e-06,
      "loss": 0.4461,
      "step": 8553
    },
    {
      "epoch": 1.048798430603237,
      "grad_norm": 1.9967534161014031,
      "learning_rate": 2.505068389425508e-06,
      "loss": 0.458,
      "step": 8554
    },
    {
      "epoch": 1.0489210397253557,
      "grad_norm": 1.9723781382817889,
      "learning_rate": 2.5045615510766684e-06,
      "loss": 0.4162,
      "step": 8555
    },
    {
      "epoch": 1.0490436488474741,
      "grad_norm": 1.918020398426201,
      "learning_rate": 2.5040547125403413e-06,
      "loss": 0.4642,
      "step": 8556
    },
    {
      "epoch": 1.0491662579695928,
      "grad_norm": 1.7523765043587436,
      "learning_rate": 2.503547873837359e-06,
      "loss": 0.4516,
      "step": 8557
    },
    {
      "epoch": 1.0492888670917115,
      "grad_norm": 1.8776713416879862,
      "learning_rate": 2.5030410349885514e-06,
      "loss": 0.4246,
      "step": 8558
    },
    {
      "epoch": 1.0494114762138302,
      "grad_norm": 2.068827860815871,
      "learning_rate": 2.5025341960147538e-06,
      "loss": 0.4196,
      "step": 8559
    },
    {
      "epoch": 1.049534085335949,
      "grad_norm": 1.8680345133417964,
      "learning_rate": 2.5020273569367948e-06,
      "loss": 0.4322,
      "step": 8560
    },
    {
      "epoch": 1.0496566944580676,
      "grad_norm": 1.7895938670220006,
      "learning_rate": 2.501520517775508e-06,
      "loss": 0.4876,
      "step": 8561
    },
    {
      "epoch": 1.0497793035801863,
      "grad_norm": 1.804944305017598,
      "learning_rate": 2.5010136785517256e-06,
      "loss": 0.3971,
      "step": 8562
    },
    {
      "epoch": 1.049901912702305,
      "grad_norm": 2.1611239379820186,
      "learning_rate": 2.5005068392862793e-06,
      "loss": 0.419,
      "step": 8563
    },
    {
      "epoch": 1.0500245218244237,
      "grad_norm": 1.9981293764622163,
      "learning_rate": 2.5e-06,
      "loss": 0.4702,
      "step": 8564
    },
    {
      "epoch": 1.0501471309465424,
      "grad_norm": 1.7384569827111684,
      "learning_rate": 2.499493160713722e-06,
      "loss": 0.4229,
      "step": 8565
    },
    {
      "epoch": 1.0502697400686611,
      "grad_norm": 1.8597949237684375,
      "learning_rate": 2.4989863214482748e-06,
      "loss": 0.4879,
      "step": 8566
    },
    {
      "epoch": 1.0503923491907798,
      "grad_norm": 1.9536429287893102,
      "learning_rate": 2.498479482224493e-06,
      "loss": 0.3994,
      "step": 8567
    },
    {
      "epoch": 1.0505149583128985,
      "grad_norm": 1.8698803915719324,
      "learning_rate": 2.497972643063206e-06,
      "loss": 0.4476,
      "step": 8568
    },
    {
      "epoch": 1.0506375674350172,
      "grad_norm": 1.9836362202316284,
      "learning_rate": 2.4974658039852475e-06,
      "loss": 0.4388,
      "step": 8569
    },
    {
      "epoch": 1.050760176557136,
      "grad_norm": 1.8107818848235522,
      "learning_rate": 2.4969589650114486e-06,
      "loss": 0.4119,
      "step": 8570
    },
    {
      "epoch": 1.0508827856792546,
      "grad_norm": 2.098330214874342,
      "learning_rate": 2.4964521261626425e-06,
      "loss": 0.4548,
      "step": 8571
    },
    {
      "epoch": 1.0510053948013731,
      "grad_norm": 2.0086907431847973,
      "learning_rate": 2.495945287459659e-06,
      "loss": 0.418,
      "step": 8572
    },
    {
      "epoch": 1.0511280039234918,
      "grad_norm": 1.8013273467105293,
      "learning_rate": 2.495438448923332e-06,
      "loss": 0.4215,
      "step": 8573
    },
    {
      "epoch": 1.0512506130456105,
      "grad_norm": 1.95534297559219,
      "learning_rate": 2.4949316105744926e-06,
      "loss": 0.4834,
      "step": 8574
    },
    {
      "epoch": 1.0513732221677292,
      "grad_norm": 1.8783763742280406,
      "learning_rate": 2.4944247724339734e-06,
      "loss": 0.4514,
      "step": 8575
    },
    {
      "epoch": 1.051495831289848,
      "grad_norm": 1.8879500183274673,
      "learning_rate": 2.4939179345226058e-06,
      "loss": 0.4607,
      "step": 8576
    },
    {
      "epoch": 1.0516184404119666,
      "grad_norm": 1.9777651186723733,
      "learning_rate": 2.4934110968612217e-06,
      "loss": 0.4698,
      "step": 8577
    },
    {
      "epoch": 1.0517410495340853,
      "grad_norm": 1.8655192204024802,
      "learning_rate": 2.492904259470653e-06,
      "loss": 0.4525,
      "step": 8578
    },
    {
      "epoch": 1.051863658656204,
      "grad_norm": 1.8763766402673505,
      "learning_rate": 2.492397422371733e-06,
      "loss": 0.4388,
      "step": 8579
    },
    {
      "epoch": 1.0519862677783227,
      "grad_norm": 1.8004267680724915,
      "learning_rate": 2.491890585585291e-06,
      "loss": 0.4701,
      "step": 8580
    },
    {
      "epoch": 1.0521088769004414,
      "grad_norm": 1.9875698609470482,
      "learning_rate": 2.4913837491321602e-06,
      "loss": 0.407,
      "step": 8581
    },
    {
      "epoch": 1.0522314860225601,
      "grad_norm": 1.823000490067278,
      "learning_rate": 2.490876913033173e-06,
      "loss": 0.373,
      "step": 8582
    },
    {
      "epoch": 1.0523540951446788,
      "grad_norm": 1.7013901617804053,
      "learning_rate": 2.4903700773091616e-06,
      "loss": 0.4265,
      "step": 8583
    },
    {
      "epoch": 1.0524767042667975,
      "grad_norm": 1.8668130876483437,
      "learning_rate": 2.489863241980956e-06,
      "loss": 0.4399,
      "step": 8584
    },
    {
      "epoch": 1.0525993133889162,
      "grad_norm": 2.1839491570349057,
      "learning_rate": 2.48935640706939e-06,
      "loss": 0.4591,
      "step": 8585
    },
    {
      "epoch": 1.052721922511035,
      "grad_norm": 1.8948959028599777,
      "learning_rate": 2.488849572595293e-06,
      "loss": 0.4668,
      "step": 8586
    },
    {
      "epoch": 1.0528445316331536,
      "grad_norm": 1.9919998159128791,
      "learning_rate": 2.4883427385795008e-06,
      "loss": 0.4446,
      "step": 8587
    },
    {
      "epoch": 1.052967140755272,
      "grad_norm": 2.056010325027379,
      "learning_rate": 2.4878359050428402e-06,
      "loss": 0.4196,
      "step": 8588
    },
    {
      "epoch": 1.0530897498773908,
      "grad_norm": 1.8896299531756644,
      "learning_rate": 2.4873290720061464e-06,
      "loss": 0.4399,
      "step": 8589
    },
    {
      "epoch": 1.0532123589995095,
      "grad_norm": 1.993979398549707,
      "learning_rate": 2.4868222394902503e-06,
      "loss": 0.3985,
      "step": 8590
    },
    {
      "epoch": 1.0533349681216282,
      "grad_norm": 1.9797575880006415,
      "learning_rate": 2.4863154075159836e-06,
      "loss": 0.4044,
      "step": 8591
    },
    {
      "epoch": 1.053457577243747,
      "grad_norm": 1.8874172442250496,
      "learning_rate": 2.485808576104178e-06,
      "loss": 0.4024,
      "step": 8592
    },
    {
      "epoch": 1.0535801863658656,
      "grad_norm": 1.9067999078209306,
      "learning_rate": 2.4853017452756644e-06,
      "loss": 0.4087,
      "step": 8593
    },
    {
      "epoch": 1.0537027954879843,
      "grad_norm": 2.043550334644368,
      "learning_rate": 2.484794915051275e-06,
      "loss": 0.4296,
      "step": 8594
    },
    {
      "epoch": 1.053825404610103,
      "grad_norm": 1.9053573595053723,
      "learning_rate": 2.484288085451842e-06,
      "loss": 0.4546,
      "step": 8595
    },
    {
      "epoch": 1.0539480137322217,
      "grad_norm": 1.8471280821844198,
      "learning_rate": 2.483781256498197e-06,
      "loss": 0.443,
      "step": 8596
    },
    {
      "epoch": 1.0540706228543404,
      "grad_norm": 1.9966613978475463,
      "learning_rate": 2.4832744282111704e-06,
      "loss": 0.4186,
      "step": 8597
    },
    {
      "epoch": 1.054193231976459,
      "grad_norm": 1.8067076389414676,
      "learning_rate": 2.4827676006115946e-06,
      "loss": 0.4684,
      "step": 8598
    },
    {
      "epoch": 1.0543158410985778,
      "grad_norm": 2.066149139708384,
      "learning_rate": 2.4822607737203e-06,
      "loss": 0.4584,
      "step": 8599
    },
    {
      "epoch": 1.0544384502206965,
      "grad_norm": 1.7905677346407463,
      "learning_rate": 2.481753947558121e-06,
      "loss": 0.4308,
      "step": 8600
    },
    {
      "epoch": 1.0545610593428152,
      "grad_norm": 2.0627748490252498,
      "learning_rate": 2.481247122145885e-06,
      "loss": 0.4835,
      "step": 8601
    },
    {
      "epoch": 1.0546836684649339,
      "grad_norm": 1.7753840363132345,
      "learning_rate": 2.4807402975044265e-06,
      "loss": 0.4114,
      "step": 8602
    },
    {
      "epoch": 1.0548062775870526,
      "grad_norm": 1.8124603832858457,
      "learning_rate": 2.480233473654576e-06,
      "loss": 0.4294,
      "step": 8603
    },
    {
      "epoch": 1.054928886709171,
      "grad_norm": 1.9304385201696597,
      "learning_rate": 2.4797266506171648e-06,
      "loss": 0.4695,
      "step": 8604
    },
    {
      "epoch": 1.0550514958312898,
      "grad_norm": 1.8288669618822715,
      "learning_rate": 2.4792198284130237e-06,
      "loss": 0.4915,
      "step": 8605
    },
    {
      "epoch": 1.0551741049534085,
      "grad_norm": 1.8832630240257038,
      "learning_rate": 2.478713007062984e-06,
      "loss": 0.4089,
      "step": 8606
    },
    {
      "epoch": 1.0552967140755272,
      "grad_norm": 1.8778926075554854,
      "learning_rate": 2.4782061865878778e-06,
      "loss": 0.3845,
      "step": 8607
    },
    {
      "epoch": 1.0554193231976459,
      "grad_norm": 1.9774797987123296,
      "learning_rate": 2.4776993670085363e-06,
      "loss": 0.3858,
      "step": 8608
    },
    {
      "epoch": 1.0555419323197646,
      "grad_norm": 1.8561762160624833,
      "learning_rate": 2.4771925483457904e-06,
      "loss": 0.4033,
      "step": 8609
    },
    {
      "epoch": 1.0556645414418833,
      "grad_norm": 2.322309507124511,
      "learning_rate": 2.4766857306204707e-06,
      "loss": 0.5068,
      "step": 8610
    },
    {
      "epoch": 1.055787150564002,
      "grad_norm": 1.9963916911780304,
      "learning_rate": 2.4761789138534087e-06,
      "loss": 0.4632,
      "step": 8611
    },
    {
      "epoch": 1.0559097596861207,
      "grad_norm": 1.933573044498747,
      "learning_rate": 2.475672098065435e-06,
      "loss": 0.4549,
      "step": 8612
    },
    {
      "epoch": 1.0560323688082394,
      "grad_norm": 1.896054397598988,
      "learning_rate": 2.475165283277382e-06,
      "loss": 0.4191,
      "step": 8613
    },
    {
      "epoch": 1.056154977930358,
      "grad_norm": 1.9149925197247757,
      "learning_rate": 2.47465846951008e-06,
      "loss": 0.4589,
      "step": 8614
    },
    {
      "epoch": 1.0562775870524768,
      "grad_norm": 1.8571474101170995,
      "learning_rate": 2.4741516567843592e-06,
      "loss": 0.4422,
      "step": 8615
    },
    {
      "epoch": 1.0564001961745955,
      "grad_norm": 1.9895272158423531,
      "learning_rate": 2.4736448451210514e-06,
      "loss": 0.436,
      "step": 8616
    },
    {
      "epoch": 1.0565228052967142,
      "grad_norm": 1.9337999713351732,
      "learning_rate": 2.4731380345409874e-06,
      "loss": 0.4782,
      "step": 8617
    },
    {
      "epoch": 1.0566454144188329,
      "grad_norm": 1.9434218472144873,
      "learning_rate": 2.4726312250649974e-06,
      "loss": 0.4565,
      "step": 8618
    },
    {
      "epoch": 1.0567680235409513,
      "grad_norm": 1.8916190865178766,
      "learning_rate": 2.4721244167139116e-06,
      "loss": 0.4472,
      "step": 8619
    },
    {
      "epoch": 1.05689063266307,
      "grad_norm": 1.894068530453734,
      "learning_rate": 2.4716176095085624e-06,
      "loss": 0.3987,
      "step": 8620
    },
    {
      "epoch": 1.0570132417851887,
      "grad_norm": 1.9775670781633021,
      "learning_rate": 2.4711108034697804e-06,
      "loss": 0.4363,
      "step": 8621
    },
    {
      "epoch": 1.0571358509073074,
      "grad_norm": 1.9027116112298847,
      "learning_rate": 2.4706039986183946e-06,
      "loss": 0.3983,
      "step": 8622
    },
    {
      "epoch": 1.0572584600294261,
      "grad_norm": 2.0607127922251105,
      "learning_rate": 2.470097194975237e-06,
      "loss": 0.385,
      "step": 8623
    },
    {
      "epoch": 1.0573810691515448,
      "grad_norm": 2.127360170445868,
      "learning_rate": 2.469590392561137e-06,
      "loss": 0.4204,
      "step": 8624
    },
    {
      "epoch": 1.0575036782736635,
      "grad_norm": 2.000200795915259,
      "learning_rate": 2.469083591396927e-06,
      "loss": 0.4188,
      "step": 8625
    },
    {
      "epoch": 1.0576262873957822,
      "grad_norm": 2.0607998422073757,
      "learning_rate": 2.4685767915034346e-06,
      "loss": 0.4714,
      "step": 8626
    },
    {
      "epoch": 1.057748896517901,
      "grad_norm": 2.214666744598474,
      "learning_rate": 2.468069992901492e-06,
      "loss": 0.4574,
      "step": 8627
    },
    {
      "epoch": 1.0578715056400196,
      "grad_norm": 1.8892234552671467,
      "learning_rate": 2.4675631956119295e-06,
      "loss": 0.4423,
      "step": 8628
    },
    {
      "epoch": 1.0579941147621383,
      "grad_norm": 1.8469071970437343,
      "learning_rate": 2.4670563996555776e-06,
      "loss": 0.402,
      "step": 8629
    },
    {
      "epoch": 1.058116723884257,
      "grad_norm": 2.070715135887054,
      "learning_rate": 2.4665496050532655e-06,
      "loss": 0.4427,
      "step": 8630
    },
    {
      "epoch": 1.0582393330063757,
      "grad_norm": 2.0399668323480395,
      "learning_rate": 2.4660428118258234e-06,
      "loss": 0.425,
      "step": 8631
    },
    {
      "epoch": 1.0583619421284944,
      "grad_norm": 1.941117264813963,
      "learning_rate": 2.4655360199940824e-06,
      "loss": 0.4662,
      "step": 8632
    },
    {
      "epoch": 1.0584845512506131,
      "grad_norm": 2.1437545675869694,
      "learning_rate": 2.4650292295788722e-06,
      "loss": 0.4074,
      "step": 8633
    },
    {
      "epoch": 1.0586071603727318,
      "grad_norm": 1.8152690298301086,
      "learning_rate": 2.4645224406010223e-06,
      "loss": 0.466,
      "step": 8634
    },
    {
      "epoch": 1.0587297694948503,
      "grad_norm": 1.881818382734835,
      "learning_rate": 2.4640156530813626e-06,
      "loss": 0.4638,
      "step": 8635
    },
    {
      "epoch": 1.058852378616969,
      "grad_norm": 1.9650743656393936,
      "learning_rate": 2.4635088670407235e-06,
      "loss": 0.4133,
      "step": 8636
    },
    {
      "epoch": 1.0589749877390877,
      "grad_norm": 1.861442548898927,
      "learning_rate": 2.463002082499934e-06,
      "loss": 0.4166,
      "step": 8637
    },
    {
      "epoch": 1.0590975968612064,
      "grad_norm": 1.7197258671553228,
      "learning_rate": 2.462495299479826e-06,
      "loss": 0.4516,
      "step": 8638
    },
    {
      "epoch": 1.0592202059833251,
      "grad_norm": 1.8167662745053539,
      "learning_rate": 2.461988518001226e-06,
      "loss": 0.425,
      "step": 8639
    },
    {
      "epoch": 1.0593428151054438,
      "grad_norm": 1.9410410511758212,
      "learning_rate": 2.461481738084966e-06,
      "loss": 0.4172,
      "step": 8640
    },
    {
      "epoch": 1.0594654242275625,
      "grad_norm": 1.8957853105559184,
      "learning_rate": 2.4609749597518744e-06,
      "loss": 0.3832,
      "step": 8641
    },
    {
      "epoch": 1.0595880333496812,
      "grad_norm": 1.931361190583092,
      "learning_rate": 2.460468183022782e-06,
      "loss": 0.4592,
      "step": 8642
    },
    {
      "epoch": 1.0597106424718,
      "grad_norm": 1.9937623666433753,
      "learning_rate": 2.459961407918516e-06,
      "loss": 0.4434,
      "step": 8643
    },
    {
      "epoch": 1.0598332515939186,
      "grad_norm": 2.0073968121665176,
      "learning_rate": 2.459454634459907e-06,
      "loss": 0.4384,
      "step": 8644
    },
    {
      "epoch": 1.0599558607160373,
      "grad_norm": 2.021688895754654,
      "learning_rate": 2.4589478626677847e-06,
      "loss": 0.4736,
      "step": 8645
    },
    {
      "epoch": 1.060078469838156,
      "grad_norm": 1.9140957498144717,
      "learning_rate": 2.4584410925629788e-06,
      "loss": 0.4314,
      "step": 8646
    },
    {
      "epoch": 1.0602010789602747,
      "grad_norm": 2.0379085953789065,
      "learning_rate": 2.457934324166317e-06,
      "loss": 0.437,
      "step": 8647
    },
    {
      "epoch": 1.0603236880823934,
      "grad_norm": 1.869631492786954,
      "learning_rate": 2.4574275574986283e-06,
      "loss": 0.4045,
      "step": 8648
    },
    {
      "epoch": 1.0604462972045121,
      "grad_norm": 1.9531895653954336,
      "learning_rate": 2.4569207925807425e-06,
      "loss": 0.4236,
      "step": 8649
    },
    {
      "epoch": 1.0605689063266306,
      "grad_norm": 1.8460394269731706,
      "learning_rate": 2.4564140294334897e-06,
      "loss": 0.4096,
      "step": 8650
    },
    {
      "epoch": 1.0606915154487493,
      "grad_norm": 2.0082660274663433,
      "learning_rate": 2.4559072680776955e-06,
      "loss": 0.4116,
      "step": 8651
    },
    {
      "epoch": 1.060814124570868,
      "grad_norm": 2.040931177234216,
      "learning_rate": 2.455400508534192e-06,
      "loss": 0.4272,
      "step": 8652
    },
    {
      "epoch": 1.0609367336929867,
      "grad_norm": 1.9213573664147285,
      "learning_rate": 2.4548937508238056e-06,
      "loss": 0.39,
      "step": 8653
    },
    {
      "epoch": 1.0610593428151054,
      "grad_norm": 1.9988712916024782,
      "learning_rate": 2.454386994967367e-06,
      "loss": 0.4136,
      "step": 8654
    },
    {
      "epoch": 1.061181951937224,
      "grad_norm": 2.145424766625779,
      "learning_rate": 2.453880240985703e-06,
      "loss": 0.4363,
      "step": 8655
    },
    {
      "epoch": 1.0613045610593428,
      "grad_norm": 2.043526721829953,
      "learning_rate": 2.453373488899643e-06,
      "loss": 0.432,
      "step": 8656
    },
    {
      "epoch": 1.0614271701814615,
      "grad_norm": 1.7430850881614934,
      "learning_rate": 2.452866738730014e-06,
      "loss": 0.4423,
      "step": 8657
    },
    {
      "epoch": 1.0615497793035802,
      "grad_norm": 1.821732236393807,
      "learning_rate": 2.452359990497648e-06,
      "loss": 0.398,
      "step": 8658
    },
    {
      "epoch": 1.0616723884256989,
      "grad_norm": 1.8049884500567241,
      "learning_rate": 2.4518532442233685e-06,
      "loss": 0.4173,
      "step": 8659
    },
    {
      "epoch": 1.0617949975478176,
      "grad_norm": 1.9097346192994744,
      "learning_rate": 2.4513464999280067e-06,
      "loss": 0.4188,
      "step": 8660
    },
    {
      "epoch": 1.0619176066699363,
      "grad_norm": 1.7881877287741055,
      "learning_rate": 2.4508397576323893e-06,
      "loss": 0.3711,
      "step": 8661
    },
    {
      "epoch": 1.062040215792055,
      "grad_norm": 1.7175725076678876,
      "learning_rate": 2.450333017357345e-06,
      "loss": 0.4122,
      "step": 8662
    },
    {
      "epoch": 1.0621628249141737,
      "grad_norm": 1.9929627362668834,
      "learning_rate": 2.449826279123703e-06,
      "loss": 0.5006,
      "step": 8663
    },
    {
      "epoch": 1.0622854340362924,
      "grad_norm": 1.822878886534528,
      "learning_rate": 2.449319542952288e-06,
      "loss": 0.4146,
      "step": 8664
    },
    {
      "epoch": 1.062408043158411,
      "grad_norm": 2.035805704558601,
      "learning_rate": 2.4488128088639305e-06,
      "loss": 0.439,
      "step": 8665
    },
    {
      "epoch": 1.0625306522805298,
      "grad_norm": 1.8463614953789094,
      "learning_rate": 2.448306076879457e-06,
      "loss": 0.4021,
      "step": 8666
    },
    {
      "epoch": 1.0626532614026483,
      "grad_norm": 1.878202750172653,
      "learning_rate": 2.4477993470196956e-06,
      "loss": 0.4565,
      "step": 8667
    },
    {
      "epoch": 1.062775870524767,
      "grad_norm": 2.0115279790697724,
      "learning_rate": 2.447292619305473e-06,
      "loss": 0.4546,
      "step": 8668
    },
    {
      "epoch": 1.0628984796468857,
      "grad_norm": 2.0485852005562113,
      "learning_rate": 2.4467858937576163e-06,
      "loss": 0.4077,
      "step": 8669
    },
    {
      "epoch": 1.0630210887690044,
      "grad_norm": 1.9581437651677533,
      "learning_rate": 2.446279170396954e-06,
      "loss": 0.4308,
      "step": 8670
    },
    {
      "epoch": 1.063143697891123,
      "grad_norm": 1.878054320582731,
      "learning_rate": 2.4457724492443142e-06,
      "loss": 0.4651,
      "step": 8671
    },
    {
      "epoch": 1.0632663070132418,
      "grad_norm": 1.7956606622531839,
      "learning_rate": 2.445265730320522e-06,
      "loss": 0.4379,
      "step": 8672
    },
    {
      "epoch": 1.0633889161353605,
      "grad_norm": 1.8942925069494643,
      "learning_rate": 2.444759013646405e-06,
      "loss": 0.4259,
      "step": 8673
    },
    {
      "epoch": 1.0635115252574792,
      "grad_norm": 1.8337198213582904,
      "learning_rate": 2.44425229924279e-06,
      "loss": 0.4434,
      "step": 8674
    },
    {
      "epoch": 1.0636341343795979,
      "grad_norm": 1.8537215613792735,
      "learning_rate": 2.4437455871305048e-06,
      "loss": 0.4841,
      "step": 8675
    },
    {
      "epoch": 1.0637567435017166,
      "grad_norm": 1.8862896755137906,
      "learning_rate": 2.4432388773303755e-06,
      "loss": 0.4052,
      "step": 8676
    },
    {
      "epoch": 1.0638793526238353,
      "grad_norm": 1.9380841519857293,
      "learning_rate": 2.4427321698632283e-06,
      "loss": 0.4345,
      "step": 8677
    },
    {
      "epoch": 1.064001961745954,
      "grad_norm": 1.8152568438882395,
      "learning_rate": 2.4422254647498903e-06,
      "loss": 0.3995,
      "step": 8678
    },
    {
      "epoch": 1.0641245708680727,
      "grad_norm": 1.9287522489479123,
      "learning_rate": 2.441718762011189e-06,
      "loss": 0.4855,
      "step": 8679
    },
    {
      "epoch": 1.0642471799901914,
      "grad_norm": 1.9494041690438255,
      "learning_rate": 2.4412120616679486e-06,
      "loss": 0.4468,
      "step": 8680
    },
    {
      "epoch": 1.0643697891123098,
      "grad_norm": 1.6439493097095306,
      "learning_rate": 2.4407053637409974e-06,
      "loss": 0.4104,
      "step": 8681
    },
    {
      "epoch": 1.0644923982344285,
      "grad_norm": 2.021205124081655,
      "learning_rate": 2.4401986682511594e-06,
      "loss": 0.4257,
      "step": 8682
    },
    {
      "epoch": 1.0646150073565472,
      "grad_norm": 1.8731743793531581,
      "learning_rate": 2.439691975219264e-06,
      "loss": 0.3959,
      "step": 8683
    },
    {
      "epoch": 1.064737616478666,
      "grad_norm": 1.969589294677152,
      "learning_rate": 2.4391852846661337e-06,
      "loss": 0.4505,
      "step": 8684
    },
    {
      "epoch": 1.0648602256007846,
      "grad_norm": 1.965273795383779,
      "learning_rate": 2.438678596612596e-06,
      "loss": 0.4304,
      "step": 8685
    },
    {
      "epoch": 1.0649828347229033,
      "grad_norm": 1.9846172848380785,
      "learning_rate": 2.4381719110794768e-06,
      "loss": 0.4319,
      "step": 8686
    },
    {
      "epoch": 1.065105443845022,
      "grad_norm": 1.8914375863904227,
      "learning_rate": 2.437665228087601e-06,
      "loss": 0.4588,
      "step": 8687
    },
    {
      "epoch": 1.0652280529671407,
      "grad_norm": 1.8928494088306396,
      "learning_rate": 2.4371585476577963e-06,
      "loss": 0.4215,
      "step": 8688
    },
    {
      "epoch": 1.0653506620892594,
      "grad_norm": 2.2259103828874185,
      "learning_rate": 2.4366518698108847e-06,
      "loss": 0.4029,
      "step": 8689
    },
    {
      "epoch": 1.0654732712113781,
      "grad_norm": 1.7792352377999043,
      "learning_rate": 2.4361451945676943e-06,
      "loss": 0.3728,
      "step": 8690
    },
    {
      "epoch": 1.0655958803334968,
      "grad_norm": 1.9943236525798522,
      "learning_rate": 2.435638521949049e-06,
      "loss": 0.4133,
      "step": 8691
    },
    {
      "epoch": 1.0657184894556155,
      "grad_norm": 1.9512177245880742,
      "learning_rate": 2.4351318519757754e-06,
      "loss": 0.454,
      "step": 8692
    },
    {
      "epoch": 1.0658410985777342,
      "grad_norm": 2.0154585908751477,
      "learning_rate": 2.4346251846686968e-06,
      "loss": 0.3979,
      "step": 8693
    },
    {
      "epoch": 1.065963707699853,
      "grad_norm": 1.9208488347112882,
      "learning_rate": 2.4341185200486387e-06,
      "loss": 0.4451,
      "step": 8694
    },
    {
      "epoch": 1.0660863168219716,
      "grad_norm": 1.968993556013974,
      "learning_rate": 2.4336118581364256e-06,
      "loss": 0.4125,
      "step": 8695
    },
    {
      "epoch": 1.0662089259440903,
      "grad_norm": 1.7540017756405863,
      "learning_rate": 2.433105198952884e-06,
      "loss": 0.4122,
      "step": 8696
    },
    {
      "epoch": 1.066331535066209,
      "grad_norm": 1.578585929862909,
      "learning_rate": 2.4325985425188363e-06,
      "loss": 0.4016,
      "step": 8697
    },
    {
      "epoch": 1.0664541441883275,
      "grad_norm": 1.7765479943066524,
      "learning_rate": 2.432091888855108e-06,
      "loss": 0.4652,
      "step": 8698
    },
    {
      "epoch": 1.0665767533104462,
      "grad_norm": 1.910842305237259,
      "learning_rate": 2.431585237982523e-06,
      "loss": 0.5162,
      "step": 8699
    },
    {
      "epoch": 1.066699362432565,
      "grad_norm": 2.17171917074754,
      "learning_rate": 2.4310785899219065e-06,
      "loss": 0.4258,
      "step": 8700
    },
    {
      "epoch": 1.0668219715546836,
      "grad_norm": 1.8663324343725778,
      "learning_rate": 2.4305719446940816e-06,
      "loss": 0.4482,
      "step": 8701
    },
    {
      "epoch": 1.0669445806768023,
      "grad_norm": 2.0787080001936196,
      "learning_rate": 2.4300653023198718e-06,
      "loss": 0.4447,
      "step": 8702
    },
    {
      "epoch": 1.067067189798921,
      "grad_norm": 1.876734626697616,
      "learning_rate": 2.429558662820103e-06,
      "loss": 0.4662,
      "step": 8703
    },
    {
      "epoch": 1.0671897989210397,
      "grad_norm": 1.9003294164284479,
      "learning_rate": 2.4290520262155983e-06,
      "loss": 0.4742,
      "step": 8704
    },
    {
      "epoch": 1.0673124080431584,
      "grad_norm": 1.9575961440533443,
      "learning_rate": 2.42854539252718e-06,
      "loss": 0.4315,
      "step": 8705
    },
    {
      "epoch": 1.0674350171652771,
      "grad_norm": 1.882489593130254,
      "learning_rate": 2.4280387617756727e-06,
      "loss": 0.4707,
      "step": 8706
    },
    {
      "epoch": 1.0675576262873958,
      "grad_norm": 1.844339916414304,
      "learning_rate": 2.4275321339818993e-06,
      "loss": 0.4053,
      "step": 8707
    },
    {
      "epoch": 1.0676802354095145,
      "grad_norm": 2.098686420444333,
      "learning_rate": 2.4270255091666837e-06,
      "loss": 0.4502,
      "step": 8708
    },
    {
      "epoch": 1.0678028445316332,
      "grad_norm": 1.9612037221397387,
      "learning_rate": 2.4265188873508495e-06,
      "loss": 0.4789,
      "step": 8709
    },
    {
      "epoch": 1.067925453653752,
      "grad_norm": 1.8045092409582029,
      "learning_rate": 2.426012268555219e-06,
      "loss": 0.4137,
      "step": 8710
    },
    {
      "epoch": 1.0680480627758706,
      "grad_norm": 1.9831085867402376,
      "learning_rate": 2.4255056528006148e-06,
      "loss": 0.4214,
      "step": 8711
    },
    {
      "epoch": 1.0681706718979893,
      "grad_norm": 1.820575297114546,
      "learning_rate": 2.4249990401078603e-06,
      "loss": 0.4705,
      "step": 8712
    },
    {
      "epoch": 1.0682932810201078,
      "grad_norm": 1.6881475611268608,
      "learning_rate": 2.4244924304977785e-06,
      "loss": 0.4744,
      "step": 8713
    },
    {
      "epoch": 1.0684158901422265,
      "grad_norm": 1.8476568335085308,
      "learning_rate": 2.423985823991191e-06,
      "loss": 0.4221,
      "step": 8714
    },
    {
      "epoch": 1.0685384992643452,
      "grad_norm": 1.9331476895150732,
      "learning_rate": 2.4234792206089203e-06,
      "loss": 0.4147,
      "step": 8715
    },
    {
      "epoch": 1.0686611083864639,
      "grad_norm": 1.993995712633833,
      "learning_rate": 2.4229726203717896e-06,
      "loss": 0.4422,
      "step": 8716
    },
    {
      "epoch": 1.0687837175085826,
      "grad_norm": 1.9434497421353611,
      "learning_rate": 2.4224660233006207e-06,
      "loss": 0.4856,
      "step": 8717
    },
    {
      "epoch": 1.0689063266307013,
      "grad_norm": 1.8915542191915284,
      "learning_rate": 2.4219594294162353e-06,
      "loss": 0.4671,
      "step": 8718
    },
    {
      "epoch": 1.06902893575282,
      "grad_norm": 1.8133606590252342,
      "learning_rate": 2.4214528387394558e-06,
      "loss": 0.4588,
      "step": 8719
    },
    {
      "epoch": 1.0691515448749387,
      "grad_norm": 1.6827958079179532,
      "learning_rate": 2.420946251291103e-06,
      "loss": 0.4791,
      "step": 8720
    },
    {
      "epoch": 1.0692741539970574,
      "grad_norm": 1.905359234787941,
      "learning_rate": 2.4204396670920006e-06,
      "loss": 0.4067,
      "step": 8721
    },
    {
      "epoch": 1.069396763119176,
      "grad_norm": 1.9115541515649657,
      "learning_rate": 2.4199330861629673e-06,
      "loss": 0.4038,
      "step": 8722
    },
    {
      "epoch": 1.0695193722412948,
      "grad_norm": 1.816520603204179,
      "learning_rate": 2.419426508524826e-06,
      "loss": 0.4324,
      "step": 8723
    },
    {
      "epoch": 1.0696419813634135,
      "grad_norm": 1.9432756473802746,
      "learning_rate": 2.4189199341983983e-06,
      "loss": 0.4645,
      "step": 8724
    },
    {
      "epoch": 1.0697645904855322,
      "grad_norm": 1.941150296079314,
      "learning_rate": 2.4184133632045054e-06,
      "loss": 0.414,
      "step": 8725
    },
    {
      "epoch": 1.0698871996076509,
      "grad_norm": 2.1383620571497923,
      "learning_rate": 2.417906795563967e-06,
      "loss": 0.4561,
      "step": 8726
    },
    {
      "epoch": 1.0700098087297696,
      "grad_norm": 1.9201343236018793,
      "learning_rate": 2.417400231297604e-06,
      "loss": 0.4487,
      "step": 8727
    },
    {
      "epoch": 1.0701324178518883,
      "grad_norm": 1.9602338197927909,
      "learning_rate": 2.4168936704262386e-06,
      "loss": 0.4244,
      "step": 8728
    },
    {
      "epoch": 1.0702550269740068,
      "grad_norm": 1.8127083501440273,
      "learning_rate": 2.4163871129706906e-06,
      "loss": 0.416,
      "step": 8729
    },
    {
      "epoch": 1.0703776360961255,
      "grad_norm": 1.9012618850780363,
      "learning_rate": 2.41588055895178e-06,
      "loss": 0.455,
      "step": 8730
    },
    {
      "epoch": 1.0705002452182442,
      "grad_norm": 2.0403736721678802,
      "learning_rate": 2.4153740083903275e-06,
      "loss": 0.4257,
      "step": 8731
    },
    {
      "epoch": 1.0706228543403629,
      "grad_norm": 1.8127714990236086,
      "learning_rate": 2.4148674613071528e-06,
      "loss": 0.4261,
      "step": 8732
    },
    {
      "epoch": 1.0707454634624816,
      "grad_norm": 1.7950129102073826,
      "learning_rate": 2.4143609177230757e-06,
      "loss": 0.4149,
      "step": 8733
    },
    {
      "epoch": 1.0708680725846003,
      "grad_norm": 1.8943749100872593,
      "learning_rate": 2.4138543776589182e-06,
      "loss": 0.443,
      "step": 8734
    },
    {
      "epoch": 1.070990681706719,
      "grad_norm": 1.7434146703852198,
      "learning_rate": 2.4133478411354965e-06,
      "loss": 0.4125,
      "step": 8735
    },
    {
      "epoch": 1.0711132908288377,
      "grad_norm": 1.9629434503836813,
      "learning_rate": 2.4128413081736328e-06,
      "loss": 0.4633,
      "step": 8736
    },
    {
      "epoch": 1.0712358999509564,
      "grad_norm": 1.946090411439222,
      "learning_rate": 2.4123347787941454e-06,
      "loss": 0.4135,
      "step": 8737
    },
    {
      "epoch": 1.071358509073075,
      "grad_norm": 1.6874960574036548,
      "learning_rate": 2.411828253017854e-06,
      "loss": 0.4234,
      "step": 8738
    },
    {
      "epoch": 1.0714811181951938,
      "grad_norm": 1.947852050744966,
      "learning_rate": 2.4113217308655774e-06,
      "loss": 0.4225,
      "step": 8739
    },
    {
      "epoch": 1.0716037273173125,
      "grad_norm": 1.9251052796008383,
      "learning_rate": 2.410815212358134e-06,
      "loss": 0.4305,
      "step": 8740
    },
    {
      "epoch": 1.0717263364394312,
      "grad_norm": 2.0438924211898177,
      "learning_rate": 2.410308697516344e-06,
      "loss": 0.4548,
      "step": 8741
    },
    {
      "epoch": 1.0718489455615499,
      "grad_norm": 1.9357514568340835,
      "learning_rate": 2.4098021863610256e-06,
      "loss": 0.4206,
      "step": 8742
    },
    {
      "epoch": 1.0719715546836686,
      "grad_norm": 2.1132473051329534,
      "learning_rate": 2.4092956789129966e-06,
      "loss": 0.4487,
      "step": 8743
    },
    {
      "epoch": 1.072094163805787,
      "grad_norm": 1.8517526798493558,
      "learning_rate": 2.4087891751930756e-06,
      "loss": 0.4415,
      "step": 8744
    },
    {
      "epoch": 1.0722167729279057,
      "grad_norm": 1.7632615214602323,
      "learning_rate": 2.4082826752220804e-06,
      "loss": 0.4254,
      "step": 8745
    },
    {
      "epoch": 1.0723393820500244,
      "grad_norm": 1.9552417754034597,
      "learning_rate": 2.4077761790208317e-06,
      "loss": 0.4334,
      "step": 8746
    },
    {
      "epoch": 1.0724619911721431,
      "grad_norm": 2.124368308755401,
      "learning_rate": 2.4072696866101434e-06,
      "loss": 0.4465,
      "step": 8747
    },
    {
      "epoch": 1.0725846002942618,
      "grad_norm": 1.8635264644686205,
      "learning_rate": 2.4067631980108353e-06,
      "loss": 0.4269,
      "step": 8748
    },
    {
      "epoch": 1.0727072094163805,
      "grad_norm": 1.872710146636175,
      "learning_rate": 2.4062567132437254e-06,
      "loss": 0.4285,
      "step": 8749
    },
    {
      "epoch": 1.0728298185384992,
      "grad_norm": 1.7368615674267138,
      "learning_rate": 2.4057502323296307e-06,
      "loss": 0.4477,
      "step": 8750
    },
    {
      "epoch": 1.072952427660618,
      "grad_norm": 2.0855111166126306,
      "learning_rate": 2.4052437552893683e-06,
      "loss": 0.4395,
      "step": 8751
    },
    {
      "epoch": 1.0730750367827366,
      "grad_norm": 1.6532165857796841,
      "learning_rate": 2.404737282143755e-06,
      "loss": 0.4293,
      "step": 8752
    },
    {
      "epoch": 1.0731976459048553,
      "grad_norm": 1.814062199749779,
      "learning_rate": 2.4042308129136074e-06,
      "loss": 0.4454,
      "step": 8753
    },
    {
      "epoch": 1.073320255026974,
      "grad_norm": 1.9430351626220985,
      "learning_rate": 2.4037243476197442e-06,
      "loss": 0.4083,
      "step": 8754
    },
    {
      "epoch": 1.0734428641490927,
      "grad_norm": 1.8192092738423478,
      "learning_rate": 2.40321788628298e-06,
      "loss": 0.4117,
      "step": 8755
    },
    {
      "epoch": 1.0735654732712114,
      "grad_norm": 1.8408410514063371,
      "learning_rate": 2.4027114289241323e-06,
      "loss": 0.3852,
      "step": 8756
    },
    {
      "epoch": 1.0736880823933301,
      "grad_norm": 1.8633471919165483,
      "learning_rate": 2.402204975564017e-06,
      "loss": 0.4139,
      "step": 8757
    },
    {
      "epoch": 1.0738106915154488,
      "grad_norm": 1.8106529657872341,
      "learning_rate": 2.401698526223449e-06,
      "loss": 0.4386,
      "step": 8758
    },
    {
      "epoch": 1.0739333006375675,
      "grad_norm": 1.8159589236147238,
      "learning_rate": 2.401192080923248e-06,
      "loss": 0.3997,
      "step": 8759
    },
    {
      "epoch": 1.0740559097596862,
      "grad_norm": 1.99438134408906,
      "learning_rate": 2.400685639684226e-06,
      "loss": 0.4485,
      "step": 8760
    },
    {
      "epoch": 1.0741785188818047,
      "grad_norm": 1.928019109403758,
      "learning_rate": 2.4001792025272e-06,
      "loss": 0.4288,
      "step": 8761
    },
    {
      "epoch": 1.0743011280039234,
      "grad_norm": 1.9255348614736183,
      "learning_rate": 2.399672769472986e-06,
      "loss": 0.4542,
      "step": 8762
    },
    {
      "epoch": 1.074423737126042,
      "grad_norm": 2.1518715357839944,
      "learning_rate": 2.3991663405423994e-06,
      "loss": 0.4362,
      "step": 8763
    },
    {
      "epoch": 1.0745463462481608,
      "grad_norm": 1.8497714331335864,
      "learning_rate": 2.398659915756254e-06,
      "loss": 0.4347,
      "step": 8764
    },
    {
      "epoch": 1.0746689553702795,
      "grad_norm": 1.9362838256783363,
      "learning_rate": 2.3981534951353645e-06,
      "loss": 0.427,
      "step": 8765
    },
    {
      "epoch": 1.0747915644923982,
      "grad_norm": 2.005957985423646,
      "learning_rate": 2.3976470787005475e-06,
      "loss": 0.4661,
      "step": 8766
    },
    {
      "epoch": 1.074914173614517,
      "grad_norm": 1.8921566711889182,
      "learning_rate": 2.3971406664726175e-06,
      "loss": 0.4215,
      "step": 8767
    },
    {
      "epoch": 1.0750367827366356,
      "grad_norm": 1.8056900947505974,
      "learning_rate": 2.3966342584723878e-06,
      "loss": 0.4182,
      "step": 8768
    },
    {
      "epoch": 1.0751593918587543,
      "grad_norm": 1.907247255576231,
      "learning_rate": 2.3961278547206724e-06,
      "loss": 0.4446,
      "step": 8769
    },
    {
      "epoch": 1.075282000980873,
      "grad_norm": 1.8501668330920942,
      "learning_rate": 2.3956214552382865e-06,
      "loss": 0.4651,
      "step": 8770
    },
    {
      "epoch": 1.0754046101029917,
      "grad_norm": 1.8379002387111318,
      "learning_rate": 2.395115060046044e-06,
      "loss": 0.4287,
      "step": 8771
    },
    {
      "epoch": 1.0755272192251104,
      "grad_norm": 1.9047036246356053,
      "learning_rate": 2.3946086691647576e-06,
      "loss": 0.4285,
      "step": 8772
    },
    {
      "epoch": 1.075649828347229,
      "grad_norm": 2.0589140269313915,
      "learning_rate": 2.3941022826152412e-06,
      "loss": 0.4924,
      "step": 8773
    },
    {
      "epoch": 1.0757724374693478,
      "grad_norm": 1.7511742275618798,
      "learning_rate": 2.3935959004183087e-06,
      "loss": 0.4542,
      "step": 8774
    },
    {
      "epoch": 1.0758950465914665,
      "grad_norm": 1.9517453181669304,
      "learning_rate": 2.393089522594774e-06,
      "loss": 0.4726,
      "step": 8775
    },
    {
      "epoch": 1.076017655713585,
      "grad_norm": 1.7893424786605405,
      "learning_rate": 2.3925831491654483e-06,
      "loss": 0.4083,
      "step": 8776
    },
    {
      "epoch": 1.0761402648357037,
      "grad_norm": 2.038331013868197,
      "learning_rate": 2.3920767801511454e-06,
      "loss": 0.4951,
      "step": 8777
    },
    {
      "epoch": 1.0762628739578224,
      "grad_norm": 1.907711572833626,
      "learning_rate": 2.3915704155726773e-06,
      "loss": 0.4062,
      "step": 8778
    },
    {
      "epoch": 1.076385483079941,
      "grad_norm": 1.97147668450983,
      "learning_rate": 2.391064055450859e-06,
      "loss": 0.4781,
      "step": 8779
    },
    {
      "epoch": 1.0765080922020598,
      "grad_norm": 1.877781811360707,
      "learning_rate": 2.390557699806499e-06,
      "loss": 0.4255,
      "step": 8780
    },
    {
      "epoch": 1.0766307013241785,
      "grad_norm": 1.8418023711486464,
      "learning_rate": 2.390051348660412e-06,
      "loss": 0.4341,
      "step": 8781
    },
    {
      "epoch": 1.0767533104462972,
      "grad_norm": 2.041564362849448,
      "learning_rate": 2.3895450020334092e-06,
      "loss": 0.4136,
      "step": 8782
    },
    {
      "epoch": 1.0768759195684159,
      "grad_norm": 1.990523936154228,
      "learning_rate": 2.389038659946302e-06,
      "loss": 0.4474,
      "step": 8783
    },
    {
      "epoch": 1.0769985286905346,
      "grad_norm": 1.8372679740404336,
      "learning_rate": 2.3885323224199033e-06,
      "loss": 0.4352,
      "step": 8784
    },
    {
      "epoch": 1.0771211378126533,
      "grad_norm": 1.863626392469053,
      "learning_rate": 2.3880259894750224e-06,
      "loss": 0.4616,
      "step": 8785
    },
    {
      "epoch": 1.077243746934772,
      "grad_norm": 1.8645399625224073,
      "learning_rate": 2.387519661132472e-06,
      "loss": 0.4442,
      "step": 8786
    },
    {
      "epoch": 1.0773663560568907,
      "grad_norm": 2.1240100541418245,
      "learning_rate": 2.3870133374130626e-06,
      "loss": 0.4546,
      "step": 8787
    },
    {
      "epoch": 1.0774889651790094,
      "grad_norm": 1.924577346667398,
      "learning_rate": 2.3865070183376055e-06,
      "loss": 0.461,
      "step": 8788
    },
    {
      "epoch": 1.077611574301128,
      "grad_norm": 1.767420506186475,
      "learning_rate": 2.3860007039269105e-06,
      "loss": 0.455,
      "step": 8789
    },
    {
      "epoch": 1.0777341834232468,
      "grad_norm": 2.000498342226591,
      "learning_rate": 2.3854943942017883e-06,
      "loss": 0.4374,
      "step": 8790
    },
    {
      "epoch": 1.0778567925453655,
      "grad_norm": 1.7055759300831117,
      "learning_rate": 2.384988089183049e-06,
      "loss": 0.3942,
      "step": 8791
    },
    {
      "epoch": 1.077979401667484,
      "grad_norm": 2.0647953763310865,
      "learning_rate": 2.384481788891504e-06,
      "loss": 0.4646,
      "step": 8792
    },
    {
      "epoch": 1.0781020107896027,
      "grad_norm": 1.7836708288796372,
      "learning_rate": 2.38397549334796e-06,
      "loss": 0.4259,
      "step": 8793
    },
    {
      "epoch": 1.0782246199117214,
      "grad_norm": 1.698573260942177,
      "learning_rate": 2.38346920257323e-06,
      "loss": 0.436,
      "step": 8794
    },
    {
      "epoch": 1.07834722903384,
      "grad_norm": 2.052819000181961,
      "learning_rate": 2.3829629165881217e-06,
      "loss": 0.4737,
      "step": 8795
    },
    {
      "epoch": 1.0784698381559588,
      "grad_norm": 1.9373716268957728,
      "learning_rate": 2.3824566354134452e-06,
      "loss": 0.4514,
      "step": 8796
    },
    {
      "epoch": 1.0785924472780775,
      "grad_norm": 1.8036862881551694,
      "learning_rate": 2.3819503590700092e-06,
      "loss": 0.4225,
      "step": 8797
    },
    {
      "epoch": 1.0787150564001962,
      "grad_norm": 1.870820266558189,
      "learning_rate": 2.3814440875786215e-06,
      "loss": 0.4348,
      "step": 8798
    },
    {
      "epoch": 1.0788376655223149,
      "grad_norm": 2.124195150027705,
      "learning_rate": 2.380937820960092e-06,
      "loss": 0.4478,
      "step": 8799
    },
    {
      "epoch": 1.0789602746444336,
      "grad_norm": 1.7672293818614475,
      "learning_rate": 2.380431559235229e-06,
      "loss": 0.4298,
      "step": 8800
    },
    {
      "epoch": 1.0790828837665523,
      "grad_norm": 1.9574084801871365,
      "learning_rate": 2.3799253024248408e-06,
      "loss": 0.4226,
      "step": 8801
    },
    {
      "epoch": 1.079205492888671,
      "grad_norm": 1.9799153352406762,
      "learning_rate": 2.379419050549735e-06,
      "loss": 0.4256,
      "step": 8802
    },
    {
      "epoch": 1.0793281020107897,
      "grad_norm": 1.7728237748859126,
      "learning_rate": 2.378912803630719e-06,
      "loss": 0.3963,
      "step": 8803
    },
    {
      "epoch": 1.0794507111329084,
      "grad_norm": 1.738265075538794,
      "learning_rate": 2.378406561688602e-06,
      "loss": 0.3987,
      "step": 8804
    },
    {
      "epoch": 1.079573320255027,
      "grad_norm": 2.005507167551367,
      "learning_rate": 2.377900324744191e-06,
      "loss": 0.4528,
      "step": 8805
    },
    {
      "epoch": 1.0796959293771458,
      "grad_norm": 1.9592270231807838,
      "learning_rate": 2.377394092818292e-06,
      "loss": 0.4794,
      "step": 8806
    },
    {
      "epoch": 1.0798185384992642,
      "grad_norm": 2.0573403577561082,
      "learning_rate": 2.3768878659317134e-06,
      "loss": 0.4507,
      "step": 8807
    },
    {
      "epoch": 1.079941147621383,
      "grad_norm": 1.9180698843585,
      "learning_rate": 2.376381644105261e-06,
      "loss": 0.4359,
      "step": 8808
    },
    {
      "epoch": 1.0800637567435016,
      "grad_norm": 2.011010846337334,
      "learning_rate": 2.3758754273597424e-06,
      "loss": 0.4105,
      "step": 8809
    },
    {
      "epoch": 1.0801863658656203,
      "grad_norm": 1.9806955525303216,
      "learning_rate": 2.375369215715963e-06,
      "loss": 0.4634,
      "step": 8810
    },
    {
      "epoch": 1.080308974987739,
      "grad_norm": 1.5520036132417077,
      "learning_rate": 2.3748630091947294e-06,
      "loss": 0.4579,
      "step": 8811
    },
    {
      "epoch": 1.0804315841098577,
      "grad_norm": 1.8624657687299666,
      "learning_rate": 2.3743568078168474e-06,
      "loss": 0.4055,
      "step": 8812
    },
    {
      "epoch": 1.0805541932319764,
      "grad_norm": 2.001023761830271,
      "learning_rate": 2.373850611603124e-06,
      "loss": 0.4306,
      "step": 8813
    },
    {
      "epoch": 1.0806768023540951,
      "grad_norm": 1.9626518458152415,
      "learning_rate": 2.3733444205743633e-06,
      "loss": 0.4105,
      "step": 8814
    },
    {
      "epoch": 1.0807994114762138,
      "grad_norm": 1.9557523677063906,
      "learning_rate": 2.372838234751371e-06,
      "loss": 0.4248,
      "step": 8815
    },
    {
      "epoch": 1.0809220205983325,
      "grad_norm": 2.0575210016565344,
      "learning_rate": 2.372332054154952e-06,
      "loss": 0.4473,
      "step": 8816
    },
    {
      "epoch": 1.0810446297204512,
      "grad_norm": 2.08836946371541,
      "learning_rate": 2.3718258788059133e-06,
      "loss": 0.4582,
      "step": 8817
    },
    {
      "epoch": 1.08116723884257,
      "grad_norm": 1.8468970408016792,
      "learning_rate": 2.371319708725056e-06,
      "loss": 0.4335,
      "step": 8818
    },
    {
      "epoch": 1.0812898479646886,
      "grad_norm": 1.881010734965566,
      "learning_rate": 2.370813543933187e-06,
      "loss": 0.4263,
      "step": 8819
    },
    {
      "epoch": 1.0814124570868073,
      "grad_norm": 2.130590004556811,
      "learning_rate": 2.3703073844511104e-06,
      "loss": 0.4481,
      "step": 8820
    },
    {
      "epoch": 1.081535066208926,
      "grad_norm": 1.7460339692974125,
      "learning_rate": 2.36980123029963e-06,
      "loss": 0.4416,
      "step": 8821
    },
    {
      "epoch": 1.0816576753310447,
      "grad_norm": 1.9623371094978046,
      "learning_rate": 2.3692950814995493e-06,
      "loss": 0.4534,
      "step": 8822
    },
    {
      "epoch": 1.0817802844531634,
      "grad_norm": 2.0024177429355055,
      "learning_rate": 2.3687889380716717e-06,
      "loss": 0.427,
      "step": 8823
    },
    {
      "epoch": 1.081902893575282,
      "grad_norm": 1.9981721551973732,
      "learning_rate": 2.368282800036801e-06,
      "loss": 0.4146,
      "step": 8824
    },
    {
      "epoch": 1.0820255026974006,
      "grad_norm": 1.945314065063595,
      "learning_rate": 2.3677766674157413e-06,
      "loss": 0.4633,
      "step": 8825
    },
    {
      "epoch": 1.0821481118195193,
      "grad_norm": 1.8720759086178909,
      "learning_rate": 2.3672705402292944e-06,
      "loss": 0.4177,
      "step": 8826
    },
    {
      "epoch": 1.082270720941638,
      "grad_norm": 1.8570907744438314,
      "learning_rate": 2.366764418498263e-06,
      "loss": 0.4851,
      "step": 8827
    },
    {
      "epoch": 1.0823933300637567,
      "grad_norm": 1.8804458765160026,
      "learning_rate": 2.36625830224345e-06,
      "loss": 0.4417,
      "step": 8828
    },
    {
      "epoch": 1.0825159391858754,
      "grad_norm": 1.8517805583084448,
      "learning_rate": 2.3657521914856567e-06,
      "loss": 0.4121,
      "step": 8829
    },
    {
      "epoch": 1.082638548307994,
      "grad_norm": 1.8974988113661144,
      "learning_rate": 2.365246086245688e-06,
      "loss": 0.4629,
      "step": 8830
    },
    {
      "epoch": 1.0827611574301128,
      "grad_norm": 1.9432293477791138,
      "learning_rate": 2.364739986544342e-06,
      "loss": 0.4292,
      "step": 8831
    },
    {
      "epoch": 1.0828837665522315,
      "grad_norm": 1.990400511539624,
      "learning_rate": 2.3642338924024223e-06,
      "loss": 0.4328,
      "step": 8832
    },
    {
      "epoch": 1.0830063756743502,
      "grad_norm": 1.9897282053708554,
      "learning_rate": 2.3637278038407303e-06,
      "loss": 0.4251,
      "step": 8833
    },
    {
      "epoch": 1.083128984796469,
      "grad_norm": 2.129351491625236,
      "learning_rate": 2.363221720880067e-06,
      "loss": 0.4427,
      "step": 8834
    },
    {
      "epoch": 1.0832515939185876,
      "grad_norm": 1.8997348029911012,
      "learning_rate": 2.362715643541233e-06,
      "loss": 0.4247,
      "step": 8835
    },
    {
      "epoch": 1.0833742030407063,
      "grad_norm": 2.1108349377664593,
      "learning_rate": 2.362209571845028e-06,
      "loss": 0.4474,
      "step": 8836
    },
    {
      "epoch": 1.083496812162825,
      "grad_norm": 2.0549214678304,
      "learning_rate": 2.3617035058122543e-06,
      "loss": 0.4027,
      "step": 8837
    },
    {
      "epoch": 1.0836194212849435,
      "grad_norm": 1.9630036388475773,
      "learning_rate": 2.3611974454637123e-06,
      "loss": 0.4743,
      "step": 8838
    },
    {
      "epoch": 1.0837420304070622,
      "grad_norm": 2.1198991924439654,
      "learning_rate": 2.3606913908202002e-06,
      "loss": 0.416,
      "step": 8839
    },
    {
      "epoch": 1.0838646395291809,
      "grad_norm": 2.145949415535737,
      "learning_rate": 2.3601853419025186e-06,
      "loss": 0.4415,
      "step": 8840
    },
    {
      "epoch": 1.0839872486512996,
      "grad_norm": 1.8372859871346636,
      "learning_rate": 2.3596792987314664e-06,
      "loss": 0.4166,
      "step": 8841
    },
    {
      "epoch": 1.0841098577734183,
      "grad_norm": 1.9835294270626238,
      "learning_rate": 2.359173261327845e-06,
      "loss": 0.4278,
      "step": 8842
    },
    {
      "epoch": 1.084232466895537,
      "grad_norm": 1.9148306426017767,
      "learning_rate": 2.3586672297124504e-06,
      "loss": 0.4459,
      "step": 8843
    },
    {
      "epoch": 1.0843550760176557,
      "grad_norm": 1.8190034985222685,
      "learning_rate": 2.358161203906083e-06,
      "loss": 0.4264,
      "step": 8844
    },
    {
      "epoch": 1.0844776851397744,
      "grad_norm": 2.0781224191467875,
      "learning_rate": 2.3576551839295416e-06,
      "loss": 0.357,
      "step": 8845
    },
    {
      "epoch": 1.084600294261893,
      "grad_norm": 1.7470592146942925,
      "learning_rate": 2.357149169803625e-06,
      "loss": 0.4107,
      "step": 8846
    },
    {
      "epoch": 1.0847229033840118,
      "grad_norm": 1.8969099850957976,
      "learning_rate": 2.356643161549129e-06,
      "loss": 0.4427,
      "step": 8847
    },
    {
      "epoch": 1.0848455125061305,
      "grad_norm": 2.040580821683227,
      "learning_rate": 2.3561371591868533e-06,
      "loss": 0.4797,
      "step": 8848
    },
    {
      "epoch": 1.0849681216282492,
      "grad_norm": 1.996490961121324,
      "learning_rate": 2.3556311627375947e-06,
      "loss": 0.4449,
      "step": 8849
    },
    {
      "epoch": 1.0850907307503679,
      "grad_norm": 2.1209238908221737,
      "learning_rate": 2.3551251722221526e-06,
      "loss": 0.4369,
      "step": 8850
    },
    {
      "epoch": 1.0852133398724866,
      "grad_norm": 1.8829249665405035,
      "learning_rate": 2.3546191876613202e-06,
      "loss": 0.4382,
      "step": 8851
    },
    {
      "epoch": 1.0853359489946053,
      "grad_norm": 2.0219622158936836,
      "learning_rate": 2.3541132090758975e-06,
      "loss": 0.4582,
      "step": 8852
    },
    {
      "epoch": 1.085458558116724,
      "grad_norm": 1.941953558448098,
      "learning_rate": 2.3536072364866804e-06,
      "loss": 0.3904,
      "step": 8853
    },
    {
      "epoch": 1.0855811672388427,
      "grad_norm": 1.857505689671974,
      "learning_rate": 2.3531012699144638e-06,
      "loss": 0.4227,
      "step": 8854
    },
    {
      "epoch": 1.0857037763609612,
      "grad_norm": 2.015997303349217,
      "learning_rate": 2.3525953093800475e-06,
      "loss": 0.4517,
      "step": 8855
    },
    {
      "epoch": 1.0858263854830799,
      "grad_norm": 1.9079575688772823,
      "learning_rate": 2.352089354904223e-06,
      "loss": 0.4553,
      "step": 8856
    },
    {
      "epoch": 1.0859489946051986,
      "grad_norm": 2.0811573453569037,
      "learning_rate": 2.351583406507788e-06,
      "loss": 0.4479,
      "step": 8857
    },
    {
      "epoch": 1.0860716037273173,
      "grad_norm": 2.006853449089152,
      "learning_rate": 2.3510774642115382e-06,
      "loss": 0.4292,
      "step": 8858
    },
    {
      "epoch": 1.086194212849436,
      "grad_norm": 1.9113211267681072,
      "learning_rate": 2.3505715280362687e-06,
      "loss": 0.4344,
      "step": 8859
    },
    {
      "epoch": 1.0863168219715547,
      "grad_norm": 1.7018897508640298,
      "learning_rate": 2.3500655980027733e-06,
      "loss": 0.4261,
      "step": 8860
    },
    {
      "epoch": 1.0864394310936734,
      "grad_norm": 1.9229957624268321,
      "learning_rate": 2.349559674131847e-06,
      "loss": 0.3926,
      "step": 8861
    },
    {
      "epoch": 1.086562040215792,
      "grad_norm": 1.89532671551136,
      "learning_rate": 2.349053756444285e-06,
      "loss": 0.4511,
      "step": 8862
    },
    {
      "epoch": 1.0866846493379108,
      "grad_norm": 2.053286099466552,
      "learning_rate": 2.348547844960881e-06,
      "loss": 0.416,
      "step": 8863
    },
    {
      "epoch": 1.0868072584600295,
      "grad_norm": 2.155012066345788,
      "learning_rate": 2.3480419397024283e-06,
      "loss": 0.4387,
      "step": 8864
    },
    {
      "epoch": 1.0869298675821482,
      "grad_norm": 2.0332510096939185,
      "learning_rate": 2.347536040689721e-06,
      "loss": 0.4208,
      "step": 8865
    },
    {
      "epoch": 1.0870524767042669,
      "grad_norm": 2.037203869533806,
      "learning_rate": 2.3470301479435523e-06,
      "loss": 0.4538,
      "step": 8866
    },
    {
      "epoch": 1.0871750858263856,
      "grad_norm": 1.909535068000158,
      "learning_rate": 2.346524261484716e-06,
      "loss": 0.4446,
      "step": 8867
    },
    {
      "epoch": 1.0872976949485043,
      "grad_norm": 1.9114102871018368,
      "learning_rate": 2.3460183813340035e-06,
      "loss": 0.4391,
      "step": 8868
    },
    {
      "epoch": 1.087420304070623,
      "grad_norm": 1.7423808093108955,
      "learning_rate": 2.3455125075122076e-06,
      "loss": 0.4345,
      "step": 8869
    },
    {
      "epoch": 1.0875429131927414,
      "grad_norm": 1.7644262521061198,
      "learning_rate": 2.3450066400401218e-06,
      "loss": 0.4111,
      "step": 8870
    },
    {
      "epoch": 1.0876655223148601,
      "grad_norm": 1.9615268529802268,
      "learning_rate": 2.3445007789385377e-06,
      "loss": 0.4309,
      "step": 8871
    },
    {
      "epoch": 1.0877881314369788,
      "grad_norm": 1.9666999965687386,
      "learning_rate": 2.3439949242282467e-06,
      "loss": 0.4649,
      "step": 8872
    },
    {
      "epoch": 1.0879107405590975,
      "grad_norm": 1.887345110950761,
      "learning_rate": 2.3434890759300404e-06,
      "loss": 0.4108,
      "step": 8873
    },
    {
      "epoch": 1.0880333496812162,
      "grad_norm": 2.006998729372369,
      "learning_rate": 2.3429832340647097e-06,
      "loss": 0.4439,
      "step": 8874
    },
    {
      "epoch": 1.088155958803335,
      "grad_norm": 1.6025800611503718,
      "learning_rate": 2.3424773986530463e-06,
      "loss": 0.4126,
      "step": 8875
    },
    {
      "epoch": 1.0882785679254536,
      "grad_norm": 1.717639298425146,
      "learning_rate": 2.3419715697158414e-06,
      "loss": 0.4133,
      "step": 8876
    },
    {
      "epoch": 1.0884011770475723,
      "grad_norm": 2.004409520844068,
      "learning_rate": 2.3414657472738846e-06,
      "loss": 0.4621,
      "step": 8877
    },
    {
      "epoch": 1.088523786169691,
      "grad_norm": 2.1698923386756808,
      "learning_rate": 2.340959931347966e-06,
      "loss": 0.4309,
      "step": 8878
    },
    {
      "epoch": 1.0886463952918097,
      "grad_norm": 1.8278671850574877,
      "learning_rate": 2.3404541219588745e-06,
      "loss": 0.4809,
      "step": 8879
    },
    {
      "epoch": 1.0887690044139284,
      "grad_norm": 1.9165522275477838,
      "learning_rate": 2.339948319127404e-06,
      "loss": 0.4215,
      "step": 8880
    },
    {
      "epoch": 1.0888916135360471,
      "grad_norm": 2.148305078258702,
      "learning_rate": 2.339442522874338e-06,
      "loss": 0.4519,
      "step": 8881
    },
    {
      "epoch": 1.0890142226581658,
      "grad_norm": 1.98579714119777,
      "learning_rate": 2.33893673322047e-06,
      "loss": 0.4449,
      "step": 8882
    },
    {
      "epoch": 1.0891368317802845,
      "grad_norm": 1.8707626276156564,
      "learning_rate": 2.338430950186587e-06,
      "loss": 0.4335,
      "step": 8883
    },
    {
      "epoch": 1.0892594409024032,
      "grad_norm": 1.9826768522243454,
      "learning_rate": 2.3379251737934787e-06,
      "loss": 0.4197,
      "step": 8884
    },
    {
      "epoch": 1.089382050024522,
      "grad_norm": 1.892244217988638,
      "learning_rate": 2.337419404061932e-06,
      "loss": 0.4059,
      "step": 8885
    },
    {
      "epoch": 1.0895046591466404,
      "grad_norm": 2.1620711452973413,
      "learning_rate": 2.336913641012736e-06,
      "loss": 0.4447,
      "step": 8886
    },
    {
      "epoch": 1.089627268268759,
      "grad_norm": 1.705172714067648,
      "learning_rate": 2.336407884666677e-06,
      "loss": 0.4495,
      "step": 8887
    },
    {
      "epoch": 1.0897498773908778,
      "grad_norm": 1.947807512059467,
      "learning_rate": 2.3359021350445456e-06,
      "loss": 0.3869,
      "step": 8888
    },
    {
      "epoch": 1.0898724865129965,
      "grad_norm": 1.8268198988418372,
      "learning_rate": 2.3353963921671253e-06,
      "loss": 0.4461,
      "step": 8889
    },
    {
      "epoch": 1.0899950956351152,
      "grad_norm": 1.9644249514817185,
      "learning_rate": 2.334890656055205e-06,
      "loss": 0.4151,
      "step": 8890
    },
    {
      "epoch": 1.090117704757234,
      "grad_norm": 1.8015689897119405,
      "learning_rate": 2.334384926729571e-06,
      "loss": 0.4095,
      "step": 8891
    },
    {
      "epoch": 1.0902403138793526,
      "grad_norm": 1.8480235689397146,
      "learning_rate": 2.3338792042110105e-06,
      "loss": 0.4204,
      "step": 8892
    },
    {
      "epoch": 1.0903629230014713,
      "grad_norm": 1.8552320359561598,
      "learning_rate": 2.333373488520308e-06,
      "loss": 0.3976,
      "step": 8893
    },
    {
      "epoch": 1.09048553212359,
      "grad_norm": 1.8971549309822346,
      "learning_rate": 2.3328677796782496e-06,
      "loss": 0.4098,
      "step": 8894
    },
    {
      "epoch": 1.0906081412457087,
      "grad_norm": 1.9009093509813468,
      "learning_rate": 2.3323620777056224e-06,
      "loss": 0.427,
      "step": 8895
    },
    {
      "epoch": 1.0907307503678274,
      "grad_norm": 1.845071406191591,
      "learning_rate": 2.3318563826232104e-06,
      "loss": 0.4233,
      "step": 8896
    },
    {
      "epoch": 1.090853359489946,
      "grad_norm": 1.7633750393262861,
      "learning_rate": 2.3313506944517984e-06,
      "loss": 0.4054,
      "step": 8897
    },
    {
      "epoch": 1.0909759686120648,
      "grad_norm": 1.8556755639994296,
      "learning_rate": 2.330845013212172e-06,
      "loss": 0.4324,
      "step": 8898
    },
    {
      "epoch": 1.0910985777341835,
      "grad_norm": 2.016111150238442,
      "learning_rate": 2.330339338925114e-06,
      "loss": 0.4393,
      "step": 8899
    },
    {
      "epoch": 1.0912211868563022,
      "grad_norm": 1.8777675128403806,
      "learning_rate": 2.32983367161141e-06,
      "loss": 0.4594,
      "step": 8900
    },
    {
      "epoch": 1.0913437959784207,
      "grad_norm": 1.7434164576815747,
      "learning_rate": 2.329328011291844e-06,
      "loss": 0.3885,
      "step": 8901
    },
    {
      "epoch": 1.0914664051005394,
      "grad_norm": 1.9613377681911686,
      "learning_rate": 2.3288223579871984e-06,
      "loss": 0.4281,
      "step": 8902
    },
    {
      "epoch": 1.091589014222658,
      "grad_norm": 1.8945575769599212,
      "learning_rate": 2.328316711718257e-06,
      "loss": 0.417,
      "step": 8903
    },
    {
      "epoch": 1.0917116233447768,
      "grad_norm": 1.9374262967781097,
      "learning_rate": 2.3278110725058022e-06,
      "loss": 0.4366,
      "step": 8904
    },
    {
      "epoch": 1.0918342324668955,
      "grad_norm": 2.018239320308002,
      "learning_rate": 2.327305440370618e-06,
      "loss": 0.4226,
      "step": 8905
    },
    {
      "epoch": 1.0919568415890142,
      "grad_norm": 1.9483799762982104,
      "learning_rate": 2.3267998153334855e-06,
      "loss": 0.4758,
      "step": 8906
    },
    {
      "epoch": 1.0920794507111329,
      "grad_norm": 2.047709134948428,
      "learning_rate": 2.326294197415187e-06,
      "loss": 0.4703,
      "step": 8907
    },
    {
      "epoch": 1.0922020598332516,
      "grad_norm": 1.8873310810464805,
      "learning_rate": 2.3257885866365043e-06,
      "loss": 0.4406,
      "step": 8908
    },
    {
      "epoch": 1.0923246689553703,
      "grad_norm": 2.1651882896947767,
      "learning_rate": 2.3252829830182203e-06,
      "loss": 0.4716,
      "step": 8909
    },
    {
      "epoch": 1.092447278077489,
      "grad_norm": 1.8692675430744954,
      "learning_rate": 2.3247773865811142e-06,
      "loss": 0.3888,
      "step": 8910
    },
    {
      "epoch": 1.0925698871996077,
      "grad_norm": 1.7697846627766973,
      "learning_rate": 2.3242717973459676e-06,
      "loss": 0.4554,
      "step": 8911
    },
    {
      "epoch": 1.0926924963217264,
      "grad_norm": 1.8857213761078124,
      "learning_rate": 2.3237662153335608e-06,
      "loss": 0.4393,
      "step": 8912
    },
    {
      "epoch": 1.092815105443845,
      "grad_norm": 1.8646459063071683,
      "learning_rate": 2.3232606405646764e-06,
      "loss": 0.4391,
      "step": 8913
    },
    {
      "epoch": 1.0929377145659638,
      "grad_norm": 1.8108832838915068,
      "learning_rate": 2.3227550730600906e-06,
      "loss": 0.4124,
      "step": 8914
    },
    {
      "epoch": 1.0930603236880825,
      "grad_norm": 1.829265018192237,
      "learning_rate": 2.3222495128405858e-06,
      "loss": 0.4178,
      "step": 8915
    },
    {
      "epoch": 1.0931829328102012,
      "grad_norm": 1.8361627610485312,
      "learning_rate": 2.3217439599269404e-06,
      "loss": 0.4224,
      "step": 8916
    },
    {
      "epoch": 1.0933055419323199,
      "grad_norm": 1.8899311649265564,
      "learning_rate": 2.3212384143399348e-06,
      "loss": 0.4487,
      "step": 8917
    },
    {
      "epoch": 1.0934281510544384,
      "grad_norm": 1.8175252773230548,
      "learning_rate": 2.320732876100346e-06,
      "loss": 0.457,
      "step": 8918
    },
    {
      "epoch": 1.093550760176557,
      "grad_norm": 1.9096473834847814,
      "learning_rate": 2.3202273452289532e-06,
      "loss": 0.4242,
      "step": 8919
    },
    {
      "epoch": 1.0936733692986758,
      "grad_norm": 1.7964158665407506,
      "learning_rate": 2.319721821746535e-06,
      "loss": 0.4453,
      "step": 8920
    },
    {
      "epoch": 1.0937959784207945,
      "grad_norm": 2.063126934318091,
      "learning_rate": 2.3192163056738697e-06,
      "loss": 0.4763,
      "step": 8921
    },
    {
      "epoch": 1.0939185875429132,
      "grad_norm": 1.794948314250084,
      "learning_rate": 2.3187107970317337e-06,
      "loss": 0.4715,
      "step": 8922
    },
    {
      "epoch": 1.0940411966650319,
      "grad_norm": 1.7829376566081312,
      "learning_rate": 2.318205295840905e-06,
      "loss": 0.4454,
      "step": 8923
    },
    {
      "epoch": 1.0941638057871506,
      "grad_norm": 1.8303663669190042,
      "learning_rate": 2.31769980212216e-06,
      "loss": 0.4054,
      "step": 8924
    },
    {
      "epoch": 1.0942864149092693,
      "grad_norm": 1.9231602470664226,
      "learning_rate": 2.317194315896276e-06,
      "loss": 0.4142,
      "step": 8925
    },
    {
      "epoch": 1.094409024031388,
      "grad_norm": 1.7782407972203718,
      "learning_rate": 2.3166888371840304e-06,
      "loss": 0.4625,
      "step": 8926
    },
    {
      "epoch": 1.0945316331535067,
      "grad_norm": 1.7560579600280528,
      "learning_rate": 2.316183366006197e-06,
      "loss": 0.4453,
      "step": 8927
    },
    {
      "epoch": 1.0946542422756254,
      "grad_norm": 1.7863627606565171,
      "learning_rate": 2.315677902383553e-06,
      "loss": 0.4434,
      "step": 8928
    },
    {
      "epoch": 1.094776851397744,
      "grad_norm": 1.8578502282655531,
      "learning_rate": 2.315172446336873e-06,
      "loss": 0.4244,
      "step": 8929
    },
    {
      "epoch": 1.0948994605198628,
      "grad_norm": 1.9672867626134036,
      "learning_rate": 2.3146669978869338e-06,
      "loss": 0.4734,
      "step": 8930
    },
    {
      "epoch": 1.0950220696419815,
      "grad_norm": 1.9218634898846871,
      "learning_rate": 2.3141615570545083e-06,
      "loss": 0.4608,
      "step": 8931
    },
    {
      "epoch": 1.0951446787641,
      "grad_norm": 1.9482130630513572,
      "learning_rate": 2.3136561238603713e-06,
      "loss": 0.4219,
      "step": 8932
    },
    {
      "epoch": 1.0952672878862186,
      "grad_norm": 1.8788317938490595,
      "learning_rate": 2.313150698325298e-06,
      "loss": 0.4076,
      "step": 8933
    },
    {
      "epoch": 1.0953898970083373,
      "grad_norm": 1.8199116484721785,
      "learning_rate": 2.3126452804700627e-06,
      "loss": 0.3971,
      "step": 8934
    },
    {
      "epoch": 1.095512506130456,
      "grad_norm": 1.896208013924481,
      "learning_rate": 2.3121398703154375e-06,
      "loss": 0.4389,
      "step": 8935
    },
    {
      "epoch": 1.0956351152525747,
      "grad_norm": 1.7189275668410968,
      "learning_rate": 2.311634467882196e-06,
      "loss": 0.3874,
      "step": 8936
    },
    {
      "epoch": 1.0957577243746934,
      "grad_norm": 2.105108186155204,
      "learning_rate": 2.3111290731911106e-06,
      "loss": 0.4859,
      "step": 8937
    },
    {
      "epoch": 1.0958803334968121,
      "grad_norm": 1.8462393083265578,
      "learning_rate": 2.3106236862629564e-06,
      "loss": 0.4017,
      "step": 8938
    },
    {
      "epoch": 1.0960029426189308,
      "grad_norm": 1.916701816174214,
      "learning_rate": 2.310118307118502e-06,
      "loss": 0.4249,
      "step": 8939
    },
    {
      "epoch": 1.0961255517410495,
      "grad_norm": 2.087821580574781,
      "learning_rate": 2.3096129357785227e-06,
      "loss": 0.4322,
      "step": 8940
    },
    {
      "epoch": 1.0962481608631682,
      "grad_norm": 1.937918763574948,
      "learning_rate": 2.3091075722637885e-06,
      "loss": 0.4546,
      "step": 8941
    },
    {
      "epoch": 1.096370769985287,
      "grad_norm": 1.9100626980703712,
      "learning_rate": 2.3086022165950714e-06,
      "loss": 0.4241,
      "step": 8942
    },
    {
      "epoch": 1.0964933791074056,
      "grad_norm": 2.1147688908409132,
      "learning_rate": 2.3080968687931414e-06,
      "loss": 0.4201,
      "step": 8943
    },
    {
      "epoch": 1.0966159882295243,
      "grad_norm": 1.9397852602250034,
      "learning_rate": 2.30759152887877e-06,
      "loss": 0.488,
      "step": 8944
    },
    {
      "epoch": 1.096738597351643,
      "grad_norm": 1.8789304691830249,
      "learning_rate": 2.3070861968727266e-06,
      "loss": 0.4043,
      "step": 8945
    },
    {
      "epoch": 1.0968612064737617,
      "grad_norm": 2.2323623287098835,
      "learning_rate": 2.306580872795784e-06,
      "loss": 0.4457,
      "step": 8946
    },
    {
      "epoch": 1.0969838155958804,
      "grad_norm": 1.8776047748246136,
      "learning_rate": 2.306075556668708e-06,
      "loss": 0.4421,
      "step": 8947
    },
    {
      "epoch": 1.0971064247179991,
      "grad_norm": 1.9368974590092325,
      "learning_rate": 2.305570248512271e-06,
      "loss": 0.4559,
      "step": 8948
    },
    {
      "epoch": 1.0972290338401176,
      "grad_norm": 1.883468800076731,
      "learning_rate": 2.305064948347241e-06,
      "loss": 0.4616,
      "step": 8949
    },
    {
      "epoch": 1.0973516429622363,
      "grad_norm": 1.8062507769641767,
      "learning_rate": 2.3045596561943857e-06,
      "loss": 0.449,
      "step": 8950
    },
    {
      "epoch": 1.097474252084355,
      "grad_norm": 1.9728913950416884,
      "learning_rate": 2.3040543720744764e-06,
      "loss": 0.4818,
      "step": 8951
    },
    {
      "epoch": 1.0975968612064737,
      "grad_norm": 2.004468483107807,
      "learning_rate": 2.3035490960082775e-06,
      "loss": 0.4586,
      "step": 8952
    },
    {
      "epoch": 1.0977194703285924,
      "grad_norm": 1.6951408067606692,
      "learning_rate": 2.303043828016559e-06,
      "loss": 0.4409,
      "step": 8953
    },
    {
      "epoch": 1.097842079450711,
      "grad_norm": 1.87911960988038,
      "learning_rate": 2.3025385681200882e-06,
      "loss": 0.4122,
      "step": 8954
    },
    {
      "epoch": 1.0979646885728298,
      "grad_norm": 2.022548531864101,
      "learning_rate": 2.302033316339632e-06,
      "loss": 0.4223,
      "step": 8955
    },
    {
      "epoch": 1.0980872976949485,
      "grad_norm": 1.9417429778684294,
      "learning_rate": 2.3015280726959567e-06,
      "loss": 0.4352,
      "step": 8956
    },
    {
      "epoch": 1.0982099068170672,
      "grad_norm": 1.8538105132176554,
      "learning_rate": 2.3010228372098285e-06,
      "loss": 0.4495,
      "step": 8957
    },
    {
      "epoch": 1.098332515939186,
      "grad_norm": 1.8840648707030958,
      "learning_rate": 2.3005176099020144e-06,
      "loss": 0.444,
      "step": 8958
    },
    {
      "epoch": 1.0984551250613046,
      "grad_norm": 1.928278056279964,
      "learning_rate": 2.30001239079328e-06,
      "loss": 0.4427,
      "step": 8959
    },
    {
      "epoch": 1.0985777341834233,
      "grad_norm": 1.911720950811444,
      "learning_rate": 2.2995071799043903e-06,
      "loss": 0.4603,
      "step": 8960
    },
    {
      "epoch": 1.098700343305542,
      "grad_norm": 2.023701196225908,
      "learning_rate": 2.29900197725611e-06,
      "loss": 0.4627,
      "step": 8961
    },
    {
      "epoch": 1.0988229524276607,
      "grad_norm": 2.12049322840955,
      "learning_rate": 2.2984967828692045e-06,
      "loss": 0.4257,
      "step": 8962
    },
    {
      "epoch": 1.0989455615497794,
      "grad_norm": 1.9160753770601864,
      "learning_rate": 2.297991596764438e-06,
      "loss": 0.4623,
      "step": 8963
    },
    {
      "epoch": 1.0990681706718979,
      "grad_norm": 1.7875360874714914,
      "learning_rate": 2.297486418962575e-06,
      "loss": 0.4982,
      "step": 8964
    },
    {
      "epoch": 1.0991907797940166,
      "grad_norm": 1.881201411983938,
      "learning_rate": 2.2969812494843773e-06,
      "loss": 0.4006,
      "step": 8965
    },
    {
      "epoch": 1.0993133889161353,
      "grad_norm": 1.9343330248567139,
      "learning_rate": 2.2964760883506107e-06,
      "loss": 0.4532,
      "step": 8966
    },
    {
      "epoch": 1.099435998038254,
      "grad_norm": 1.9240014975970496,
      "learning_rate": 2.2959709355820377e-06,
      "loss": 0.4819,
      "step": 8967
    },
    {
      "epoch": 1.0995586071603727,
      "grad_norm": 1.9637066927154072,
      "learning_rate": 2.2954657911994195e-06,
      "loss": 0.4306,
      "step": 8968
    },
    {
      "epoch": 1.0996812162824914,
      "grad_norm": 1.7724813246302658,
      "learning_rate": 2.2949606552235194e-06,
      "loss": 0.3956,
      "step": 8969
    },
    {
      "epoch": 1.09980382540461,
      "grad_norm": 1.8260017126895929,
      "learning_rate": 2.294455527675099e-06,
      "loss": 0.436,
      "step": 8970
    },
    {
      "epoch": 1.0999264345267288,
      "grad_norm": 1.9123375550459711,
      "learning_rate": 2.293950408574921e-06,
      "loss": 0.4272,
      "step": 8971
    },
    {
      "epoch": 1.1000490436488475,
      "grad_norm": 1.948940487011613,
      "learning_rate": 2.2934452979437466e-06,
      "loss": 0.4328,
      "step": 8972
    },
    {
      "epoch": 1.1001716527709662,
      "grad_norm": 2.0254739626184195,
      "learning_rate": 2.2929401958023353e-06,
      "loss": 0.4104,
      "step": 8973
    },
    {
      "epoch": 1.1002942618930849,
      "grad_norm": 1.8764196032561693,
      "learning_rate": 2.2924351021714487e-06,
      "loss": 0.4403,
      "step": 8974
    },
    {
      "epoch": 1.1004168710152036,
      "grad_norm": 2.0963732422270698,
      "learning_rate": 2.291930017071846e-06,
      "loss": 0.5014,
      "step": 8975
    },
    {
      "epoch": 1.1005394801373223,
      "grad_norm": 2.1536239526325707,
      "learning_rate": 2.29142494052429e-06,
      "loss": 0.4233,
      "step": 8976
    },
    {
      "epoch": 1.100662089259441,
      "grad_norm": 2.082970232121533,
      "learning_rate": 2.2909198725495364e-06,
      "loss": 0.4063,
      "step": 8977
    },
    {
      "epoch": 1.1007846983815597,
      "grad_norm": 2.0275105512971336,
      "learning_rate": 2.290414813168347e-06,
      "loss": 0.4382,
      "step": 8978
    },
    {
      "epoch": 1.1009073075036784,
      "grad_norm": 2.109517953977175,
      "learning_rate": 2.289909762401479e-06,
      "loss": 0.4768,
      "step": 8979
    },
    {
      "epoch": 1.1010299166257969,
      "grad_norm": 2.0324602318900986,
      "learning_rate": 2.289404720269693e-06,
      "loss": 0.4481,
      "step": 8980
    },
    {
      "epoch": 1.1011525257479156,
      "grad_norm": 2.0701150097207255,
      "learning_rate": 2.288899686793745e-06,
      "loss": 0.4097,
      "step": 8981
    },
    {
      "epoch": 1.1012751348700343,
      "grad_norm": 1.959804101774473,
      "learning_rate": 2.288394661994394e-06,
      "loss": 0.4287,
      "step": 8982
    },
    {
      "epoch": 1.101397743992153,
      "grad_norm": 1.9373868515569004,
      "learning_rate": 2.287889645892396e-06,
      "loss": 0.4355,
      "step": 8983
    },
    {
      "epoch": 1.1015203531142717,
      "grad_norm": 1.9231089770534766,
      "learning_rate": 2.2873846385085106e-06,
      "loss": 0.4358,
      "step": 8984
    },
    {
      "epoch": 1.1016429622363904,
      "grad_norm": 2.033877894456788,
      "learning_rate": 2.2868796398634917e-06,
      "loss": 0.456,
      "step": 8985
    },
    {
      "epoch": 1.101765571358509,
      "grad_norm": 2.102972820435061,
      "learning_rate": 2.2863746499780974e-06,
      "loss": 0.4376,
      "step": 8986
    },
    {
      "epoch": 1.1018881804806278,
      "grad_norm": 1.9483985570253295,
      "learning_rate": 2.285869668873083e-06,
      "loss": 0.4214,
      "step": 8987
    },
    {
      "epoch": 1.1020107896027465,
      "grad_norm": 1.9206671139667282,
      "learning_rate": 2.285364696569205e-06,
      "loss": 0.413,
      "step": 8988
    },
    {
      "epoch": 1.1021333987248652,
      "grad_norm": 1.781832229348696,
      "learning_rate": 2.2848597330872176e-06,
      "loss": 0.4282,
      "step": 8989
    },
    {
      "epoch": 1.1022560078469839,
      "grad_norm": 1.9748152149030302,
      "learning_rate": 2.284354778447875e-06,
      "loss": 0.4364,
      "step": 8990
    },
    {
      "epoch": 1.1023786169691026,
      "grad_norm": 1.9841848285736685,
      "learning_rate": 2.2838498326719334e-06,
      "loss": 0.4627,
      "step": 8991
    },
    {
      "epoch": 1.1025012260912213,
      "grad_norm": 1.9449785521092744,
      "learning_rate": 2.283344895780147e-06,
      "loss": 0.3896,
      "step": 8992
    },
    {
      "epoch": 1.10262383521334,
      "grad_norm": 2.0277597329583035,
      "learning_rate": 2.2828399677932686e-06,
      "loss": 0.4394,
      "step": 8993
    },
    {
      "epoch": 1.1027464443354587,
      "grad_norm": 1.812705287694694,
      "learning_rate": 2.282335048732052e-06,
      "loss": 0.4302,
      "step": 8994
    },
    {
      "epoch": 1.1028690534575771,
      "grad_norm": 1.95656240352732,
      "learning_rate": 2.281830138617249e-06,
      "loss": 0.4612,
      "step": 8995
    },
    {
      "epoch": 1.1029916625796958,
      "grad_norm": 2.1956178040714005,
      "learning_rate": 2.281325237469615e-06,
      "loss": 0.4484,
      "step": 8996
    },
    {
      "epoch": 1.1031142717018145,
      "grad_norm": 1.8298779765756759,
      "learning_rate": 2.280820345309901e-06,
      "loss": 0.3962,
      "step": 8997
    },
    {
      "epoch": 1.1032368808239332,
      "grad_norm": 1.9369213836189252,
      "learning_rate": 2.2803154621588585e-06,
      "loss": 0.4174,
      "step": 8998
    },
    {
      "epoch": 1.103359489946052,
      "grad_norm": 1.9918177544204818,
      "learning_rate": 2.279810588037239e-06,
      "loss": 0.4394,
      "step": 8999
    },
    {
      "epoch": 1.1034820990681706,
      "grad_norm": 1.9792890126363971,
      "learning_rate": 2.2793057229657943e-06,
      "loss": 0.393,
      "step": 9000
    },
    {
      "epoch": 1.1036047081902893,
      "grad_norm": 2.0145967190867133,
      "learning_rate": 2.278800866965276e-06,
      "loss": 0.4328,
      "step": 9001
    },
    {
      "epoch": 1.103727317312408,
      "grad_norm": 2.085481364192501,
      "learning_rate": 2.2782960200564325e-06,
      "loss": 0.398,
      "step": 9002
    },
    {
      "epoch": 1.1038499264345267,
      "grad_norm": 2.0188577381486645,
      "learning_rate": 2.277791182260015e-06,
      "loss": 0.4613,
      "step": 9003
    },
    {
      "epoch": 1.1039725355566454,
      "grad_norm": 1.9963680434623006,
      "learning_rate": 2.277286353596774e-06,
      "loss": 0.4105,
      "step": 9004
    },
    {
      "epoch": 1.1040951446787641,
      "grad_norm": 1.8662656340907606,
      "learning_rate": 2.2767815340874584e-06,
      "loss": 0.4067,
      "step": 9005
    },
    {
      "epoch": 1.1042177538008828,
      "grad_norm": 1.880787549466086,
      "learning_rate": 2.276276723752816e-06,
      "loss": 0.4355,
      "step": 9006
    },
    {
      "epoch": 1.1043403629230015,
      "grad_norm": 2.0540896693548265,
      "learning_rate": 2.275771922613597e-06,
      "loss": 0.4336,
      "step": 9007
    },
    {
      "epoch": 1.1044629720451202,
      "grad_norm": 1.8989120235668933,
      "learning_rate": 2.275267130690548e-06,
      "loss": 0.461,
      "step": 9008
    },
    {
      "epoch": 1.104585581167239,
      "grad_norm": 2.0411665442446805,
      "learning_rate": 2.2747623480044194e-06,
      "loss": 0.4132,
      "step": 9009
    },
    {
      "epoch": 1.1047081902893576,
      "grad_norm": 2.037804852186664,
      "learning_rate": 2.2742575745759557e-06,
      "loss": 0.442,
      "step": 9010
    },
    {
      "epoch": 1.1048307994114763,
      "grad_norm": 1.9550352530407946,
      "learning_rate": 2.273752810425906e-06,
      "loss": 0.48,
      "step": 9011
    },
    {
      "epoch": 1.1049534085335948,
      "grad_norm": 1.8673660211067062,
      "learning_rate": 2.273248055575016e-06,
      "loss": 0.4417,
      "step": 9012
    },
    {
      "epoch": 1.1050760176557135,
      "grad_norm": 1.9149021239406758,
      "learning_rate": 2.2727433100440326e-06,
      "loss": 0.4391,
      "step": 9013
    },
    {
      "epoch": 1.1051986267778322,
      "grad_norm": 1.8814319117392073,
      "learning_rate": 2.2722385738537013e-06,
      "loss": 0.4413,
      "step": 9014
    },
    {
      "epoch": 1.105321235899951,
      "grad_norm": 1.996116253619728,
      "learning_rate": 2.2717338470247675e-06,
      "loss": 0.4257,
      "step": 9015
    },
    {
      "epoch": 1.1054438450220696,
      "grad_norm": 1.960618363334187,
      "learning_rate": 2.2712291295779772e-06,
      "loss": 0.4272,
      "step": 9016
    },
    {
      "epoch": 1.1055664541441883,
      "grad_norm": 1.7791181807407601,
      "learning_rate": 2.2707244215340747e-06,
      "loss": 0.4643,
      "step": 9017
    },
    {
      "epoch": 1.105689063266307,
      "grad_norm": 1.748474822619174,
      "learning_rate": 2.270219722913804e-06,
      "loss": 0.4096,
      "step": 9018
    },
    {
      "epoch": 1.1058116723884257,
      "grad_norm": 1.8603879045160392,
      "learning_rate": 2.26971503373791e-06,
      "loss": 0.4191,
      "step": 9019
    },
    {
      "epoch": 1.1059342815105444,
      "grad_norm": 1.8572977494784688,
      "learning_rate": 2.2692103540271354e-06,
      "loss": 0.423,
      "step": 9020
    },
    {
      "epoch": 1.106056890632663,
      "grad_norm": 2.061217029652308,
      "learning_rate": 2.2687056838022236e-06,
      "loss": 0.4655,
      "step": 9021
    },
    {
      "epoch": 1.1061794997547818,
      "grad_norm": 2.095921650757783,
      "learning_rate": 2.2682010230839187e-06,
      "loss": 0.4562,
      "step": 9022
    },
    {
      "epoch": 1.1063021088769005,
      "grad_norm": 2.1129559640775843,
      "learning_rate": 2.2676963718929607e-06,
      "loss": 0.477,
      "step": 9023
    },
    {
      "epoch": 1.1064247179990192,
      "grad_norm": 1.8754112560271532,
      "learning_rate": 2.267191730250094e-06,
      "loss": 0.4397,
      "step": 9024
    },
    {
      "epoch": 1.106547327121138,
      "grad_norm": 2.0752259140840117,
      "learning_rate": 2.266687098176059e-06,
      "loss": 0.4279,
      "step": 9025
    },
    {
      "epoch": 1.1066699362432566,
      "grad_norm": 1.8571087875301542,
      "learning_rate": 2.2661824756915982e-06,
      "loss": 0.4266,
      "step": 9026
    },
    {
      "epoch": 1.106792545365375,
      "grad_norm": 1.8796949795104638,
      "learning_rate": 2.2656778628174505e-06,
      "loss": 0.4226,
      "step": 9027
    },
    {
      "epoch": 1.1069151544874938,
      "grad_norm": 1.940990876433302,
      "learning_rate": 2.2651732595743574e-06,
      "loss": 0.4958,
      "step": 9028
    },
    {
      "epoch": 1.1070377636096125,
      "grad_norm": 1.9005237678742577,
      "learning_rate": 2.2646686659830595e-06,
      "loss": 0.4257,
      "step": 9029
    },
    {
      "epoch": 1.1071603727317312,
      "grad_norm": 1.8612318150171683,
      "learning_rate": 2.2641640820642964e-06,
      "loss": 0.4316,
      "step": 9030
    },
    {
      "epoch": 1.1072829818538499,
      "grad_norm": 1.9845435162147715,
      "learning_rate": 2.2636595078388068e-06,
      "loss": 0.4221,
      "step": 9031
    },
    {
      "epoch": 1.1074055909759686,
      "grad_norm": 1.8050096004620553,
      "learning_rate": 2.2631549433273294e-06,
      "loss": 0.4208,
      "step": 9032
    },
    {
      "epoch": 1.1075282000980873,
      "grad_norm": 1.9788237629035221,
      "learning_rate": 2.2626503885506027e-06,
      "loss": 0.4326,
      "step": 9033
    },
    {
      "epoch": 1.107650809220206,
      "grad_norm": 1.8428468236831792,
      "learning_rate": 2.2621458435293675e-06,
      "loss": 0.4222,
      "step": 9034
    },
    {
      "epoch": 1.1077734183423247,
      "grad_norm": 1.8260633077350126,
      "learning_rate": 2.261641308284357e-06,
      "loss": 0.4113,
      "step": 9035
    },
    {
      "epoch": 1.1078960274644434,
      "grad_norm": 2.0490637384317725,
      "learning_rate": 2.2611367828363113e-06,
      "loss": 0.4325,
      "step": 9036
    },
    {
      "epoch": 1.108018636586562,
      "grad_norm": 1.907703123460342,
      "learning_rate": 2.2606322672059665e-06,
      "loss": 0.4128,
      "step": 9037
    },
    {
      "epoch": 1.1081412457086808,
      "grad_norm": 1.7310947462228543,
      "learning_rate": 2.2601277614140606e-06,
      "loss": 0.3974,
      "step": 9038
    },
    {
      "epoch": 1.1082638548307995,
      "grad_norm": 2.017104413335881,
      "learning_rate": 2.2596232654813273e-06,
      "loss": 0.4196,
      "step": 9039
    },
    {
      "epoch": 1.1083864639529182,
      "grad_norm": 1.884776509702491,
      "learning_rate": 2.259118779428503e-06,
      "loss": 0.4138,
      "step": 9040
    },
    {
      "epoch": 1.1085090730750369,
      "grad_norm": 1.6787694230427177,
      "learning_rate": 2.2586143032763235e-06,
      "loss": 0.4329,
      "step": 9041
    },
    {
      "epoch": 1.1086316821971556,
      "grad_norm": 2.0585636472015865,
      "learning_rate": 2.2581098370455247e-06,
      "loss": 0.4825,
      "step": 9042
    },
    {
      "epoch": 1.108754291319274,
      "grad_norm": 1.7303830602160781,
      "learning_rate": 2.2576053807568377e-06,
      "loss": 0.3758,
      "step": 9043
    },
    {
      "epoch": 1.1088769004413928,
      "grad_norm": 2.12933543306243,
      "learning_rate": 2.2571009344309998e-06,
      "loss": 0.4436,
      "step": 9044
    },
    {
      "epoch": 1.1089995095635115,
      "grad_norm": 2.0169389552237473,
      "learning_rate": 2.2565964980887436e-06,
      "loss": 0.44,
      "step": 9045
    },
    {
      "epoch": 1.1091221186856302,
      "grad_norm": 2.0053700417414007,
      "learning_rate": 2.2560920717508016e-06,
      "loss": 0.4311,
      "step": 9046
    },
    {
      "epoch": 1.1092447278077489,
      "grad_norm": 2.084868777187535,
      "learning_rate": 2.2555876554379087e-06,
      "loss": 0.4691,
      "step": 9047
    },
    {
      "epoch": 1.1093673369298676,
      "grad_norm": 1.8943410613891198,
      "learning_rate": 2.2550832491707944e-06,
      "loss": 0.4546,
      "step": 9048
    },
    {
      "epoch": 1.1094899460519863,
      "grad_norm": 1.9322425582196472,
      "learning_rate": 2.2545788529701927e-06,
      "loss": 0.4304,
      "step": 9049
    },
    {
      "epoch": 1.109612555174105,
      "grad_norm": 1.936911111313085,
      "learning_rate": 2.2540744668568345e-06,
      "loss": 0.4572,
      "step": 9050
    },
    {
      "epoch": 1.1097351642962237,
      "grad_norm": 1.9957312166296808,
      "learning_rate": 2.253570090851452e-06,
      "loss": 0.438,
      "step": 9051
    },
    {
      "epoch": 1.1098577734183424,
      "grad_norm": 1.8359605181808085,
      "learning_rate": 2.253065724974774e-06,
      "loss": 0.4021,
      "step": 9052
    },
    {
      "epoch": 1.109980382540461,
      "grad_norm": 2.114031759747428,
      "learning_rate": 2.2525613692475316e-06,
      "loss": 0.4744,
      "step": 9053
    },
    {
      "epoch": 1.1101029916625798,
      "grad_norm": 1.9001358617674666,
      "learning_rate": 2.2520570236904556e-06,
      "loss": 0.4297,
      "step": 9054
    },
    {
      "epoch": 1.1102256007846985,
      "grad_norm": 1.9444459909593734,
      "learning_rate": 2.2515526883242748e-06,
      "loss": 0.4407,
      "step": 9055
    },
    {
      "epoch": 1.1103482099068172,
      "grad_norm": 1.8566954084462064,
      "learning_rate": 2.2510483631697186e-06,
      "loss": 0.4412,
      "step": 9056
    },
    {
      "epoch": 1.1104708190289359,
      "grad_norm": 2.018001103730165,
      "learning_rate": 2.2505440482475153e-06,
      "loss": 0.4639,
      "step": 9057
    },
    {
      "epoch": 1.1105934281510543,
      "grad_norm": 1.8297444168648105,
      "learning_rate": 2.2500397435783925e-06,
      "loss": 0.4352,
      "step": 9058
    },
    {
      "epoch": 1.110716037273173,
      "grad_norm": 1.853495502828237,
      "learning_rate": 2.24953544918308e-06,
      "loss": 0.4366,
      "step": 9059
    },
    {
      "epoch": 1.1108386463952917,
      "grad_norm": 1.9433897151630968,
      "learning_rate": 2.2490311650823035e-06,
      "loss": 0.4586,
      "step": 9060
    },
    {
      "epoch": 1.1109612555174104,
      "grad_norm": 2.0321410994338724,
      "learning_rate": 2.2485268912967893e-06,
      "loss": 0.4585,
      "step": 9061
    },
    {
      "epoch": 1.1110838646395291,
      "grad_norm": 2.132026223280184,
      "learning_rate": 2.2480226278472662e-06,
      "loss": 0.4649,
      "step": 9062
    },
    {
      "epoch": 1.1112064737616478,
      "grad_norm": 1.9618857242325787,
      "learning_rate": 2.2475183747544595e-06,
      "loss": 0.4572,
      "step": 9063
    },
    {
      "epoch": 1.1113290828837665,
      "grad_norm": 2.182764368989424,
      "learning_rate": 2.247014132039094e-06,
      "loss": 0.4747,
      "step": 9064
    },
    {
      "epoch": 1.1114516920058852,
      "grad_norm": 2.0023264217223056,
      "learning_rate": 2.2465098997218953e-06,
      "loss": 0.3991,
      "step": 9065
    },
    {
      "epoch": 1.111574301128004,
      "grad_norm": 2.185004801732809,
      "learning_rate": 2.2460056778235883e-06,
      "loss": 0.4421,
      "step": 9066
    },
    {
      "epoch": 1.1116969102501226,
      "grad_norm": 1.895922201028192,
      "learning_rate": 2.245501466364898e-06,
      "loss": 0.4076,
      "step": 9067
    },
    {
      "epoch": 1.1118195193722413,
      "grad_norm": 1.8091421473738698,
      "learning_rate": 2.2449972653665487e-06,
      "loss": 0.4488,
      "step": 9068
    },
    {
      "epoch": 1.11194212849436,
      "grad_norm": 1.9919810942207403,
      "learning_rate": 2.2444930748492627e-06,
      "loss": 0.4346,
      "step": 9069
    },
    {
      "epoch": 1.1120647376164787,
      "grad_norm": 2.0163879952406516,
      "learning_rate": 2.2439888948337635e-06,
      "loss": 0.4573,
      "step": 9070
    },
    {
      "epoch": 1.1121873467385974,
      "grad_norm": 1.8230443781679861,
      "learning_rate": 2.2434847253407734e-06,
      "loss": 0.3934,
      "step": 9071
    },
    {
      "epoch": 1.1123099558607161,
      "grad_norm": 2.170534439112939,
      "learning_rate": 2.2429805663910166e-06,
      "loss": 0.4665,
      "step": 9072
    },
    {
      "epoch": 1.1124325649828348,
      "grad_norm": 1.8324947546953603,
      "learning_rate": 2.2424764180052126e-06,
      "loss": 0.4275,
      "step": 9073
    },
    {
      "epoch": 1.1125551741049535,
      "grad_norm": 1.81046273002175,
      "learning_rate": 2.2419722802040834e-06,
      "loss": 0.4433,
      "step": 9074
    },
    {
      "epoch": 1.112677783227072,
      "grad_norm": 1.9355129213918156,
      "learning_rate": 2.241468153008351e-06,
      "loss": 0.4245,
      "step": 9075
    },
    {
      "epoch": 1.1128003923491907,
      "grad_norm": 2.1309498373017006,
      "learning_rate": 2.2409640364387357e-06,
      "loss": 0.4518,
      "step": 9076
    },
    {
      "epoch": 1.1129230014713094,
      "grad_norm": 1.984921944632024,
      "learning_rate": 2.2404599305159562e-06,
      "loss": 0.4243,
      "step": 9077
    },
    {
      "epoch": 1.113045610593428,
      "grad_norm": 2.07227270744217,
      "learning_rate": 2.239955835260733e-06,
      "loss": 0.4722,
      "step": 9078
    },
    {
      "epoch": 1.1131682197155468,
      "grad_norm": 1.914599190459277,
      "learning_rate": 2.239451750693785e-06,
      "loss": 0.413,
      "step": 9079
    },
    {
      "epoch": 1.1132908288376655,
      "grad_norm": 1.9538188726151844,
      "learning_rate": 2.238947676835833e-06,
      "loss": 0.4351,
      "step": 9080
    },
    {
      "epoch": 1.1134134379597842,
      "grad_norm": 1.9020626483687386,
      "learning_rate": 2.238443613707592e-06,
      "loss": 0.404,
      "step": 9081
    },
    {
      "epoch": 1.113536047081903,
      "grad_norm": 2.1002213691655776,
      "learning_rate": 2.2379395613297817e-06,
      "loss": 0.4631,
      "step": 9082
    },
    {
      "epoch": 1.1136586562040216,
      "grad_norm": 1.958388477583197,
      "learning_rate": 2.2374355197231202e-06,
      "loss": 0.4263,
      "step": 9083
    },
    {
      "epoch": 1.1137812653261403,
      "grad_norm": 1.9516396561534248,
      "learning_rate": 2.2369314889083234e-06,
      "loss": 0.4719,
      "step": 9084
    },
    {
      "epoch": 1.113903874448259,
      "grad_norm": 1.8521037069391788,
      "learning_rate": 2.236427468906108e-06,
      "loss": 0.4434,
      "step": 9085
    },
    {
      "epoch": 1.1140264835703777,
      "grad_norm": 1.9443737534619951,
      "learning_rate": 2.2359234597371894e-06,
      "loss": 0.4549,
      "step": 9086
    },
    {
      "epoch": 1.1141490926924964,
      "grad_norm": 1.9988633792911719,
      "learning_rate": 2.235419461422285e-06,
      "loss": 0.4478,
      "step": 9087
    },
    {
      "epoch": 1.114271701814615,
      "grad_norm": 1.951490032570261,
      "learning_rate": 2.2349154739821094e-06,
      "loss": 0.4517,
      "step": 9088
    },
    {
      "epoch": 1.1143943109367336,
      "grad_norm": 1.758769687050496,
      "learning_rate": 2.2344114974373767e-06,
      "loss": 0.4332,
      "step": 9089
    },
    {
      "epoch": 1.1145169200588523,
      "grad_norm": 1.863349117248898,
      "learning_rate": 2.2339075318088015e-06,
      "loss": 0.4477,
      "step": 9090
    },
    {
      "epoch": 1.114639529180971,
      "grad_norm": 1.9000221298728608,
      "learning_rate": 2.2334035771170975e-06,
      "loss": 0.3767,
      "step": 9091
    },
    {
      "epoch": 1.1147621383030897,
      "grad_norm": 2.016180156169013,
      "learning_rate": 2.2328996333829793e-06,
      "loss": 0.401,
      "step": 9092
    },
    {
      "epoch": 1.1148847474252084,
      "grad_norm": 1.9768543726656387,
      "learning_rate": 2.232395700627159e-06,
      "loss": 0.4365,
      "step": 9093
    },
    {
      "epoch": 1.115007356547327,
      "grad_norm": 2.1252846861278787,
      "learning_rate": 2.2318917788703486e-06,
      "loss": 0.4205,
      "step": 9094
    },
    {
      "epoch": 1.1151299656694458,
      "grad_norm": 1.8502389248913356,
      "learning_rate": 2.231387868133261e-06,
      "loss": 0.4022,
      "step": 9095
    },
    {
      "epoch": 1.1152525747915645,
      "grad_norm": 1.8202985066636777,
      "learning_rate": 2.2308839684366078e-06,
      "loss": 0.434,
      "step": 9096
    },
    {
      "epoch": 1.1153751839136832,
      "grad_norm": 1.939233695302859,
      "learning_rate": 2.2303800798010997e-06,
      "loss": 0.4512,
      "step": 9097
    },
    {
      "epoch": 1.1154977930358019,
      "grad_norm": 1.9384909874394338,
      "learning_rate": 2.229876202247448e-06,
      "loss": 0.4516,
      "step": 9098
    },
    {
      "epoch": 1.1156204021579206,
      "grad_norm": 1.7704264546076216,
      "learning_rate": 2.2293723357963616e-06,
      "loss": 0.4383,
      "step": 9099
    },
    {
      "epoch": 1.1157430112800393,
      "grad_norm": 1.9844804551183886,
      "learning_rate": 2.228868480468552e-06,
      "loss": 0.4043,
      "step": 9100
    },
    {
      "epoch": 1.115865620402158,
      "grad_norm": 1.8896523928577085,
      "learning_rate": 2.228364636284728e-06,
      "loss": 0.4308,
      "step": 9101
    },
    {
      "epoch": 1.1159882295242767,
      "grad_norm": 1.9796978024917031,
      "learning_rate": 2.2278608032655983e-06,
      "loss": 0.4596,
      "step": 9102
    },
    {
      "epoch": 1.1161108386463954,
      "grad_norm": 1.744494375491454,
      "learning_rate": 2.227356981431871e-06,
      "loss": 0.4065,
      "step": 9103
    },
    {
      "epoch": 1.116233447768514,
      "grad_norm": 1.8939006117568744,
      "learning_rate": 2.226853170804254e-06,
      "loss": 0.4442,
      "step": 9104
    },
    {
      "epoch": 1.1163560568906328,
      "grad_norm": 1.8920495338698122,
      "learning_rate": 2.226349371403457e-06,
      "loss": 0.4333,
      "step": 9105
    },
    {
      "epoch": 1.1164786660127513,
      "grad_norm": 2.0640598510630066,
      "learning_rate": 2.2258455832501837e-06,
      "loss": 0.4422,
      "step": 9106
    },
    {
      "epoch": 1.11660127513487,
      "grad_norm": 1.8876531105618137,
      "learning_rate": 2.2253418063651423e-06,
      "loss": 0.4081,
      "step": 9107
    },
    {
      "epoch": 1.1167238842569887,
      "grad_norm": 1.7324878048312147,
      "learning_rate": 2.2248380407690396e-06,
      "loss": 0.4352,
      "step": 9108
    },
    {
      "epoch": 1.1168464933791074,
      "grad_norm": 1.989528428900301,
      "learning_rate": 2.224334286482581e-06,
      "loss": 0.4225,
      "step": 9109
    },
    {
      "epoch": 1.116969102501226,
      "grad_norm": 1.8573173052059737,
      "learning_rate": 2.2238305435264705e-06,
      "loss": 0.4398,
      "step": 9110
    },
    {
      "epoch": 1.1170917116233448,
      "grad_norm": 1.7803374543048378,
      "learning_rate": 2.223326811921413e-06,
      "loss": 0.4094,
      "step": 9111
    },
    {
      "epoch": 1.1172143207454635,
      "grad_norm": 2.2260309131637217,
      "learning_rate": 2.222823091688114e-06,
      "loss": 0.4211,
      "step": 9112
    },
    {
      "epoch": 1.1173369298675822,
      "grad_norm": 2.0594673790912914,
      "learning_rate": 2.222319382847277e-06,
      "loss": 0.4364,
      "step": 9113
    },
    {
      "epoch": 1.1174595389897009,
      "grad_norm": 1.7707848348833848,
      "learning_rate": 2.2218156854196045e-06,
      "loss": 0.4378,
      "step": 9114
    },
    {
      "epoch": 1.1175821481118196,
      "grad_norm": 1.8769290771243503,
      "learning_rate": 2.2213119994258e-06,
      "loss": 0.4123,
      "step": 9115
    },
    {
      "epoch": 1.1177047572339383,
      "grad_norm": 1.7433477477155581,
      "learning_rate": 2.2208083248865654e-06,
      "loss": 0.4589,
      "step": 9116
    },
    {
      "epoch": 1.117827366356057,
      "grad_norm": 2.0284687973429176,
      "learning_rate": 2.2203046618226024e-06,
      "loss": 0.4389,
      "step": 9117
    },
    {
      "epoch": 1.1179499754781757,
      "grad_norm": 2.019068977404833,
      "learning_rate": 2.219801010254615e-06,
      "loss": 0.448,
      "step": 9118
    },
    {
      "epoch": 1.1180725846002944,
      "grad_norm": 1.799992217143987,
      "learning_rate": 2.2192973702033003e-06,
      "loss": 0.4275,
      "step": 9119
    },
    {
      "epoch": 1.118195193722413,
      "grad_norm": 1.9741535851781604,
      "learning_rate": 2.218793741689361e-06,
      "loss": 0.4975,
      "step": 9120
    },
    {
      "epoch": 1.1183178028445315,
      "grad_norm": 1.8953393952097977,
      "learning_rate": 2.218290124733497e-06,
      "loss": 0.4229,
      "step": 9121
    },
    {
      "epoch": 1.1184404119666502,
      "grad_norm": 1.8607291819383647,
      "learning_rate": 2.217786519356408e-06,
      "loss": 0.4581,
      "step": 9122
    },
    {
      "epoch": 1.118563021088769,
      "grad_norm": 1.78641445380603,
      "learning_rate": 2.217282925578792e-06,
      "loss": 0.4206,
      "step": 9123
    },
    {
      "epoch": 1.1186856302108876,
      "grad_norm": 1.8232459814574653,
      "learning_rate": 2.216779343421348e-06,
      "loss": 0.4039,
      "step": 9124
    },
    {
      "epoch": 1.1188082393330063,
      "grad_norm": 1.9978441340077169,
      "learning_rate": 2.2162757729047744e-06,
      "loss": 0.4851,
      "step": 9125
    },
    {
      "epoch": 1.118930848455125,
      "grad_norm": 1.764596244233502,
      "learning_rate": 2.21577221404977e-06,
      "loss": 0.4142,
      "step": 9126
    },
    {
      "epoch": 1.1190534575772437,
      "grad_norm": 1.94394198831443,
      "learning_rate": 2.21526866687703e-06,
      "loss": 0.4252,
      "step": 9127
    },
    {
      "epoch": 1.1191760666993624,
      "grad_norm": 1.806448532438524,
      "learning_rate": 2.214765131407252e-06,
      "loss": 0.4135,
      "step": 9128
    },
    {
      "epoch": 1.1192986758214811,
      "grad_norm": 1.9333315590968116,
      "learning_rate": 2.214261607661131e-06,
      "loss": 0.4475,
      "step": 9129
    },
    {
      "epoch": 1.1194212849435998,
      "grad_norm": 1.9201413986015226,
      "learning_rate": 2.2137580956593665e-06,
      "loss": 0.4312,
      "step": 9130
    },
    {
      "epoch": 1.1195438940657185,
      "grad_norm": 1.9483371177807407,
      "learning_rate": 2.2132545954226485e-06,
      "loss": 0.4436,
      "step": 9131
    },
    {
      "epoch": 1.1196665031878372,
      "grad_norm": 1.8696805631706925,
      "learning_rate": 2.212751106971675e-06,
      "loss": 0.4504,
      "step": 9132
    },
    {
      "epoch": 1.119789112309956,
      "grad_norm": 1.8500435960156254,
      "learning_rate": 2.2122476303271393e-06,
      "loss": 0.4255,
      "step": 9133
    },
    {
      "epoch": 1.1199117214320746,
      "grad_norm": 1.9104838564905358,
      "learning_rate": 2.2117441655097363e-06,
      "loss": 0.397,
      "step": 9134
    },
    {
      "epoch": 1.1200343305541933,
      "grad_norm": 1.8533928790040053,
      "learning_rate": 2.211240712540158e-06,
      "loss": 0.4254,
      "step": 9135
    },
    {
      "epoch": 1.120156939676312,
      "grad_norm": 2.0203353030961004,
      "learning_rate": 2.2107372714390973e-06,
      "loss": 0.4376,
      "step": 9136
    },
    {
      "epoch": 1.1202795487984305,
      "grad_norm": 1.88998508851927,
      "learning_rate": 2.2102338422272464e-06,
      "loss": 0.44,
      "step": 9137
    },
    {
      "epoch": 1.1204021579205492,
      "grad_norm": 1.9012489457908484,
      "learning_rate": 2.2097304249252984e-06,
      "loss": 0.4292,
      "step": 9138
    },
    {
      "epoch": 1.120524767042668,
      "grad_norm": 2.010443778514971,
      "learning_rate": 2.209227019553944e-06,
      "loss": 0.4107,
      "step": 9139
    },
    {
      "epoch": 1.1206473761647866,
      "grad_norm": 1.9887112528115445,
      "learning_rate": 2.208723626133873e-06,
      "loss": 0.4514,
      "step": 9140
    },
    {
      "epoch": 1.1207699852869053,
      "grad_norm": 1.9868065061659532,
      "learning_rate": 2.2082202446857774e-06,
      "loss": 0.4277,
      "step": 9141
    },
    {
      "epoch": 1.120892594409024,
      "grad_norm": 1.947463074308952,
      "learning_rate": 2.207716875230345e-06,
      "loss": 0.4467,
      "step": 9142
    },
    {
      "epoch": 1.1210152035311427,
      "grad_norm": 1.805428888342316,
      "learning_rate": 2.207213517788269e-06,
      "loss": 0.4605,
      "step": 9143
    },
    {
      "epoch": 1.1211378126532614,
      "grad_norm": 1.8267280109390942,
      "learning_rate": 2.2067101723802335e-06,
      "loss": 0.4258,
      "step": 9144
    },
    {
      "epoch": 1.12126042177538,
      "grad_norm": 1.9767120959797,
      "learning_rate": 2.20620683902693e-06,
      "loss": 0.4304,
      "step": 9145
    },
    {
      "epoch": 1.1213830308974988,
      "grad_norm": 1.9976716441135647,
      "learning_rate": 2.205703517749045e-06,
      "loss": 0.4402,
      "step": 9146
    },
    {
      "epoch": 1.1215056400196175,
      "grad_norm": 1.9756716719517544,
      "learning_rate": 2.2052002085672673e-06,
      "loss": 0.4384,
      "step": 9147
    },
    {
      "epoch": 1.1216282491417362,
      "grad_norm": 1.8684923098966482,
      "learning_rate": 2.2046969115022827e-06,
      "loss": 0.4494,
      "step": 9148
    },
    {
      "epoch": 1.121750858263855,
      "grad_norm": 1.779879457824013,
      "learning_rate": 2.2041936265747766e-06,
      "loss": 0.4068,
      "step": 9149
    },
    {
      "epoch": 1.1218734673859736,
      "grad_norm": 1.945937821592292,
      "learning_rate": 2.2036903538054375e-06,
      "loss": 0.4562,
      "step": 9150
    },
    {
      "epoch": 1.1219960765080923,
      "grad_norm": 2.11901772279279,
      "learning_rate": 2.2031870932149494e-06,
      "loss": 0.469,
      "step": 9151
    },
    {
      "epoch": 1.1221186856302108,
      "grad_norm": 1.9742453848997357,
      "learning_rate": 2.202683844823997e-06,
      "loss": 0.4666,
      "step": 9152
    },
    {
      "epoch": 1.1222412947523295,
      "grad_norm": 2.04360497188234,
      "learning_rate": 2.2021806086532647e-06,
      "loss": 0.4288,
      "step": 9153
    },
    {
      "epoch": 1.1223639038744482,
      "grad_norm": 1.8766435103809054,
      "learning_rate": 2.2016773847234366e-06,
      "loss": 0.4376,
      "step": 9154
    },
    {
      "epoch": 1.1224865129965669,
      "grad_norm": 2.140917634459362,
      "learning_rate": 2.201174173055197e-06,
      "loss": 0.4732,
      "step": 9155
    },
    {
      "epoch": 1.1226091221186856,
      "grad_norm": 2.050817564266374,
      "learning_rate": 2.2006709736692268e-06,
      "loss": 0.491,
      "step": 9156
    },
    {
      "epoch": 1.1227317312408043,
      "grad_norm": 2.1662606211019173,
      "learning_rate": 2.2001677865862094e-06,
      "loss": 0.394,
      "step": 9157
    },
    {
      "epoch": 1.122854340362923,
      "grad_norm": 1.9350200594186726,
      "learning_rate": 2.1996646118268273e-06,
      "loss": 0.4123,
      "step": 9158
    },
    {
      "epoch": 1.1229769494850417,
      "grad_norm": 1.7578496027101371,
      "learning_rate": 2.199161449411762e-06,
      "loss": 0.4145,
      "step": 9159
    },
    {
      "epoch": 1.1230995586071604,
      "grad_norm": 2.036204988426232,
      "learning_rate": 2.1986582993616926e-06,
      "loss": 0.4224,
      "step": 9160
    },
    {
      "epoch": 1.123222167729279,
      "grad_norm": 1.93165410405103,
      "learning_rate": 2.198155161697301e-06,
      "loss": 0.4084,
      "step": 9161
    },
    {
      "epoch": 1.1233447768513978,
      "grad_norm": 1.952093786026188,
      "learning_rate": 2.1976520364392664e-06,
      "loss": 0.4307,
      "step": 9162
    },
    {
      "epoch": 1.1234673859735165,
      "grad_norm": 2.126803171573834,
      "learning_rate": 2.1971489236082686e-06,
      "loss": 0.4655,
      "step": 9163
    },
    {
      "epoch": 1.1235899950956352,
      "grad_norm": 2.111730915043018,
      "learning_rate": 2.196645823224987e-06,
      "loss": 0.4767,
      "step": 9164
    },
    {
      "epoch": 1.1237126042177539,
      "grad_norm": 1.7604092426492597,
      "learning_rate": 2.196142735310098e-06,
      "loss": 0.4407,
      "step": 9165
    },
    {
      "epoch": 1.1238352133398726,
      "grad_norm": 1.896812941694171,
      "learning_rate": 2.195639659884281e-06,
      "loss": 0.4199,
      "step": 9166
    },
    {
      "epoch": 1.1239578224619913,
      "grad_norm": 1.7169863581343188,
      "learning_rate": 2.195136596968212e-06,
      "loss": 0.3978,
      "step": 9167
    },
    {
      "epoch": 1.12408043158411,
      "grad_norm": 1.934002426619837,
      "learning_rate": 2.1946335465825703e-06,
      "loss": 0.4658,
      "step": 9168
    },
    {
      "epoch": 1.1242030407062285,
      "grad_norm": 2.0305277718005326,
      "learning_rate": 2.1941305087480288e-06,
      "loss": 0.4458,
      "step": 9169
    },
    {
      "epoch": 1.1243256498283472,
      "grad_norm": 1.8882254766911666,
      "learning_rate": 2.1936274834852655e-06,
      "loss": 0.4717,
      "step": 9170
    },
    {
      "epoch": 1.1244482589504659,
      "grad_norm": 1.834648319639264,
      "learning_rate": 2.193124470814955e-06,
      "loss": 0.4118,
      "step": 9171
    },
    {
      "epoch": 1.1245708680725846,
      "grad_norm": 1.8234745948951736,
      "learning_rate": 2.1926214707577723e-06,
      "loss": 0.4258,
      "step": 9172
    },
    {
      "epoch": 1.1246934771947033,
      "grad_norm": 2.238143269430148,
      "learning_rate": 2.192118483334391e-06,
      "loss": 0.4458,
      "step": 9173
    },
    {
      "epoch": 1.124816086316822,
      "grad_norm": 1.9621780844139673,
      "learning_rate": 2.1916155085654854e-06,
      "loss": 0.4365,
      "step": 9174
    },
    {
      "epoch": 1.1249386954389407,
      "grad_norm": 2.0043404164077523,
      "learning_rate": 2.1911125464717275e-06,
      "loss": 0.4632,
      "step": 9175
    },
    {
      "epoch": 1.1250613045610593,
      "grad_norm": 1.8821854399145115,
      "learning_rate": 2.1906095970737916e-06,
      "loss": 0.4275,
      "step": 9176
    },
    {
      "epoch": 1.125183913683178,
      "grad_norm": 1.9098774877700113,
      "learning_rate": 2.190106660392349e-06,
      "loss": 0.4155,
      "step": 9177
    },
    {
      "epoch": 1.1253065228052967,
      "grad_norm": 1.9706602455393463,
      "learning_rate": 2.189603736448071e-06,
      "loss": 0.4275,
      "step": 9178
    },
    {
      "epoch": 1.1254291319274154,
      "grad_norm": 2.108044357095901,
      "learning_rate": 2.189100825261629e-06,
      "loss": 0.4643,
      "step": 9179
    },
    {
      "epoch": 1.1255517410495341,
      "grad_norm": 1.837350440048676,
      "learning_rate": 2.188597926853694e-06,
      "loss": 0.4338,
      "step": 9180
    },
    {
      "epoch": 1.1256743501716528,
      "grad_norm": 2.1803294599334753,
      "learning_rate": 2.188095041244935e-06,
      "loss": 0.472,
      "step": 9181
    },
    {
      "epoch": 1.1257969592937715,
      "grad_norm": 1.93622905050119,
      "learning_rate": 2.1875921684560214e-06,
      "loss": 0.4151,
      "step": 9182
    },
    {
      "epoch": 1.12591956841589,
      "grad_norm": 1.750064519840708,
      "learning_rate": 2.1870893085076234e-06,
      "loss": 0.4335,
      "step": 9183
    },
    {
      "epoch": 1.1260421775380087,
      "grad_norm": 1.8725996841339592,
      "learning_rate": 2.186586461420409e-06,
      "loss": 0.4607,
      "step": 9184
    },
    {
      "epoch": 1.1261647866601274,
      "grad_norm": 1.9916485574754497,
      "learning_rate": 2.1860836272150453e-06,
      "loss": 0.442,
      "step": 9185
    },
    {
      "epoch": 1.1262873957822461,
      "grad_norm": 1.8389000150913692,
      "learning_rate": 2.1855808059122007e-06,
      "loss": 0.3952,
      "step": 9186
    },
    {
      "epoch": 1.1264100049043648,
      "grad_norm": 1.9201816792955995,
      "learning_rate": 2.185077997532541e-06,
      "loss": 0.4531,
      "step": 9187
    },
    {
      "epoch": 1.1265326140264835,
      "grad_norm": 1.9690045217086398,
      "learning_rate": 2.1845752020967335e-06,
      "loss": 0.4466,
      "step": 9188
    },
    {
      "epoch": 1.1266552231486022,
      "grad_norm": 2.0584046633151627,
      "learning_rate": 2.1840724196254445e-06,
      "loss": 0.4638,
      "step": 9189
    },
    {
      "epoch": 1.126777832270721,
      "grad_norm": 1.890909323241075,
      "learning_rate": 2.1835696501393373e-06,
      "loss": 0.449,
      "step": 9190
    },
    {
      "epoch": 1.1269004413928396,
      "grad_norm": 1.9369103293101237,
      "learning_rate": 2.183066893659078e-06,
      "loss": 0.4065,
      "step": 9191
    },
    {
      "epoch": 1.1270230505149583,
      "grad_norm": 1.6888913013755587,
      "learning_rate": 2.1825641502053304e-06,
      "loss": 0.3892,
      "step": 9192
    },
    {
      "epoch": 1.127145659637077,
      "grad_norm": 1.812967400693718,
      "learning_rate": 2.182061419798759e-06,
      "loss": 0.4302,
      "step": 9193
    },
    {
      "epoch": 1.1272682687591957,
      "grad_norm": 1.7134787269966971,
      "learning_rate": 2.1815587024600243e-06,
      "loss": 0.4264,
      "step": 9194
    },
    {
      "epoch": 1.1273908778813144,
      "grad_norm": 1.7179169482302248,
      "learning_rate": 2.181055998209792e-06,
      "loss": 0.4381,
      "step": 9195
    },
    {
      "epoch": 1.1275134870034331,
      "grad_norm": 1.9608191410740519,
      "learning_rate": 2.1805533070687224e-06,
      "loss": 0.4216,
      "step": 9196
    },
    {
      "epoch": 1.1276360961255518,
      "grad_norm": 2.0495108240114894,
      "learning_rate": 2.180050629057478e-06,
      "loss": 0.4555,
      "step": 9197
    },
    {
      "epoch": 1.1277587052476705,
      "grad_norm": 1.8459033617970135,
      "learning_rate": 2.1795479641967187e-06,
      "loss": 0.4465,
      "step": 9198
    },
    {
      "epoch": 1.1278813143697892,
      "grad_norm": 1.961626879147168,
      "learning_rate": 2.1790453125071053e-06,
      "loss": 0.4091,
      "step": 9199
    },
    {
      "epoch": 1.128003923491908,
      "grad_norm": 1.6840059638004277,
      "learning_rate": 2.1785426740092974e-06,
      "loss": 0.4186,
      "step": 9200
    },
    {
      "epoch": 1.1281265326140264,
      "grad_norm": 1.9000900361365642,
      "learning_rate": 2.1780400487239563e-06,
      "loss": 0.4357,
      "step": 9201
    },
    {
      "epoch": 1.128249141736145,
      "grad_norm": 2.0341532806550306,
      "learning_rate": 2.177537436671737e-06,
      "loss": 0.4424,
      "step": 9202
    },
    {
      "epoch": 1.1283717508582638,
      "grad_norm": 1.8894860408055334,
      "learning_rate": 2.1770348378733013e-06,
      "loss": 0.3937,
      "step": 9203
    },
    {
      "epoch": 1.1284943599803825,
      "grad_norm": 2.058735656820111,
      "learning_rate": 2.176532252349305e-06,
      "loss": 0.4113,
      "step": 9204
    },
    {
      "epoch": 1.1286169691025012,
      "grad_norm": 1.8418538152586441,
      "learning_rate": 2.1760296801204063e-06,
      "loss": 0.3824,
      "step": 9205
    },
    {
      "epoch": 1.12873957822462,
      "grad_norm": 2.040357749161059,
      "learning_rate": 2.175527121207261e-06,
      "loss": 0.4436,
      "step": 9206
    },
    {
      "epoch": 1.1288621873467386,
      "grad_norm": 1.8820965023909138,
      "learning_rate": 2.175024575630525e-06,
      "loss": 0.4481,
      "step": 9207
    },
    {
      "epoch": 1.1289847964688573,
      "grad_norm": 2.0406947281434853,
      "learning_rate": 2.1745220434108547e-06,
      "loss": 0.4298,
      "step": 9208
    },
    {
      "epoch": 1.129107405590976,
      "grad_norm": 1.8742255953249083,
      "learning_rate": 2.174019524568905e-06,
      "loss": 0.4265,
      "step": 9209
    },
    {
      "epoch": 1.1292300147130947,
      "grad_norm": 1.8906634248462362,
      "learning_rate": 2.1735170191253296e-06,
      "loss": 0.4062,
      "step": 9210
    },
    {
      "epoch": 1.1293526238352134,
      "grad_norm": 2.0304357991726314,
      "learning_rate": 2.1730145271007826e-06,
      "loss": 0.4453,
      "step": 9211
    },
    {
      "epoch": 1.129475232957332,
      "grad_norm": 1.9638377725584457,
      "learning_rate": 2.1725120485159172e-06,
      "loss": 0.4708,
      "step": 9212
    },
    {
      "epoch": 1.1295978420794508,
      "grad_norm": 2.0475323279207545,
      "learning_rate": 2.1720095833913862e-06,
      "loss": 0.4329,
      "step": 9213
    },
    {
      "epoch": 1.1297204512015693,
      "grad_norm": 2.0902913476888427,
      "learning_rate": 2.1715071317478433e-06,
      "loss": 0.4519,
      "step": 9214
    },
    {
      "epoch": 1.129843060323688,
      "grad_norm": 2.0915719898495997,
      "learning_rate": 2.1710046936059378e-06,
      "loss": 0.4584,
      "step": 9215
    },
    {
      "epoch": 1.1299656694458067,
      "grad_norm": 1.8015962253890725,
      "learning_rate": 2.170502268986322e-06,
      "loss": 0.424,
      "step": 9216
    },
    {
      "epoch": 1.1300882785679254,
      "grad_norm": 2.2164321491932086,
      "learning_rate": 2.1699998579096462e-06,
      "loss": 0.4051,
      "step": 9217
    },
    {
      "epoch": 1.130210887690044,
      "grad_norm": 1.8030702759359345,
      "learning_rate": 2.169497460396561e-06,
      "loss": 0.5323,
      "step": 9218
    },
    {
      "epoch": 1.1303334968121628,
      "grad_norm": 1.8361325452494466,
      "learning_rate": 2.168995076467715e-06,
      "loss": 0.4788,
      "step": 9219
    },
    {
      "epoch": 1.1304561059342815,
      "grad_norm": 2.0525712208433746,
      "learning_rate": 2.168492706143757e-06,
      "loss": 0.4441,
      "step": 9220
    },
    {
      "epoch": 1.1305787150564002,
      "grad_norm": 1.8470404466521264,
      "learning_rate": 2.167990349445335e-06,
      "loss": 0.424,
      "step": 9221
    },
    {
      "epoch": 1.1307013241785189,
      "grad_norm": 1.7907159994585349,
      "learning_rate": 2.167488006393099e-06,
      "loss": 0.4037,
      "step": 9222
    },
    {
      "epoch": 1.1308239333006376,
      "grad_norm": 1.8921363187859288,
      "learning_rate": 2.1669856770076942e-06,
      "loss": 0.4146,
      "step": 9223
    },
    {
      "epoch": 1.1309465424227563,
      "grad_norm": 2.102702047511805,
      "learning_rate": 2.1664833613097667e-06,
      "loss": 0.4943,
      "step": 9224
    },
    {
      "epoch": 1.131069151544875,
      "grad_norm": 1.9956961192443632,
      "learning_rate": 2.165981059319964e-06,
      "loss": 0.4628,
      "step": 9225
    },
    {
      "epoch": 1.1311917606669937,
      "grad_norm": 1.8932882402678726,
      "learning_rate": 2.165478771058932e-06,
      "loss": 0.4134,
      "step": 9226
    },
    {
      "epoch": 1.1313143697891124,
      "grad_norm": 1.9968468840346505,
      "learning_rate": 2.1649764965473136e-06,
      "loss": 0.4611,
      "step": 9227
    },
    {
      "epoch": 1.131436978911231,
      "grad_norm": 1.9380006995153536,
      "learning_rate": 2.1644742358057545e-06,
      "loss": 0.4595,
      "step": 9228
    },
    {
      "epoch": 1.1315595880333498,
      "grad_norm": 1.960766134201501,
      "learning_rate": 2.163971988854898e-06,
      "loss": 0.4311,
      "step": 9229
    },
    {
      "epoch": 1.1316821971554685,
      "grad_norm": 2.017052941561268,
      "learning_rate": 2.163469755715389e-06,
      "loss": 0.414,
      "step": 9230
    },
    {
      "epoch": 1.1318048062775872,
      "grad_norm": 2.026437670919271,
      "learning_rate": 2.162967536407868e-06,
      "loss": 0.4537,
      "step": 9231
    },
    {
      "epoch": 1.1319274153997056,
      "grad_norm": 1.7915094705362407,
      "learning_rate": 2.162465330952977e-06,
      "loss": 0.417,
      "step": 9232
    },
    {
      "epoch": 1.1320500245218243,
      "grad_norm": 2.0313210577731424,
      "learning_rate": 2.1619631393713596e-06,
      "loss": 0.4364,
      "step": 9233
    },
    {
      "epoch": 1.132172633643943,
      "grad_norm": 1.9885557228237274,
      "learning_rate": 2.1614609616836552e-06,
      "loss": 0.4448,
      "step": 9234
    },
    {
      "epoch": 1.1322952427660617,
      "grad_norm": 1.9152115245692303,
      "learning_rate": 2.160958797910505e-06,
      "loss": 0.4682,
      "step": 9235
    },
    {
      "epoch": 1.1324178518881804,
      "grad_norm": 1.8884928029565724,
      "learning_rate": 2.160456648072548e-06,
      "loss": 0.4225,
      "step": 9236
    },
    {
      "epoch": 1.1325404610102991,
      "grad_norm": 1.9512135415833822,
      "learning_rate": 2.1599545121904243e-06,
      "loss": 0.4754,
      "step": 9237
    },
    {
      "epoch": 1.1326630701324178,
      "grad_norm": 2.036951487070936,
      "learning_rate": 2.159452390284771e-06,
      "loss": 0.4276,
      "step": 9238
    },
    {
      "epoch": 1.1327856792545365,
      "grad_norm": 1.974215623267647,
      "learning_rate": 2.1589502823762294e-06,
      "loss": 0.4101,
      "step": 9239
    },
    {
      "epoch": 1.1329082883766552,
      "grad_norm": 1.7432501388115509,
      "learning_rate": 2.158448188485433e-06,
      "loss": 0.4049,
      "step": 9240
    },
    {
      "epoch": 1.133030897498774,
      "grad_norm": 1.8003283413077464,
      "learning_rate": 2.1579461086330217e-06,
      "loss": 0.4151,
      "step": 9241
    },
    {
      "epoch": 1.1331535066208926,
      "grad_norm": 1.8203150613781287,
      "learning_rate": 2.157444042839631e-06,
      "loss": 0.4291,
      "step": 9242
    },
    {
      "epoch": 1.1332761157430113,
      "grad_norm": 2.0166098653176254,
      "learning_rate": 2.1569419911258966e-06,
      "loss": 0.4582,
      "step": 9243
    },
    {
      "epoch": 1.13339872486513,
      "grad_norm": 1.8750608833338902,
      "learning_rate": 2.1564399535124534e-06,
      "loss": 0.4686,
      "step": 9244
    },
    {
      "epoch": 1.1335213339872487,
      "grad_norm": 1.942796824364388,
      "learning_rate": 2.1559379300199355e-06,
      "loss": 0.4402,
      "step": 9245
    },
    {
      "epoch": 1.1336439431093672,
      "grad_norm": 1.8071384242652264,
      "learning_rate": 2.155435920668979e-06,
      "loss": 0.4539,
      "step": 9246
    },
    {
      "epoch": 1.133766552231486,
      "grad_norm": 1.727410672270785,
      "learning_rate": 2.154933925480216e-06,
      "loss": 0.4237,
      "step": 9247
    },
    {
      "epoch": 1.1338891613536046,
      "grad_norm": 1.8419405781453777,
      "learning_rate": 2.1544319444742795e-06,
      "loss": 0.3999,
      "step": 9248
    },
    {
      "epoch": 1.1340117704757233,
      "grad_norm": 1.9463129757998723,
      "learning_rate": 2.1539299776718018e-06,
      "loss": 0.4724,
      "step": 9249
    },
    {
      "epoch": 1.134134379597842,
      "grad_norm": 2.0048898972814038,
      "learning_rate": 2.1534280250934145e-06,
      "loss": 0.4767,
      "step": 9250
    },
    {
      "epoch": 1.1342569887199607,
      "grad_norm": 1.9508469348072335,
      "learning_rate": 2.1529260867597497e-06,
      "loss": 0.4146,
      "step": 9251
    },
    {
      "epoch": 1.1343795978420794,
      "grad_norm": 1.8151452231034084,
      "learning_rate": 2.1524241626914357e-06,
      "loss": 0.4119,
      "step": 9252
    },
    {
      "epoch": 1.1345022069641981,
      "grad_norm": 1.8718846479813527,
      "learning_rate": 2.151922252909105e-06,
      "loss": 0.4011,
      "step": 9253
    },
    {
      "epoch": 1.1346248160863168,
      "grad_norm": 2.133163441848941,
      "learning_rate": 2.151420357433385e-06,
      "loss": 0.4536,
      "step": 9254
    },
    {
      "epoch": 1.1347474252084355,
      "grad_norm": 2.1416544253206067,
      "learning_rate": 2.1509184762849073e-06,
      "loss": 0.4147,
      "step": 9255
    },
    {
      "epoch": 1.1348700343305542,
      "grad_norm": 1.9586464947144553,
      "learning_rate": 2.150416609484297e-06,
      "loss": 0.4407,
      "step": 9256
    },
    {
      "epoch": 1.134992643452673,
      "grad_norm": 2.0646231613345587,
      "learning_rate": 2.1499147570521827e-06,
      "loss": 0.4863,
      "step": 9257
    },
    {
      "epoch": 1.1351152525747916,
      "grad_norm": 1.9306980357610246,
      "learning_rate": 2.1494129190091912e-06,
      "loss": 0.4297,
      "step": 9258
    },
    {
      "epoch": 1.1352378616969103,
      "grad_norm": 1.6915848498891044,
      "learning_rate": 2.1489110953759505e-06,
      "loss": 0.3937,
      "step": 9259
    },
    {
      "epoch": 1.135360470819029,
      "grad_norm": 1.8967159550447354,
      "learning_rate": 2.1484092861730853e-06,
      "loss": 0.4748,
      "step": 9260
    },
    {
      "epoch": 1.1354830799411477,
      "grad_norm": 2.0562491000542558,
      "learning_rate": 2.1479074914212206e-06,
      "loss": 0.478,
      "step": 9261
    },
    {
      "epoch": 1.1356056890632664,
      "grad_norm": 1.8725139374532418,
      "learning_rate": 2.1474057111409815e-06,
      "loss": 0.4616,
      "step": 9262
    },
    {
      "epoch": 1.135728298185385,
      "grad_norm": 1.847180045565894,
      "learning_rate": 2.1469039453529908e-06,
      "loss": 0.3905,
      "step": 9263
    },
    {
      "epoch": 1.1358509073075036,
      "grad_norm": 1.825410935969307,
      "learning_rate": 2.146402194077875e-06,
      "loss": 0.471,
      "step": 9264
    },
    {
      "epoch": 1.1359735164296223,
      "grad_norm": 1.8492419804174969,
      "learning_rate": 2.1459004573362534e-06,
      "loss": 0.3836,
      "step": 9265
    },
    {
      "epoch": 1.136096125551741,
      "grad_norm": 1.7909207707160548,
      "learning_rate": 2.1453987351487504e-06,
      "loss": 0.3978,
      "step": 9266
    },
    {
      "epoch": 1.1362187346738597,
      "grad_norm": 1.8754111705668317,
      "learning_rate": 2.144897027535987e-06,
      "loss": 0.4412,
      "step": 9267
    },
    {
      "epoch": 1.1363413437959784,
      "grad_norm": 1.9467301647620066,
      "learning_rate": 2.1443953345185853e-06,
      "loss": 0.4515,
      "step": 9268
    },
    {
      "epoch": 1.136463952918097,
      "grad_norm": 1.8319864475402108,
      "learning_rate": 2.1438936561171643e-06,
      "loss": 0.416,
      "step": 9269
    },
    {
      "epoch": 1.1365865620402158,
      "grad_norm": 1.8236307232772075,
      "learning_rate": 2.1433919923523435e-06,
      "loss": 0.4184,
      "step": 9270
    },
    {
      "epoch": 1.1367091711623345,
      "grad_norm": 1.6275224154087418,
      "learning_rate": 2.142890343244744e-06,
      "loss": 0.4421,
      "step": 9271
    },
    {
      "epoch": 1.1368317802844532,
      "grad_norm": 1.9470928641016638,
      "learning_rate": 2.1423887088149843e-06,
      "loss": 0.4194,
      "step": 9272
    },
    {
      "epoch": 1.136954389406572,
      "grad_norm": 2.1975504832896156,
      "learning_rate": 2.141887089083681e-06,
      "loss": 0.4461,
      "step": 9273
    },
    {
      "epoch": 1.1370769985286906,
      "grad_norm": 2.1621484027373694,
      "learning_rate": 2.1413854840714525e-06,
      "loss": 0.4526,
      "step": 9274
    },
    {
      "epoch": 1.1371996076508093,
      "grad_norm": 1.8502340657611613,
      "learning_rate": 2.140883893798915e-06,
      "loss": 0.4331,
      "step": 9275
    },
    {
      "epoch": 1.137322216772928,
      "grad_norm": 1.930493212197446,
      "learning_rate": 2.1403823182866867e-06,
      "loss": 0.4271,
      "step": 9276
    },
    {
      "epoch": 1.1374448258950465,
      "grad_norm": 2.0180405857228725,
      "learning_rate": 2.1398807575553805e-06,
      "loss": 0.4456,
      "step": 9277
    },
    {
      "epoch": 1.1375674350171652,
      "grad_norm": 1.888746549084664,
      "learning_rate": 2.1393792116256125e-06,
      "loss": 0.4118,
      "step": 9278
    },
    {
      "epoch": 1.1376900441392839,
      "grad_norm": 2.0126332244940492,
      "learning_rate": 2.1388776805179974e-06,
      "loss": 0.4153,
      "step": 9279
    },
    {
      "epoch": 1.1378126532614026,
      "grad_norm": 2.115325593954531,
      "learning_rate": 2.13837616425315e-06,
      "loss": 0.4432,
      "step": 9280
    },
    {
      "epoch": 1.1379352623835213,
      "grad_norm": 1.7296237593119153,
      "learning_rate": 2.1378746628516816e-06,
      "loss": 0.434,
      "step": 9281
    },
    {
      "epoch": 1.13805787150564,
      "grad_norm": 1.9238572521776598,
      "learning_rate": 2.1373731763342055e-06,
      "loss": 0.4294,
      "step": 9282
    },
    {
      "epoch": 1.1381804806277587,
      "grad_norm": 1.9215984351305064,
      "learning_rate": 2.1368717047213338e-06,
      "loss": 0.4453,
      "step": 9283
    },
    {
      "epoch": 1.1383030897498774,
      "grad_norm": 1.978031413486589,
      "learning_rate": 2.136370248033678e-06,
      "loss": 0.4238,
      "step": 9284
    },
    {
      "epoch": 1.138425698871996,
      "grad_norm": 2.0553720425521496,
      "learning_rate": 2.1358688062918497e-06,
      "loss": 0.4542,
      "step": 9285
    },
    {
      "epoch": 1.1385483079941148,
      "grad_norm": 1.9515548185506264,
      "learning_rate": 2.1353673795164574e-06,
      "loss": 0.4511,
      "step": 9286
    },
    {
      "epoch": 1.1386709171162335,
      "grad_norm": 1.8964516170025434,
      "learning_rate": 2.134865967728111e-06,
      "loss": 0.4129,
      "step": 9287
    },
    {
      "epoch": 1.1387935262383522,
      "grad_norm": 2.1413090774876373,
      "learning_rate": 2.13436457094742e-06,
      "loss": 0.461,
      "step": 9288
    },
    {
      "epoch": 1.1389161353604709,
      "grad_norm": 1.9577199276727322,
      "learning_rate": 2.1338631891949935e-06,
      "loss": 0.4289,
      "step": 9289
    },
    {
      "epoch": 1.1390387444825896,
      "grad_norm": 2.148705303601179,
      "learning_rate": 2.1333618224914366e-06,
      "loss": 0.4088,
      "step": 9290
    },
    {
      "epoch": 1.1391613536047083,
      "grad_norm": 1.9516308707817822,
      "learning_rate": 2.132860470857358e-06,
      "loss": 0.425,
      "step": 9291
    },
    {
      "epoch": 1.139283962726827,
      "grad_norm": 1.9975932216342034,
      "learning_rate": 2.1323591343133644e-06,
      "loss": 0.4394,
      "step": 9292
    },
    {
      "epoch": 1.1394065718489457,
      "grad_norm": 1.8911689687400846,
      "learning_rate": 2.1318578128800623e-06,
      "loss": 0.4486,
      "step": 9293
    },
    {
      "epoch": 1.1395291809710644,
      "grad_norm": 1.9024335014517266,
      "learning_rate": 2.1313565065780547e-06,
      "loss": 0.414,
      "step": 9294
    },
    {
      "epoch": 1.1396517900931828,
      "grad_norm": 2.0915682690836284,
      "learning_rate": 2.130855215427947e-06,
      "loss": 0.4848,
      "step": 9295
    },
    {
      "epoch": 1.1397743992153015,
      "grad_norm": 2.0269375551584994,
      "learning_rate": 2.1303539394503436e-06,
      "loss": 0.4205,
      "step": 9296
    },
    {
      "epoch": 1.1398970083374202,
      "grad_norm": 2.00380832079781,
      "learning_rate": 2.129852678665849e-06,
      "loss": 0.4333,
      "step": 9297
    },
    {
      "epoch": 1.140019617459539,
      "grad_norm": 1.8862739487045799,
      "learning_rate": 2.1293514330950627e-06,
      "loss": 0.4102,
      "step": 9298
    },
    {
      "epoch": 1.1401422265816576,
      "grad_norm": 1.8657787006770181,
      "learning_rate": 2.1288502027585896e-06,
      "loss": 0.4553,
      "step": 9299
    },
    {
      "epoch": 1.1402648357037763,
      "grad_norm": 1.9684317989545488,
      "learning_rate": 2.1283489876770304e-06,
      "loss": 0.4617,
      "step": 9300
    },
    {
      "epoch": 1.140387444825895,
      "grad_norm": 1.9503300932422527,
      "learning_rate": 2.1278477878709858e-06,
      "loss": 0.4248,
      "step": 9301
    },
    {
      "epoch": 1.1405100539480137,
      "grad_norm": 2.1472196769360385,
      "learning_rate": 2.1273466033610555e-06,
      "loss": 0.4631,
      "step": 9302
    },
    {
      "epoch": 1.1406326630701324,
      "grad_norm": 1.934936049934412,
      "learning_rate": 2.1268454341678395e-06,
      "loss": 0.4099,
      "step": 9303
    },
    {
      "epoch": 1.1407552721922511,
      "grad_norm": 2.2242944774255275,
      "learning_rate": 2.126344280311937e-06,
      "loss": 0.4396,
      "step": 9304
    },
    {
      "epoch": 1.1408778813143698,
      "grad_norm": 1.9633045802609008,
      "learning_rate": 2.1258431418139467e-06,
      "loss": 0.4407,
      "step": 9305
    },
    {
      "epoch": 1.1410004904364885,
      "grad_norm": 1.9111092314738225,
      "learning_rate": 2.125342018694465e-06,
      "loss": 0.4111,
      "step": 9306
    },
    {
      "epoch": 1.1411230995586072,
      "grad_norm": 1.9849292522584594,
      "learning_rate": 2.12484091097409e-06,
      "loss": 0.418,
      "step": 9307
    },
    {
      "epoch": 1.1412457086807257,
      "grad_norm": 2.0611877642757293,
      "learning_rate": 2.1243398186734166e-06,
      "loss": 0.4523,
      "step": 9308
    },
    {
      "epoch": 1.1413683178028444,
      "grad_norm": 1.7507007472149452,
      "learning_rate": 2.1238387418130425e-06,
      "loss": 0.4217,
      "step": 9309
    },
    {
      "epoch": 1.1414909269249631,
      "grad_norm": 2.1356804809592562,
      "learning_rate": 2.1233376804135625e-06,
      "loss": 0.4613,
      "step": 9310
    },
    {
      "epoch": 1.1416135360470818,
      "grad_norm": 1.9065285732295854,
      "learning_rate": 2.12283663449557e-06,
      "loss": 0.3888,
      "step": 9311
    },
    {
      "epoch": 1.1417361451692005,
      "grad_norm": 1.898255322518454,
      "learning_rate": 2.12233560407966e-06,
      "loss": 0.4657,
      "step": 9312
    },
    {
      "epoch": 1.1418587542913192,
      "grad_norm": 1.8498663769597072,
      "learning_rate": 2.121834589186425e-06,
      "loss": 0.4175,
      "step": 9313
    },
    {
      "epoch": 1.141981363413438,
      "grad_norm": 1.8657413693763847,
      "learning_rate": 2.1213335898364583e-06,
      "loss": 0.4115,
      "step": 9314
    },
    {
      "epoch": 1.1421039725355566,
      "grad_norm": 1.9009129894361814,
      "learning_rate": 2.120832606050351e-06,
      "loss": 0.4305,
      "step": 9315
    },
    {
      "epoch": 1.1422265816576753,
      "grad_norm": 1.7587785816770993,
      "learning_rate": 2.120331637848694e-06,
      "loss": 0.434,
      "step": 9316
    },
    {
      "epoch": 1.142349190779794,
      "grad_norm": 1.8138945723953095,
      "learning_rate": 2.1198306852520802e-06,
      "loss": 0.4384,
      "step": 9317
    },
    {
      "epoch": 1.1424717999019127,
      "grad_norm": 1.9081365608002574,
      "learning_rate": 2.119329748281098e-06,
      "loss": 0.4687,
      "step": 9318
    },
    {
      "epoch": 1.1425944090240314,
      "grad_norm": 2.114852780334487,
      "learning_rate": 2.118828826956337e-06,
      "loss": 0.4319,
      "step": 9319
    },
    {
      "epoch": 1.1427170181461501,
      "grad_norm": 1.7598125065264472,
      "learning_rate": 2.118327921298386e-06,
      "loss": 0.4483,
      "step": 9320
    },
    {
      "epoch": 1.1428396272682688,
      "grad_norm": 1.9046867805627012,
      "learning_rate": 2.1178270313278324e-06,
      "loss": 0.3936,
      "step": 9321
    },
    {
      "epoch": 1.1429622363903875,
      "grad_norm": 1.921046926292091,
      "learning_rate": 2.1173261570652664e-06,
      "loss": 0.462,
      "step": 9322
    },
    {
      "epoch": 1.1430848455125062,
      "grad_norm": 1.8636258994973678,
      "learning_rate": 2.116825298531271e-06,
      "loss": 0.4571,
      "step": 9323
    },
    {
      "epoch": 1.143207454634625,
      "grad_norm": 1.8405733473743895,
      "learning_rate": 2.116324455746434e-06,
      "loss": 0.4168,
      "step": 9324
    },
    {
      "epoch": 1.1433300637567436,
      "grad_norm": 1.7956498621288735,
      "learning_rate": 2.115823628731342e-06,
      "loss": 0.4161,
      "step": 9325
    },
    {
      "epoch": 1.143452672878862,
      "grad_norm": 1.7390188930503154,
      "learning_rate": 2.1153228175065795e-06,
      "loss": 0.4358,
      "step": 9326
    },
    {
      "epoch": 1.1435752820009808,
      "grad_norm": 1.9836318469029028,
      "learning_rate": 2.114822022092729e-06,
      "loss": 0.4392,
      "step": 9327
    },
    {
      "epoch": 1.1436978911230995,
      "grad_norm": 2.052230939899217,
      "learning_rate": 2.1143212425103755e-06,
      "loss": 0.4426,
      "step": 9328
    },
    {
      "epoch": 1.1438205002452182,
      "grad_norm": 1.9551882045231357,
      "learning_rate": 2.1138204787801016e-06,
      "loss": 0.4581,
      "step": 9329
    },
    {
      "epoch": 1.143943109367337,
      "grad_norm": 1.7656396283813007,
      "learning_rate": 2.11331973092249e-06,
      "loss": 0.4422,
      "step": 9330
    },
    {
      "epoch": 1.1440657184894556,
      "grad_norm": 1.8170923700290103,
      "learning_rate": 2.1128189989581226e-06,
      "loss": 0.4518,
      "step": 9331
    },
    {
      "epoch": 1.1441883276115743,
      "grad_norm": 1.8820280362607698,
      "learning_rate": 2.1123182829075794e-06,
      "loss": 0.4415,
      "step": 9332
    },
    {
      "epoch": 1.144310936733693,
      "grad_norm": 1.9646270218344992,
      "learning_rate": 2.111817582791441e-06,
      "loss": 0.4585,
      "step": 9333
    },
    {
      "epoch": 1.1444335458558117,
      "grad_norm": 1.9432601137349466,
      "learning_rate": 2.1113168986302866e-06,
      "loss": 0.4545,
      "step": 9334
    },
    {
      "epoch": 1.1445561549779304,
      "grad_norm": 1.9430333964614181,
      "learning_rate": 2.1108162304446974e-06,
      "loss": 0.439,
      "step": 9335
    },
    {
      "epoch": 1.144678764100049,
      "grad_norm": 2.186695687626558,
      "learning_rate": 2.1103155782552486e-06,
      "loss": 0.4702,
      "step": 9336
    },
    {
      "epoch": 1.1448013732221678,
      "grad_norm": 2.109549676272091,
      "learning_rate": 2.10981494208252e-06,
      "loss": 0.4181,
      "step": 9337
    },
    {
      "epoch": 1.1449239823442865,
      "grad_norm": 1.940180700568585,
      "learning_rate": 2.1093143219470875e-06,
      "loss": 0.4614,
      "step": 9338
    },
    {
      "epoch": 1.1450465914664052,
      "grad_norm": 1.9468102213800753,
      "learning_rate": 2.108813717869529e-06,
      "loss": 0.4293,
      "step": 9339
    },
    {
      "epoch": 1.1451692005885237,
      "grad_norm": 1.8606667840149842,
      "learning_rate": 2.1083131298704186e-06,
      "loss": 0.4134,
      "step": 9340
    },
    {
      "epoch": 1.1452918097106424,
      "grad_norm": 1.8807122387938264,
      "learning_rate": 2.107812557970332e-06,
      "loss": 0.4311,
      "step": 9341
    },
    {
      "epoch": 1.145414418832761,
      "grad_norm": 2.1504939232417786,
      "learning_rate": 2.1073120021898432e-06,
      "loss": 0.48,
      "step": 9342
    },
    {
      "epoch": 1.1455370279548798,
      "grad_norm": 1.7396527288669863,
      "learning_rate": 2.1068114625495276e-06,
      "loss": 0.4118,
      "step": 9343
    },
    {
      "epoch": 1.1456596370769985,
      "grad_norm": 1.9962605028990792,
      "learning_rate": 2.106310939069956e-06,
      "loss": 0.4371,
      "step": 9344
    },
    {
      "epoch": 1.1457822461991172,
      "grad_norm": 2.038768521643926,
      "learning_rate": 2.1058104317717017e-06,
      "loss": 0.431,
      "step": 9345
    },
    {
      "epoch": 1.1459048553212359,
      "grad_norm": 1.9944970063650993,
      "learning_rate": 2.1053099406753364e-06,
      "loss": 0.4527,
      "step": 9346
    },
    {
      "epoch": 1.1460274644433546,
      "grad_norm": 1.5823871339133062,
      "learning_rate": 2.1048094658014326e-06,
      "loss": 0.4161,
      "step": 9347
    },
    {
      "epoch": 1.1461500735654733,
      "grad_norm": 1.8536000759071931,
      "learning_rate": 2.1043090071705578e-06,
      "loss": 0.4263,
      "step": 9348
    },
    {
      "epoch": 1.146272682687592,
      "grad_norm": 1.8180859475154734,
      "learning_rate": 2.103808564803284e-06,
      "loss": 0.4334,
      "step": 9349
    },
    {
      "epoch": 1.1463952918097107,
      "grad_norm": 1.9375132559511177,
      "learning_rate": 2.1033081387201795e-06,
      "loss": 0.425,
      "step": 9350
    },
    {
      "epoch": 1.1465179009318294,
      "grad_norm": 2.023389444955182,
      "learning_rate": 2.1028077289418135e-06,
      "loss": 0.434,
      "step": 9351
    },
    {
      "epoch": 1.146640510053948,
      "grad_norm": 1.9242822016581818,
      "learning_rate": 2.102307335488752e-06,
      "loss": 0.4359,
      "step": 9352
    },
    {
      "epoch": 1.1467631191760668,
      "grad_norm": 2.079536751686048,
      "learning_rate": 2.1018069583815633e-06,
      "loss": 0.4193,
      "step": 9353
    },
    {
      "epoch": 1.1468857282981855,
      "grad_norm": 1.712770439901315,
      "learning_rate": 2.1013065976408133e-06,
      "loss": 0.4552,
      "step": 9354
    },
    {
      "epoch": 1.1470083374203042,
      "grad_norm": 1.9242303162165024,
      "learning_rate": 2.1008062532870683e-06,
      "loss": 0.417,
      "step": 9355
    },
    {
      "epoch": 1.1471309465424229,
      "grad_norm": 1.872660530261621,
      "learning_rate": 2.1003059253408933e-06,
      "loss": 0.4373,
      "step": 9356
    },
    {
      "epoch": 1.1472535556645413,
      "grad_norm": 1.9575984826623019,
      "learning_rate": 2.0998056138228524e-06,
      "loss": 0.4081,
      "step": 9357
    },
    {
      "epoch": 1.14737616478666,
      "grad_norm": 1.8947749275942132,
      "learning_rate": 2.0993053187535086e-06,
      "loss": 0.4713,
      "step": 9358
    },
    {
      "epoch": 1.1474987739087787,
      "grad_norm": 1.9364880736780412,
      "learning_rate": 2.0988050401534254e-06,
      "loss": 0.4446,
      "step": 9359
    },
    {
      "epoch": 1.1476213830308974,
      "grad_norm": 1.7760538013764458,
      "learning_rate": 2.098304778043167e-06,
      "loss": 0.4419,
      "step": 9360
    },
    {
      "epoch": 1.1477439921530161,
      "grad_norm": 1.733907912400283,
      "learning_rate": 2.0978045324432917e-06,
      "loss": 0.4218,
      "step": 9361
    },
    {
      "epoch": 1.1478666012751348,
      "grad_norm": 1.8249083471541871,
      "learning_rate": 2.097304303374362e-06,
      "loss": 0.389,
      "step": 9362
    },
    {
      "epoch": 1.1479892103972535,
      "grad_norm": 1.8126944893634886,
      "learning_rate": 2.096804090856939e-06,
      "loss": 0.423,
      "step": 9363
    },
    {
      "epoch": 1.1481118195193722,
      "grad_norm": 1.9561003353915036,
      "learning_rate": 2.096303894911583e-06,
      "loss": 0.4633,
      "step": 9364
    },
    {
      "epoch": 1.148234428641491,
      "grad_norm": 1.9506311187297511,
      "learning_rate": 2.09580371555885e-06,
      "loss": 0.4478,
      "step": 9365
    },
    {
      "epoch": 1.1483570377636096,
      "grad_norm": 1.8635073151414303,
      "learning_rate": 2.0953035528192992e-06,
      "loss": 0.4123,
      "step": 9366
    },
    {
      "epoch": 1.1484796468857283,
      "grad_norm": 2.0902287293591333,
      "learning_rate": 2.09480340671349e-06,
      "loss": 0.4395,
      "step": 9367
    },
    {
      "epoch": 1.148602256007847,
      "grad_norm": 1.8837605578545247,
      "learning_rate": 2.0943032772619783e-06,
      "loss": 0.42,
      "step": 9368
    },
    {
      "epoch": 1.1487248651299657,
      "grad_norm": 1.9154106613227524,
      "learning_rate": 2.0938031644853196e-06,
      "loss": 0.4732,
      "step": 9369
    },
    {
      "epoch": 1.1488474742520844,
      "grad_norm": 1.7463740286439557,
      "learning_rate": 2.0933030684040703e-06,
      "loss": 0.3536,
      "step": 9370
    },
    {
      "epoch": 1.148970083374203,
      "grad_norm": 2.0167609160538396,
      "learning_rate": 2.0928029890387846e-06,
      "loss": 0.4456,
      "step": 9371
    },
    {
      "epoch": 1.1490926924963216,
      "grad_norm": 1.9676164593914944,
      "learning_rate": 2.0923029264100174e-06,
      "loss": 0.4803,
      "step": 9372
    },
    {
      "epoch": 1.1492153016184403,
      "grad_norm": 1.9245670715490906,
      "learning_rate": 2.0918028805383215e-06,
      "loss": 0.4496,
      "step": 9373
    },
    {
      "epoch": 1.149337910740559,
      "grad_norm": 2.1660362860592572,
      "learning_rate": 2.091302851444249e-06,
      "loss": 0.4259,
      "step": 9374
    },
    {
      "epoch": 1.1494605198626777,
      "grad_norm": 1.9047919194744742,
      "learning_rate": 2.0908028391483534e-06,
      "loss": 0.4232,
      "step": 9375
    },
    {
      "epoch": 1.1495831289847964,
      "grad_norm": 1.883221595453032,
      "learning_rate": 2.0903028436711863e-06,
      "loss": 0.4325,
      "step": 9376
    },
    {
      "epoch": 1.1497057381069151,
      "grad_norm": 1.8768207298891204,
      "learning_rate": 2.089802865033297e-06,
      "loss": 0.4552,
      "step": 9377
    },
    {
      "epoch": 1.1498283472290338,
      "grad_norm": 1.9679046138220286,
      "learning_rate": 2.0893029032552357e-06,
      "loss": 0.4452,
      "step": 9378
    },
    {
      "epoch": 1.1499509563511525,
      "grad_norm": 1.8451943978260557,
      "learning_rate": 2.0888029583575522e-06,
      "loss": 0.3853,
      "step": 9379
    },
    {
      "epoch": 1.1500735654732712,
      "grad_norm": 1.8977089579332393,
      "learning_rate": 2.0883030303607955e-06,
      "loss": 0.4152,
      "step": 9380
    },
    {
      "epoch": 1.15019617459539,
      "grad_norm": 1.9583174876164113,
      "learning_rate": 2.0878031192855138e-06,
      "loss": 0.4409,
      "step": 9381
    },
    {
      "epoch": 1.1503187837175086,
      "grad_norm": 2.0024382212838168,
      "learning_rate": 2.087303225152253e-06,
      "loss": 0.4361,
      "step": 9382
    },
    {
      "epoch": 1.1504413928396273,
      "grad_norm": 1.978639787130786,
      "learning_rate": 2.08680334798156e-06,
      "loss": 0.4913,
      "step": 9383
    },
    {
      "epoch": 1.150564001961746,
      "grad_norm": 1.8419128251346546,
      "learning_rate": 2.0863034877939806e-06,
      "loss": 0.4275,
      "step": 9384
    },
    {
      "epoch": 1.1506866110838647,
      "grad_norm": 1.9731072652871662,
      "learning_rate": 2.0858036446100616e-06,
      "loss": 0.4912,
      "step": 9385
    },
    {
      "epoch": 1.1508092202059834,
      "grad_norm": 1.892201103995963,
      "learning_rate": 2.085303818450345e-06,
      "loss": 0.4113,
      "step": 9386
    },
    {
      "epoch": 1.1509318293281021,
      "grad_norm": 1.9763330210181236,
      "learning_rate": 2.0848040093353755e-06,
      "loss": 0.437,
      "step": 9387
    },
    {
      "epoch": 1.1510544384502208,
      "grad_norm": 2.0616442499545204,
      "learning_rate": 2.0843042172856966e-06,
      "loss": 0.4684,
      "step": 9388
    },
    {
      "epoch": 1.1511770475723393,
      "grad_norm": 1.9559308876829016,
      "learning_rate": 2.0838044423218506e-06,
      "loss": 0.4726,
      "step": 9389
    },
    {
      "epoch": 1.151299656694458,
      "grad_norm": 1.995891302567285,
      "learning_rate": 2.0833046844643787e-06,
      "loss": 0.4376,
      "step": 9390
    },
    {
      "epoch": 1.1514222658165767,
      "grad_norm": 1.773657544029882,
      "learning_rate": 2.082804943733822e-06,
      "loss": 0.4156,
      "step": 9391
    },
    {
      "epoch": 1.1515448749386954,
      "grad_norm": 2.035768696901311,
      "learning_rate": 2.08230522015072e-06,
      "loss": 0.4315,
      "step": 9392
    },
    {
      "epoch": 1.151667484060814,
      "grad_norm": 2.0409013314949087,
      "learning_rate": 2.081805513735614e-06,
      "loss": 0.4158,
      "step": 9393
    },
    {
      "epoch": 1.1517900931829328,
      "grad_norm": 1.9250636270371333,
      "learning_rate": 2.081305824509041e-06,
      "loss": 0.4239,
      "step": 9394
    },
    {
      "epoch": 1.1519127023050515,
      "grad_norm": 1.7916491431372594,
      "learning_rate": 2.08080615249154e-06,
      "loss": 0.4468,
      "step": 9395
    },
    {
      "epoch": 1.1520353114271702,
      "grad_norm": 2.146225642699047,
      "learning_rate": 2.080306497703648e-06,
      "loss": 0.4053,
      "step": 9396
    },
    {
      "epoch": 1.152157920549289,
      "grad_norm": 1.8827443922331246,
      "learning_rate": 2.0798068601659027e-06,
      "loss": 0.4275,
      "step": 9397
    },
    {
      "epoch": 1.1522805296714076,
      "grad_norm": 2.119429902079778,
      "learning_rate": 2.079307239898839e-06,
      "loss": 0.4342,
      "step": 9398
    },
    {
      "epoch": 1.1524031387935263,
      "grad_norm": 1.722558276360615,
      "learning_rate": 2.0788076369229915e-06,
      "loss": 0.4155,
      "step": 9399
    },
    {
      "epoch": 1.152525747915645,
      "grad_norm": 1.9170865969245026,
      "learning_rate": 2.078308051258897e-06,
      "loss": 0.4437,
      "step": 9400
    },
    {
      "epoch": 1.1526483570377637,
      "grad_norm": 1.9800016985691333,
      "learning_rate": 2.0778084829270876e-06,
      "loss": 0.4251,
      "step": 9401
    },
    {
      "epoch": 1.1527709661598822,
      "grad_norm": 1.8196022043680102,
      "learning_rate": 2.077308931948098e-06,
      "loss": 0.4234,
      "step": 9402
    },
    {
      "epoch": 1.1528935752820009,
      "grad_norm": 1.8193137547310048,
      "learning_rate": 2.0768093983424586e-06,
      "loss": 0.4183,
      "step": 9403
    },
    {
      "epoch": 1.1530161844041196,
      "grad_norm": 1.9691436053270264,
      "learning_rate": 2.076309882130702e-06,
      "loss": 0.4193,
      "step": 9404
    },
    {
      "epoch": 1.1531387935262383,
      "grad_norm": 2.0241274591583873,
      "learning_rate": 2.0758103833333597e-06,
      "loss": 0.4261,
      "step": 9405
    },
    {
      "epoch": 1.153261402648357,
      "grad_norm": 1.90558308718969,
      "learning_rate": 2.075310901970962e-06,
      "loss": 0.4705,
      "step": 9406
    },
    {
      "epoch": 1.1533840117704757,
      "grad_norm": 1.9825171114660167,
      "learning_rate": 2.0748114380640376e-06,
      "loss": 0.4558,
      "step": 9407
    },
    {
      "epoch": 1.1535066208925944,
      "grad_norm": 1.7614581044444977,
      "learning_rate": 2.074311991633116e-06,
      "loss": 0.4222,
      "step": 9408
    },
    {
      "epoch": 1.153629230014713,
      "grad_norm": 2.0896146252074947,
      "learning_rate": 2.073812562698725e-06,
      "loss": 0.4433,
      "step": 9409
    },
    {
      "epoch": 1.1537518391368318,
      "grad_norm": 1.8935939784503042,
      "learning_rate": 2.0733131512813925e-06,
      "loss": 0.4629,
      "step": 9410
    },
    {
      "epoch": 1.1538744482589505,
      "grad_norm": 2.3515738750363266,
      "learning_rate": 2.0728137574016445e-06,
      "loss": 0.4066,
      "step": 9411
    },
    {
      "epoch": 1.1539970573810692,
      "grad_norm": 2.116145919458108,
      "learning_rate": 2.072314381080007e-06,
      "loss": 0.4334,
      "step": 9412
    },
    {
      "epoch": 1.1541196665031879,
      "grad_norm": 1.858961271514306,
      "learning_rate": 2.071815022337006e-06,
      "loss": 0.4344,
      "step": 9413
    },
    {
      "epoch": 1.1542422756253066,
      "grad_norm": 1.9286071387004562,
      "learning_rate": 2.0713156811931663e-06,
      "loss": 0.4736,
      "step": 9414
    },
    {
      "epoch": 1.1543648847474253,
      "grad_norm": 2.060100101380628,
      "learning_rate": 2.0708163576690104e-06,
      "loss": 0.4277,
      "step": 9415
    },
    {
      "epoch": 1.154487493869544,
      "grad_norm": 1.9349502814656543,
      "learning_rate": 2.070317051785062e-06,
      "loss": 0.4219,
      "step": 9416
    },
    {
      "epoch": 1.1546101029916627,
      "grad_norm": 2.0590047648856022,
      "learning_rate": 2.0698177635618427e-06,
      "loss": 0.4487,
      "step": 9417
    },
    {
      "epoch": 1.1547327121137814,
      "grad_norm": 1.966186458160123,
      "learning_rate": 2.0693184930198768e-06,
      "loss": 0.4397,
      "step": 9418
    },
    {
      "epoch": 1.1548553212359,
      "grad_norm": 1.9414512678744513,
      "learning_rate": 2.068819240179682e-06,
      "loss": 0.42,
      "step": 9419
    },
    {
      "epoch": 1.1549779303580185,
      "grad_norm": 1.908584329232271,
      "learning_rate": 2.06832000506178e-06,
      "loss": 0.4426,
      "step": 9420
    },
    {
      "epoch": 1.1551005394801372,
      "grad_norm": 1.8324130238864413,
      "learning_rate": 2.0678207876866897e-06,
      "loss": 0.4258,
      "step": 9421
    },
    {
      "epoch": 1.155223148602256,
      "grad_norm": 1.9225709178214667,
      "learning_rate": 2.0673215880749307e-06,
      "loss": 0.4265,
      "step": 9422
    },
    {
      "epoch": 1.1553457577243746,
      "grad_norm": 1.9606063358039194,
      "learning_rate": 2.0668224062470202e-06,
      "loss": 0.4171,
      "step": 9423
    },
    {
      "epoch": 1.1554683668464933,
      "grad_norm": 1.8785615221955212,
      "learning_rate": 2.066323242223475e-06,
      "loss": 0.4302,
      "step": 9424
    },
    {
      "epoch": 1.155590975968612,
      "grad_norm": 2.0221734360237975,
      "learning_rate": 2.065824096024813e-06,
      "loss": 0.4479,
      "step": 9425
    },
    {
      "epoch": 1.1557135850907307,
      "grad_norm": 1.8443255959619593,
      "learning_rate": 2.0653249676715493e-06,
      "loss": 0.4298,
      "step": 9426
    },
    {
      "epoch": 1.1558361942128494,
      "grad_norm": 1.920672011969365,
      "learning_rate": 2.0648258571841994e-06,
      "loss": 0.4163,
      "step": 9427
    },
    {
      "epoch": 1.1559588033349681,
      "grad_norm": 1.7056689077055553,
      "learning_rate": 2.0643267645832766e-06,
      "loss": 0.4048,
      "step": 9428
    },
    {
      "epoch": 1.1560814124570868,
      "grad_norm": 1.8010498628473401,
      "learning_rate": 2.0638276898892952e-06,
      "loss": 0.4201,
      "step": 9429
    },
    {
      "epoch": 1.1562040215792055,
      "grad_norm": 2.018945215736541,
      "learning_rate": 2.063328633122767e-06,
      "loss": 0.4582,
      "step": 9430
    },
    {
      "epoch": 1.1563266307013242,
      "grad_norm": 1.8611533678448244,
      "learning_rate": 2.062829594304207e-06,
      "loss": 0.4222,
      "step": 9431
    },
    {
      "epoch": 1.156449239823443,
      "grad_norm": 1.9923205412625207,
      "learning_rate": 2.062330573454123e-06,
      "loss": 0.4382,
      "step": 9432
    },
    {
      "epoch": 1.1565718489455616,
      "grad_norm": 1.8094656465624088,
      "learning_rate": 2.061831570593028e-06,
      "loss": 0.3955,
      "step": 9433
    },
    {
      "epoch": 1.1566944580676801,
      "grad_norm": 1.974766861707325,
      "learning_rate": 2.061332585741431e-06,
      "loss": 0.4301,
      "step": 9434
    },
    {
      "epoch": 1.1568170671897988,
      "grad_norm": 1.8860053947924877,
      "learning_rate": 2.060833618919842e-06,
      "loss": 0.4538,
      "step": 9435
    },
    {
      "epoch": 1.1569396763119175,
      "grad_norm": 2.1522583517954805,
      "learning_rate": 2.0603346701487675e-06,
      "loss": 0.4349,
      "step": 9436
    },
    {
      "epoch": 1.1570622854340362,
      "grad_norm": 2.061491347418364,
      "learning_rate": 2.059835739448717e-06,
      "loss": 0.4276,
      "step": 9437
    },
    {
      "epoch": 1.157184894556155,
      "grad_norm": 1.9584503583395658,
      "learning_rate": 2.0593368268401964e-06,
      "loss": 0.4035,
      "step": 9438
    },
    {
      "epoch": 1.1573075036782736,
      "grad_norm": 1.9994034846515698,
      "learning_rate": 2.0588379323437133e-06,
      "loss": 0.4373,
      "step": 9439
    },
    {
      "epoch": 1.1574301128003923,
      "grad_norm": 1.877255579032252,
      "learning_rate": 2.0583390559797716e-06,
      "loss": 0.4366,
      "step": 9440
    },
    {
      "epoch": 1.157552721922511,
      "grad_norm": 2.0101469450255127,
      "learning_rate": 2.057840197768877e-06,
      "loss": 0.4175,
      "step": 9441
    },
    {
      "epoch": 1.1576753310446297,
      "grad_norm": 1.8577828387383026,
      "learning_rate": 2.0573413577315317e-06,
      "loss": 0.4514,
      "step": 9442
    },
    {
      "epoch": 1.1577979401667484,
      "grad_norm": 2.036395643383319,
      "learning_rate": 2.0568425358882426e-06,
      "loss": 0.4514,
      "step": 9443
    },
    {
      "epoch": 1.1579205492888671,
      "grad_norm": 1.9658963407297205,
      "learning_rate": 2.056343732259508e-06,
      "loss": 0.4576,
      "step": 9444
    },
    {
      "epoch": 1.1580431584109858,
      "grad_norm": 1.9020005423826574,
      "learning_rate": 2.0558449468658316e-06,
      "loss": 0.4029,
      "step": 9445
    },
    {
      "epoch": 1.1581657675331045,
      "grad_norm": 2.0847433244746667,
      "learning_rate": 2.0553461797277148e-06,
      "loss": 0.4692,
      "step": 9446
    },
    {
      "epoch": 1.1582883766552232,
      "grad_norm": 1.8791517816411973,
      "learning_rate": 2.0548474308656576e-06,
      "loss": 0.4248,
      "step": 9447
    },
    {
      "epoch": 1.158410985777342,
      "grad_norm": 2.211267308661586,
      "learning_rate": 2.054348700300158e-06,
      "loss": 0.4262,
      "step": 9448
    },
    {
      "epoch": 1.1585335948994606,
      "grad_norm": 1.7258507771153448,
      "learning_rate": 2.0538499880517162e-06,
      "loss": 0.424,
      "step": 9449
    },
    {
      "epoch": 1.1586562040215793,
      "grad_norm": 1.906217367122879,
      "learning_rate": 2.0533512941408292e-06,
      "loss": 0.3998,
      "step": 9450
    },
    {
      "epoch": 1.1587788131436978,
      "grad_norm": 2.0092275676043845,
      "learning_rate": 2.0528526185879956e-06,
      "loss": 0.4561,
      "step": 9451
    },
    {
      "epoch": 1.1589014222658165,
      "grad_norm": 2.0940235616428122,
      "learning_rate": 2.052353961413711e-06,
      "loss": 0.4577,
      "step": 9452
    },
    {
      "epoch": 1.1590240313879352,
      "grad_norm": 2.084771338104383,
      "learning_rate": 2.0518553226384707e-06,
      "loss": 0.4448,
      "step": 9453
    },
    {
      "epoch": 1.159146640510054,
      "grad_norm": 1.911411443266438,
      "learning_rate": 2.0513567022827703e-06,
      "loss": 0.4383,
      "step": 9454
    },
    {
      "epoch": 1.1592692496321726,
      "grad_norm": 2.001966318811046,
      "learning_rate": 2.0508581003671026e-06,
      "loss": 0.4472,
      "step": 9455
    },
    {
      "epoch": 1.1593918587542913,
      "grad_norm": 1.8900640559443909,
      "learning_rate": 2.0503595169119644e-06,
      "loss": 0.4198,
      "step": 9456
    },
    {
      "epoch": 1.15951446787641,
      "grad_norm": 1.848669866598948,
      "learning_rate": 2.0498609519378437e-06,
      "loss": 0.4261,
      "step": 9457
    },
    {
      "epoch": 1.1596370769985287,
      "grad_norm": 1.928409995856987,
      "learning_rate": 2.049362405465236e-06,
      "loss": 0.418,
      "step": 9458
    },
    {
      "epoch": 1.1597596861206474,
      "grad_norm": 1.9289289855058362,
      "learning_rate": 2.0488638775146307e-06,
      "loss": 0.4405,
      "step": 9459
    },
    {
      "epoch": 1.159882295242766,
      "grad_norm": 1.951360485617364,
      "learning_rate": 2.04836536810652e-06,
      "loss": 0.4358,
      "step": 9460
    },
    {
      "epoch": 1.1600049043648848,
      "grad_norm": 1.8413900916485288,
      "learning_rate": 2.047866877261391e-06,
      "loss": 0.3949,
      "step": 9461
    },
    {
      "epoch": 1.1601275134870035,
      "grad_norm": 1.937842072260595,
      "learning_rate": 2.0473684049997333e-06,
      "loss": 0.4056,
      "step": 9462
    },
    {
      "epoch": 1.1602501226091222,
      "grad_norm": 1.9185439723306457,
      "learning_rate": 2.046869951342036e-06,
      "loss": 0.4088,
      "step": 9463
    },
    {
      "epoch": 1.160372731731241,
      "grad_norm": 1.9931342415348792,
      "learning_rate": 2.0463715163087867e-06,
      "loss": 0.4449,
      "step": 9464
    },
    {
      "epoch": 1.1604953408533594,
      "grad_norm": 2.034306404324844,
      "learning_rate": 2.0458730999204704e-06,
      "loss": 0.4628,
      "step": 9465
    },
    {
      "epoch": 1.160617949975478,
      "grad_norm": 1.8031720303925667,
      "learning_rate": 2.045374702197574e-06,
      "loss": 0.4193,
      "step": 9466
    },
    {
      "epoch": 1.1607405590975968,
      "grad_norm": 2.1310358026115312,
      "learning_rate": 2.0448763231605817e-06,
      "loss": 0.416,
      "step": 9467
    },
    {
      "epoch": 1.1608631682197155,
      "grad_norm": 1.8530675857420722,
      "learning_rate": 2.0443779628299794e-06,
      "loss": 0.4242,
      "step": 9468
    },
    {
      "epoch": 1.1609857773418342,
      "grad_norm": 1.9316011428428201,
      "learning_rate": 2.0438796212262485e-06,
      "loss": 0.4546,
      "step": 9469
    },
    {
      "epoch": 1.1611083864639529,
      "grad_norm": 1.9300902523131758,
      "learning_rate": 2.043381298369872e-06,
      "loss": 0.4002,
      "step": 9470
    },
    {
      "epoch": 1.1612309955860716,
      "grad_norm": 1.9137468409599616,
      "learning_rate": 2.0428829942813334e-06,
      "loss": 0.4371,
      "step": 9471
    },
    {
      "epoch": 1.1613536047081903,
      "grad_norm": 1.8648492438918278,
      "learning_rate": 2.0423847089811133e-06,
      "loss": 0.471,
      "step": 9472
    },
    {
      "epoch": 1.161476213830309,
      "grad_norm": 1.9864893533630605,
      "learning_rate": 2.041886442489691e-06,
      "loss": 0.4421,
      "step": 9473
    },
    {
      "epoch": 1.1615988229524277,
      "grad_norm": 2.1429550006103577,
      "learning_rate": 2.0413881948275476e-06,
      "loss": 0.4369,
      "step": 9474
    },
    {
      "epoch": 1.1617214320745464,
      "grad_norm": 2.0763971181098353,
      "learning_rate": 2.0408899660151602e-06,
      "loss": 0.4507,
      "step": 9475
    },
    {
      "epoch": 1.161844041196665,
      "grad_norm": 1.731073017278292,
      "learning_rate": 2.0403917560730093e-06,
      "loss": 0.397,
      "step": 9476
    },
    {
      "epoch": 1.1619666503187838,
      "grad_norm": 1.9119250267397536,
      "learning_rate": 2.0398935650215705e-06,
      "loss": 0.4126,
      "step": 9477
    },
    {
      "epoch": 1.1620892594409025,
      "grad_norm": 1.9135389624190622,
      "learning_rate": 2.0393953928813205e-06,
      "loss": 0.4407,
      "step": 9478
    },
    {
      "epoch": 1.1622118685630212,
      "grad_norm": 1.9327252772816257,
      "learning_rate": 2.038897239672736e-06,
      "loss": 0.4547,
      "step": 9479
    },
    {
      "epoch": 1.1623344776851399,
      "grad_norm": 1.8224183374963525,
      "learning_rate": 2.03839910541629e-06,
      "loss": 0.4607,
      "step": 9480
    },
    {
      "epoch": 1.1624570868072586,
      "grad_norm": 2.01598837523945,
      "learning_rate": 2.0379009901324594e-06,
      "loss": 0.4395,
      "step": 9481
    },
    {
      "epoch": 1.1625796959293773,
      "grad_norm": 2.1126269590307896,
      "learning_rate": 2.0374028938417153e-06,
      "loss": 0.4952,
      "step": 9482
    },
    {
      "epoch": 1.1627023050514957,
      "grad_norm": 1.9410270507807574,
      "learning_rate": 2.036904816564531e-06,
      "loss": 0.4672,
      "step": 9483
    },
    {
      "epoch": 1.1628249141736144,
      "grad_norm": 2.088370591449624,
      "learning_rate": 2.036406758321379e-06,
      "loss": 0.4387,
      "step": 9484
    },
    {
      "epoch": 1.1629475232957331,
      "grad_norm": 1.8091426588577364,
      "learning_rate": 2.0359087191327303e-06,
      "loss": 0.4145,
      "step": 9485
    },
    {
      "epoch": 1.1630701324178518,
      "grad_norm": 1.8431429368150436,
      "learning_rate": 2.0354106990190544e-06,
      "loss": 0.461,
      "step": 9486
    },
    {
      "epoch": 1.1631927415399705,
      "grad_norm": 1.9417816109331325,
      "learning_rate": 2.0349126980008215e-06,
      "loss": 0.448,
      "step": 9487
    },
    {
      "epoch": 1.1633153506620892,
      "grad_norm": 2.039337881422489,
      "learning_rate": 2.0344147160984993e-06,
      "loss": 0.4054,
      "step": 9488
    },
    {
      "epoch": 1.163437959784208,
      "grad_norm": 2.141803473732646,
      "learning_rate": 2.033916753332558e-06,
      "loss": 0.4216,
      "step": 9489
    },
    {
      "epoch": 1.1635605689063266,
      "grad_norm": 1.9794199116613995,
      "learning_rate": 2.033418809723462e-06,
      "loss": 0.4011,
      "step": 9490
    },
    {
      "epoch": 1.1636831780284453,
      "grad_norm": 2.09554695650276,
      "learning_rate": 2.0329208852916795e-06,
      "loss": 0.4569,
      "step": 9491
    },
    {
      "epoch": 1.163805787150564,
      "grad_norm": 1.867159054954286,
      "learning_rate": 2.032422980057675e-06,
      "loss": 0.4074,
      "step": 9492
    },
    {
      "epoch": 1.1639283962726827,
      "grad_norm": 1.8817237915857574,
      "learning_rate": 2.031925094041915e-06,
      "loss": 0.4251,
      "step": 9493
    },
    {
      "epoch": 1.1640510053948014,
      "grad_norm": 1.9241937456596467,
      "learning_rate": 2.031427227264861e-06,
      "loss": 0.4326,
      "step": 9494
    },
    {
      "epoch": 1.1641736145169201,
      "grad_norm": 1.8122061792201227,
      "learning_rate": 2.0309293797469775e-06,
      "loss": 0.3922,
      "step": 9495
    },
    {
      "epoch": 1.1642962236390388,
      "grad_norm": 1.922425636682902,
      "learning_rate": 2.0304315515087274e-06,
      "loss": 0.4091,
      "step": 9496
    },
    {
      "epoch": 1.1644188327611573,
      "grad_norm": 2.02371984325944,
      "learning_rate": 2.0299337425705716e-06,
      "loss": 0.4382,
      "step": 9497
    },
    {
      "epoch": 1.164541441883276,
      "grad_norm": 2.16893775330621,
      "learning_rate": 2.029435952952972e-06,
      "loss": 0.4827,
      "step": 9498
    },
    {
      "epoch": 1.1646640510053947,
      "grad_norm": 1.831329602649021,
      "learning_rate": 2.0289381826763876e-06,
      "loss": 0.4534,
      "step": 9499
    },
    {
      "epoch": 1.1647866601275134,
      "grad_norm": 1.8070067028871903,
      "learning_rate": 2.0284404317612765e-06,
      "loss": 0.4821,
      "step": 9500
    },
    {
      "epoch": 1.1649092692496321,
      "grad_norm": 2.077956145304615,
      "learning_rate": 2.027942700228099e-06,
      "loss": 0.4587,
      "step": 9501
    },
    {
      "epoch": 1.1650318783717508,
      "grad_norm": 1.8197614585290922,
      "learning_rate": 2.0274449880973135e-06,
      "loss": 0.4099,
      "step": 9502
    },
    {
      "epoch": 1.1651544874938695,
      "grad_norm": 1.8934959423868822,
      "learning_rate": 2.026947295389375e-06,
      "loss": 0.4242,
      "step": 9503
    },
    {
      "epoch": 1.1652770966159882,
      "grad_norm": 1.9057877996612733,
      "learning_rate": 2.02644962212474e-06,
      "loss": 0.4211,
      "step": 9504
    },
    {
      "epoch": 1.165399705738107,
      "grad_norm": 1.9797687256929923,
      "learning_rate": 2.0259519683238635e-06,
      "loss": 0.4305,
      "step": 9505
    },
    {
      "epoch": 1.1655223148602256,
      "grad_norm": 2.150774799327222,
      "learning_rate": 2.025454334007201e-06,
      "loss": 0.4847,
      "step": 9506
    },
    {
      "epoch": 1.1656449239823443,
      "grad_norm": 1.8146341606320682,
      "learning_rate": 2.024956719195205e-06,
      "loss": 0.4273,
      "step": 9507
    },
    {
      "epoch": 1.165767533104463,
      "grad_norm": 1.852342433189321,
      "learning_rate": 2.0244591239083284e-06,
      "loss": 0.3978,
      "step": 9508
    },
    {
      "epoch": 1.1658901422265817,
      "grad_norm": 1.9411099514551917,
      "learning_rate": 2.023961548167024e-06,
      "loss": 0.432,
      "step": 9509
    },
    {
      "epoch": 1.1660127513487004,
      "grad_norm": 1.8558673959592498,
      "learning_rate": 2.0234639919917436e-06,
      "loss": 0.4304,
      "step": 9510
    },
    {
      "epoch": 1.1661353604708191,
      "grad_norm": 1.8456224417278186,
      "learning_rate": 2.022966455402936e-06,
      "loss": 0.4072,
      "step": 9511
    },
    {
      "epoch": 1.1662579695929378,
      "grad_norm": 1.8744422530633034,
      "learning_rate": 2.0224689384210515e-06,
      "loss": 0.4364,
      "step": 9512
    },
    {
      "epoch": 1.1663805787150565,
      "grad_norm": 1.9342077097179389,
      "learning_rate": 2.021971441066539e-06,
      "loss": 0.4009,
      "step": 9513
    },
    {
      "epoch": 1.166503187837175,
      "grad_norm": 2.0163668060068742,
      "learning_rate": 2.021473963359848e-06,
      "loss": 0.4516,
      "step": 9514
    },
    {
      "epoch": 1.1666257969592937,
      "grad_norm": 1.9910310719506366,
      "learning_rate": 2.0209765053214223e-06,
      "loss": 0.4386,
      "step": 9515
    },
    {
      "epoch": 1.1667484060814124,
      "grad_norm": 1.6084829387660586,
      "learning_rate": 2.0204790669717107e-06,
      "loss": 0.4255,
      "step": 9516
    },
    {
      "epoch": 1.166871015203531,
      "grad_norm": 1.98836170882953,
      "learning_rate": 2.019981648331159e-06,
      "loss": 0.4306,
      "step": 9517
    },
    {
      "epoch": 1.1669936243256498,
      "grad_norm": 1.8970576896590776,
      "learning_rate": 2.0194842494202116e-06,
      "loss": 0.4222,
      "step": 9518
    },
    {
      "epoch": 1.1671162334477685,
      "grad_norm": 2.009953121768174,
      "learning_rate": 2.0189868702593116e-06,
      "loss": 0.5022,
      "step": 9519
    },
    {
      "epoch": 1.1672388425698872,
      "grad_norm": 2.050279422333777,
      "learning_rate": 2.018489510868902e-06,
      "loss": 0.4105,
      "step": 9520
    },
    {
      "epoch": 1.167361451692006,
      "grad_norm": 1.7917060079336498,
      "learning_rate": 2.0179921712694273e-06,
      "loss": 0.4524,
      "step": 9521
    },
    {
      "epoch": 1.1674840608141246,
      "grad_norm": 1.9368826280936204,
      "learning_rate": 2.017494851481327e-06,
      "loss": 0.4442,
      "step": 9522
    },
    {
      "epoch": 1.1676066699362433,
      "grad_norm": 1.96812160075301,
      "learning_rate": 2.0169975515250436e-06,
      "loss": 0.423,
      "step": 9523
    },
    {
      "epoch": 1.167729279058362,
      "grad_norm": 1.9435631754922011,
      "learning_rate": 2.0165002714210147e-06,
      "loss": 0.4472,
      "step": 9524
    },
    {
      "epoch": 1.1678518881804807,
      "grad_norm": 1.8431440831037282,
      "learning_rate": 2.016003011189681e-06,
      "loss": 0.4495,
      "step": 9525
    },
    {
      "epoch": 1.1679744973025994,
      "grad_norm": 1.8535124645608794,
      "learning_rate": 2.01550577085148e-06,
      "loss": 0.4498,
      "step": 9526
    },
    {
      "epoch": 1.168097106424718,
      "grad_norm": 1.9478797607446583,
      "learning_rate": 2.015008550426851e-06,
      "loss": 0.3993,
      "step": 9527
    },
    {
      "epoch": 1.1682197155468366,
      "grad_norm": 1.8197062200038634,
      "learning_rate": 2.014511349936227e-06,
      "loss": 0.4296,
      "step": 9528
    },
    {
      "epoch": 1.1683423246689553,
      "grad_norm": 2.1848252809110655,
      "learning_rate": 2.0140141694000475e-06,
      "loss": 0.4371,
      "step": 9529
    },
    {
      "epoch": 1.168464933791074,
      "grad_norm": 1.7421086315355443,
      "learning_rate": 2.0135170088387457e-06,
      "loss": 0.3955,
      "step": 9530
    },
    {
      "epoch": 1.1685875429131927,
      "grad_norm": 1.9491530269269828,
      "learning_rate": 2.0130198682727566e-06,
      "loss": 0.4503,
      "step": 9531
    },
    {
      "epoch": 1.1687101520353114,
      "grad_norm": 1.9912519874367174,
      "learning_rate": 2.012522747722512e-06,
      "loss": 0.4814,
      "step": 9532
    },
    {
      "epoch": 1.16883276115743,
      "grad_norm": 2.1442212385683335,
      "learning_rate": 2.012025647208446e-06,
      "loss": 0.4388,
      "step": 9533
    },
    {
      "epoch": 1.1689553702795488,
      "grad_norm": 1.8701848094120275,
      "learning_rate": 2.0115285667509894e-06,
      "loss": 0.4689,
      "step": 9534
    },
    {
      "epoch": 1.1690779794016675,
      "grad_norm": 1.8117856165346322,
      "learning_rate": 2.0110315063705744e-06,
      "loss": 0.4369,
      "step": 9535
    },
    {
      "epoch": 1.1692005885237862,
      "grad_norm": 1.870403905946374,
      "learning_rate": 2.0105344660876293e-06,
      "loss": 0.4121,
      "step": 9536
    },
    {
      "epoch": 1.1693231976459049,
      "grad_norm": 1.9909024285300987,
      "learning_rate": 2.0100374459225845e-06,
      "loss": 0.4352,
      "step": 9537
    },
    {
      "epoch": 1.1694458067680236,
      "grad_norm": 2.0794409914393417,
      "learning_rate": 2.0095404458958674e-06,
      "loss": 0.4336,
      "step": 9538
    },
    {
      "epoch": 1.1695684158901423,
      "grad_norm": 1.8679003357865878,
      "learning_rate": 2.0090434660279077e-06,
      "loss": 0.465,
      "step": 9539
    },
    {
      "epoch": 1.169691025012261,
      "grad_norm": 1.9344009492584697,
      "learning_rate": 2.00854650633913e-06,
      "loss": 0.4633,
      "step": 9540
    },
    {
      "epoch": 1.1698136341343797,
      "grad_norm": 2.176142472453996,
      "learning_rate": 2.0080495668499604e-06,
      "loss": 0.4371,
      "step": 9541
    },
    {
      "epoch": 1.1699362432564984,
      "grad_norm": 1.7826610665629858,
      "learning_rate": 2.007552647580825e-06,
      "loss": 0.3755,
      "step": 9542
    },
    {
      "epoch": 1.170058852378617,
      "grad_norm": 1.9639737551609724,
      "learning_rate": 2.007055748552148e-06,
      "loss": 0.4275,
      "step": 9543
    },
    {
      "epoch": 1.1701814615007358,
      "grad_norm": 1.8336702790055193,
      "learning_rate": 2.006558869784352e-06,
      "loss": 0.4572,
      "step": 9544
    },
    {
      "epoch": 1.1703040706228545,
      "grad_norm": 2.2123507521085455,
      "learning_rate": 2.00606201129786e-06,
      "loss": 0.4876,
      "step": 9545
    },
    {
      "epoch": 1.170426679744973,
      "grad_norm": 1.9536961176566399,
      "learning_rate": 2.005565173113093e-06,
      "loss": 0.3883,
      "step": 9546
    },
    {
      "epoch": 1.1705492888670916,
      "grad_norm": 1.8931691601503526,
      "learning_rate": 2.0050683552504733e-06,
      "loss": 0.4059,
      "step": 9547
    },
    {
      "epoch": 1.1706718979892103,
      "grad_norm": 1.7893473424062976,
      "learning_rate": 2.0045715577304214e-06,
      "loss": 0.4215,
      "step": 9548
    },
    {
      "epoch": 1.170794507111329,
      "grad_norm": 2.0029338929611638,
      "learning_rate": 2.0040747805733545e-06,
      "loss": 0.4169,
      "step": 9549
    },
    {
      "epoch": 1.1709171162334477,
      "grad_norm": 2.1955676974292286,
      "learning_rate": 2.0035780237996925e-06,
      "loss": 0.4502,
      "step": 9550
    },
    {
      "epoch": 1.1710397253555664,
      "grad_norm": 1.8924134956052703,
      "learning_rate": 2.0030812874298518e-06,
      "loss": 0.4413,
      "step": 9551
    },
    {
      "epoch": 1.1711623344776851,
      "grad_norm": 1.8929989146595652,
      "learning_rate": 2.0025845714842514e-06,
      "loss": 0.453,
      "step": 9552
    },
    {
      "epoch": 1.1712849435998038,
      "grad_norm": 2.128472061386677,
      "learning_rate": 2.0020878759833045e-06,
      "loss": 0.4416,
      "step": 9553
    },
    {
      "epoch": 1.1714075527219225,
      "grad_norm": 1.9246890397116205,
      "learning_rate": 2.0015912009474274e-06,
      "loss": 0.3785,
      "step": 9554
    },
    {
      "epoch": 1.1715301618440412,
      "grad_norm": 1.9877186715233361,
      "learning_rate": 2.001094546397035e-06,
      "loss": 0.5014,
      "step": 9555
    },
    {
      "epoch": 1.17165277096616,
      "grad_norm": 1.9501242702959898,
      "learning_rate": 2.0005979123525398e-06,
      "loss": 0.4092,
      "step": 9556
    },
    {
      "epoch": 1.1717753800882786,
      "grad_norm": 1.7406050440658944,
      "learning_rate": 2.0001012988343545e-06,
      "loss": 0.4339,
      "step": 9557
    },
    {
      "epoch": 1.1718979892103973,
      "grad_norm": 1.9944707472861485,
      "learning_rate": 1.99960470586289e-06,
      "loss": 0.4134,
      "step": 9558
    },
    {
      "epoch": 1.1720205983325158,
      "grad_norm": 1.9969585580133402,
      "learning_rate": 1.999108133458559e-06,
      "loss": 0.4147,
      "step": 9559
    },
    {
      "epoch": 1.1721432074546345,
      "grad_norm": 1.8610600360516423,
      "learning_rate": 1.9986115816417704e-06,
      "loss": 0.477,
      "step": 9560
    },
    {
      "epoch": 1.1722658165767532,
      "grad_norm": 2.086843605213431,
      "learning_rate": 1.998115050432933e-06,
      "loss": 0.4305,
      "step": 9561
    },
    {
      "epoch": 1.172388425698872,
      "grad_norm": 1.8772529707556993,
      "learning_rate": 1.9976185398524554e-06,
      "loss": 0.4381,
      "step": 9562
    },
    {
      "epoch": 1.1725110348209906,
      "grad_norm": 1.8723465805718205,
      "learning_rate": 1.9971220499207457e-06,
      "loss": 0.4466,
      "step": 9563
    },
    {
      "epoch": 1.1726336439431093,
      "grad_norm": 2.048657430711223,
      "learning_rate": 1.9966255806582104e-06,
      "loss": 0.4369,
      "step": 9564
    },
    {
      "epoch": 1.172756253065228,
      "grad_norm": 1.9172728282712248,
      "learning_rate": 1.996129132085254e-06,
      "loss": 0.4045,
      "step": 9565
    },
    {
      "epoch": 1.1728788621873467,
      "grad_norm": 1.7140094176353073,
      "learning_rate": 1.9956327042222822e-06,
      "loss": 0.4114,
      "step": 9566
    },
    {
      "epoch": 1.1730014713094654,
      "grad_norm": 1.8791563122961075,
      "learning_rate": 1.9951362970896995e-06,
      "loss": 0.4467,
      "step": 9567
    },
    {
      "epoch": 1.1731240804315841,
      "grad_norm": 2.1223542180458312,
      "learning_rate": 1.9946399107079095e-06,
      "loss": 0.4728,
      "step": 9568
    },
    {
      "epoch": 1.1732466895537028,
      "grad_norm": 2.0099445464502237,
      "learning_rate": 1.994143545097313e-06,
      "loss": 0.437,
      "step": 9569
    },
    {
      "epoch": 1.1733692986758215,
      "grad_norm": 2.046828283054013,
      "learning_rate": 1.9936472002783125e-06,
      "loss": 0.465,
      "step": 9570
    },
    {
      "epoch": 1.1734919077979402,
      "grad_norm": 2.1059820160857896,
      "learning_rate": 1.993150876271308e-06,
      "loss": 0.4849,
      "step": 9571
    },
    {
      "epoch": 1.173614516920059,
      "grad_norm": 2.0515859185529095,
      "learning_rate": 1.9926545730967005e-06,
      "loss": 0.3977,
      "step": 9572
    },
    {
      "epoch": 1.1737371260421776,
      "grad_norm": 1.8130295454026126,
      "learning_rate": 1.9921582907748885e-06,
      "loss": 0.4399,
      "step": 9573
    },
    {
      "epoch": 1.1738597351642963,
      "grad_norm": 1.789621479926594,
      "learning_rate": 1.991662029326269e-06,
      "loss": 0.3968,
      "step": 9574
    },
    {
      "epoch": 1.173982344286415,
      "grad_norm": 1.8139951333632067,
      "learning_rate": 1.9911657887712405e-06,
      "loss": 0.4015,
      "step": 9575
    },
    {
      "epoch": 1.1741049534085337,
      "grad_norm": 1.7973979282037804,
      "learning_rate": 1.990669569130198e-06,
      "loss": 0.4063,
      "step": 9576
    },
    {
      "epoch": 1.1742275625306522,
      "grad_norm": 1.845692394714,
      "learning_rate": 1.99017337042354e-06,
      "loss": 0.4201,
      "step": 9577
    },
    {
      "epoch": 1.174350171652771,
      "grad_norm": 1.6585146282299532,
      "learning_rate": 1.9896771926716574e-06,
      "loss": 0.4638,
      "step": 9578
    },
    {
      "epoch": 1.1744727807748896,
      "grad_norm": 1.9337085653140575,
      "learning_rate": 1.989181035894946e-06,
      "loss": 0.4578,
      "step": 9579
    },
    {
      "epoch": 1.1745953898970083,
      "grad_norm": 1.7925807736866797,
      "learning_rate": 1.9886849001137984e-06,
      "loss": 0.4343,
      "step": 9580
    },
    {
      "epoch": 1.174717999019127,
      "grad_norm": 1.891436353932711,
      "learning_rate": 1.988188785348607e-06,
      "loss": 0.3974,
      "step": 9581
    },
    {
      "epoch": 1.1748406081412457,
      "grad_norm": 1.9815589762479118,
      "learning_rate": 1.987692691619762e-06,
      "loss": 0.4277,
      "step": 9582
    },
    {
      "epoch": 1.1749632172633644,
      "grad_norm": 1.8720282693164796,
      "learning_rate": 1.9871966189476546e-06,
      "loss": 0.4251,
      "step": 9583
    },
    {
      "epoch": 1.175085826385483,
      "grad_norm": 2.044820577699677,
      "learning_rate": 1.9867005673526736e-06,
      "loss": 0.4362,
      "step": 9584
    },
    {
      "epoch": 1.1752084355076018,
      "grad_norm": 1.9079828117929252,
      "learning_rate": 1.986204536855209e-06,
      "loss": 0.4268,
      "step": 9585
    },
    {
      "epoch": 1.1753310446297205,
      "grad_norm": 1.7811020165852611,
      "learning_rate": 1.9857085274756467e-06,
      "loss": 0.4131,
      "step": 9586
    },
    {
      "epoch": 1.1754536537518392,
      "grad_norm": 1.9733977524667616,
      "learning_rate": 1.9852125392343744e-06,
      "loss": 0.4032,
      "step": 9587
    },
    {
      "epoch": 1.175576262873958,
      "grad_norm": 1.8549014394115213,
      "learning_rate": 1.9847165721517784e-06,
      "loss": 0.3997,
      "step": 9588
    },
    {
      "epoch": 1.1756988719960766,
      "grad_norm": 2.2187679461711944,
      "learning_rate": 1.984220626248244e-06,
      "loss": 0.4701,
      "step": 9589
    },
    {
      "epoch": 1.1758214811181953,
      "grad_norm": 1.8052868435326888,
      "learning_rate": 1.983724701544154e-06,
      "loss": 0.406,
      "step": 9590
    },
    {
      "epoch": 1.1759440902403138,
      "grad_norm": 1.997733924785202,
      "learning_rate": 1.983228798059893e-06,
      "loss": 0.4769,
      "step": 9591
    },
    {
      "epoch": 1.1760666993624325,
      "grad_norm": 2.0519504088544105,
      "learning_rate": 1.9827329158158432e-06,
      "loss": 0.4333,
      "step": 9592
    },
    {
      "epoch": 1.1761893084845512,
      "grad_norm": 1.9722809785434887,
      "learning_rate": 1.9822370548323863e-06,
      "loss": 0.4433,
      "step": 9593
    },
    {
      "epoch": 1.1763119176066699,
      "grad_norm": 1.9392374527266323,
      "learning_rate": 1.981741215129904e-06,
      "loss": 0.3769,
      "step": 9594
    },
    {
      "epoch": 1.1764345267287886,
      "grad_norm": 1.7776332548025362,
      "learning_rate": 1.9812453967287746e-06,
      "loss": 0.4473,
      "step": 9595
    },
    {
      "epoch": 1.1765571358509073,
      "grad_norm": 1.7993923928540423,
      "learning_rate": 1.980749599649377e-06,
      "loss": 0.4387,
      "step": 9596
    },
    {
      "epoch": 1.176679744973026,
      "grad_norm": 2.066909185274517,
      "learning_rate": 1.980253823912091e-06,
      "loss": 0.4558,
      "step": 9597
    },
    {
      "epoch": 1.1768023540951447,
      "grad_norm": 2.013662235749958,
      "learning_rate": 1.9797580695372932e-06,
      "loss": 0.4429,
      "step": 9598
    },
    {
      "epoch": 1.1769249632172634,
      "grad_norm": 2.1346500457759503,
      "learning_rate": 1.9792623365453596e-06,
      "loss": 0.5028,
      "step": 9599
    },
    {
      "epoch": 1.177047572339382,
      "grad_norm": 1.707596224738986,
      "learning_rate": 1.9787666249566653e-06,
      "loss": 0.4505,
      "step": 9600
    },
    {
      "epoch": 1.1771701814615008,
      "grad_norm": 1.903479846395852,
      "learning_rate": 1.978270934791586e-06,
      "loss": 0.4533,
      "step": 9601
    },
    {
      "epoch": 1.1772927905836195,
      "grad_norm": 1.9485609846143854,
      "learning_rate": 1.9777752660704954e-06,
      "loss": 0.4217,
      "step": 9602
    },
    {
      "epoch": 1.1774153997057382,
      "grad_norm": 1.9222461138370355,
      "learning_rate": 1.977279618813765e-06,
      "loss": 0.3966,
      "step": 9603
    },
    {
      "epoch": 1.1775380088278569,
      "grad_norm": 2.029570021067037,
      "learning_rate": 1.9767839930417673e-06,
      "loss": 0.3967,
      "step": 9604
    },
    {
      "epoch": 1.1776606179499756,
      "grad_norm": 1.7420929592564522,
      "learning_rate": 1.9762883887748745e-06,
      "loss": 0.4295,
      "step": 9605
    },
    {
      "epoch": 1.1777832270720943,
      "grad_norm": 1.9326886165755701,
      "learning_rate": 1.9757928060334563e-06,
      "loss": 0.4031,
      "step": 9606
    },
    {
      "epoch": 1.177905836194213,
      "grad_norm": 1.8929899766449447,
      "learning_rate": 1.9752972448378817e-06,
      "loss": 0.4045,
      "step": 9607
    },
    {
      "epoch": 1.1780284453163314,
      "grad_norm": 1.8884548053916816,
      "learning_rate": 1.974801705208519e-06,
      "loss": 0.4137,
      "step": 9608
    },
    {
      "epoch": 1.1781510544384501,
      "grad_norm": 1.9155667725933563,
      "learning_rate": 1.9743061871657355e-06,
      "loss": 0.4343,
      "step": 9609
    },
    {
      "epoch": 1.1782736635605688,
      "grad_norm": 1.8734332431558374,
      "learning_rate": 1.9738106907299e-06,
      "loss": 0.4184,
      "step": 9610
    },
    {
      "epoch": 1.1783962726826875,
      "grad_norm": 1.9931081854542407,
      "learning_rate": 1.973315215921375e-06,
      "loss": 0.4132,
      "step": 9611
    },
    {
      "epoch": 1.1785188818048062,
      "grad_norm": 2.0540477704450772,
      "learning_rate": 1.9728197627605275e-06,
      "loss": 0.5278,
      "step": 9612
    },
    {
      "epoch": 1.178641490926925,
      "grad_norm": 1.8676702318303704,
      "learning_rate": 1.972324331267721e-06,
      "loss": 0.425,
      "step": 9613
    },
    {
      "epoch": 1.1787641000490436,
      "grad_norm": 1.7985274963724065,
      "learning_rate": 1.9718289214633195e-06,
      "loss": 0.4394,
      "step": 9614
    },
    {
      "epoch": 1.1788867091711623,
      "grad_norm": 1.9154719667167328,
      "learning_rate": 1.9713335333676835e-06,
      "loss": 0.4331,
      "step": 9615
    },
    {
      "epoch": 1.179009318293281,
      "grad_norm": 1.9786393170318513,
      "learning_rate": 1.970838167001175e-06,
      "loss": 0.4549,
      "step": 9616
    },
    {
      "epoch": 1.1791319274153997,
      "grad_norm": 2.1686984414232895,
      "learning_rate": 1.970342822384155e-06,
      "loss": 0.4544,
      "step": 9617
    },
    {
      "epoch": 1.1792545365375184,
      "grad_norm": 1.9056548175118344,
      "learning_rate": 1.9698474995369827e-06,
      "loss": 0.5059,
      "step": 9618
    },
    {
      "epoch": 1.1793771456596371,
      "grad_norm": 1.8836459559740149,
      "learning_rate": 1.9693521984800176e-06,
      "loss": 0.4161,
      "step": 9619
    },
    {
      "epoch": 1.1794997547817558,
      "grad_norm": 1.8819060722324517,
      "learning_rate": 1.9688569192336155e-06,
      "loss": 0.4309,
      "step": 9620
    },
    {
      "epoch": 1.1796223639038745,
      "grad_norm": 2.0507253122420264,
      "learning_rate": 1.9683616618181345e-06,
      "loss": 0.4764,
      "step": 9621
    },
    {
      "epoch": 1.179744973025993,
      "grad_norm": 2.0903936568656523,
      "learning_rate": 1.9678664262539297e-06,
      "loss": 0.4079,
      "step": 9622
    },
    {
      "epoch": 1.1798675821481117,
      "grad_norm": 1.8860712943291196,
      "learning_rate": 1.967371212561359e-06,
      "loss": 0.4302,
      "step": 9623
    },
    {
      "epoch": 1.1799901912702304,
      "grad_norm": 2.094202859919371,
      "learning_rate": 1.9668760207607724e-06,
      "loss": 0.4474,
      "step": 9624
    },
    {
      "epoch": 1.1801128003923491,
      "grad_norm": 2.030072120123907,
      "learning_rate": 1.966380850872526e-06,
      "loss": 0.4706,
      "step": 9625
    },
    {
      "epoch": 1.1802354095144678,
      "grad_norm": 1.7692167705460289,
      "learning_rate": 1.965885702916971e-06,
      "loss": 0.4189,
      "step": 9626
    },
    {
      "epoch": 1.1803580186365865,
      "grad_norm": 1.9358146619792909,
      "learning_rate": 1.9653905769144597e-06,
      "loss": 0.389,
      "step": 9627
    },
    {
      "epoch": 1.1804806277587052,
      "grad_norm": 1.9324018240776226,
      "learning_rate": 1.964895472885342e-06,
      "loss": 0.415,
      "step": 9628
    },
    {
      "epoch": 1.180603236880824,
      "grad_norm": 2.0050879009945892,
      "learning_rate": 1.9644003908499664e-06,
      "loss": 0.384,
      "step": 9629
    },
    {
      "epoch": 1.1807258460029426,
      "grad_norm": 1.7532047155598944,
      "learning_rate": 1.963905330828684e-06,
      "loss": 0.4377,
      "step": 9630
    },
    {
      "epoch": 1.1808484551250613,
      "grad_norm": 1.9874866749495794,
      "learning_rate": 1.963410292841842e-06,
      "loss": 0.3872,
      "step": 9631
    },
    {
      "epoch": 1.18097106424718,
      "grad_norm": 1.873729802703927,
      "learning_rate": 1.962915276909787e-06,
      "loss": 0.4142,
      "step": 9632
    },
    {
      "epoch": 1.1810936733692987,
      "grad_norm": 1.936600330031192,
      "learning_rate": 1.9624202830528642e-06,
      "loss": 0.4527,
      "step": 9633
    },
    {
      "epoch": 1.1812162824914174,
      "grad_norm": 1.9869974670694617,
      "learning_rate": 1.961925311291419e-06,
      "loss": 0.4382,
      "step": 9634
    },
    {
      "epoch": 1.1813388916135361,
      "grad_norm": 1.9756000160812668,
      "learning_rate": 1.961430361645798e-06,
      "loss": 0.4198,
      "step": 9635
    },
    {
      "epoch": 1.1814615007356548,
      "grad_norm": 1.9876658496610486,
      "learning_rate": 1.9609354341363407e-06,
      "loss": 0.4036,
      "step": 9636
    },
    {
      "epoch": 1.1815841098577735,
      "grad_norm": 1.8563273021531945,
      "learning_rate": 1.9604405287833924e-06,
      "loss": 0.4236,
      "step": 9637
    },
    {
      "epoch": 1.1817067189798922,
      "grad_norm": 1.9206004477514595,
      "learning_rate": 1.959945645607293e-06,
      "loss": 0.4455,
      "step": 9638
    },
    {
      "epoch": 1.181829328102011,
      "grad_norm": 1.7538069813715664,
      "learning_rate": 1.9594507846283843e-06,
      "loss": 0.4118,
      "step": 9639
    },
    {
      "epoch": 1.1819519372241294,
      "grad_norm": 1.9452739370253918,
      "learning_rate": 1.958955945867005e-06,
      "loss": 0.4879,
      "step": 9640
    },
    {
      "epoch": 1.182074546346248,
      "grad_norm": 1.8215772376602366,
      "learning_rate": 1.958461129343494e-06,
      "loss": 0.4258,
      "step": 9641
    },
    {
      "epoch": 1.1821971554683668,
      "grad_norm": 1.7777908901175137,
      "learning_rate": 1.9579663350781893e-06,
      "loss": 0.4116,
      "step": 9642
    },
    {
      "epoch": 1.1823197645904855,
      "grad_norm": 2.0553102964684875,
      "learning_rate": 1.957471563091428e-06,
      "loss": 0.3929,
      "step": 9643
    },
    {
      "epoch": 1.1824423737126042,
      "grad_norm": 1.9959092842177286,
      "learning_rate": 1.9569768134035465e-06,
      "loss": 0.4453,
      "step": 9644
    },
    {
      "epoch": 1.1825649828347229,
      "grad_norm": 1.8369467808923472,
      "learning_rate": 1.9564820860348785e-06,
      "loss": 0.4586,
      "step": 9645
    },
    {
      "epoch": 1.1826875919568416,
      "grad_norm": 2.002925198220995,
      "learning_rate": 1.9559873810057593e-06,
      "loss": 0.4586,
      "step": 9646
    },
    {
      "epoch": 1.1828102010789603,
      "grad_norm": 1.7354581491807928,
      "learning_rate": 1.9554926983365213e-06,
      "loss": 0.4312,
      "step": 9647
    },
    {
      "epoch": 1.182932810201079,
      "grad_norm": 1.9276543645471176,
      "learning_rate": 1.954998038047499e-06,
      "loss": 0.3739,
      "step": 9648
    },
    {
      "epoch": 1.1830554193231977,
      "grad_norm": 2.0722644063980136,
      "learning_rate": 1.95450340015902e-06,
      "loss": 0.4572,
      "step": 9649
    },
    {
      "epoch": 1.1831780284453164,
      "grad_norm": 2.042072669495963,
      "learning_rate": 1.954008784691418e-06,
      "loss": 0.4503,
      "step": 9650
    },
    {
      "epoch": 1.183300637567435,
      "grad_norm": 1.9570677126588736,
      "learning_rate": 1.953514191665021e-06,
      "loss": 0.3721,
      "step": 9651
    },
    {
      "epoch": 1.1834232466895538,
      "grad_norm": 2.1096924723571133,
      "learning_rate": 1.9530196211001596e-06,
      "loss": 0.4977,
      "step": 9652
    },
    {
      "epoch": 1.1835458558116723,
      "grad_norm": 1.8414130789735794,
      "learning_rate": 1.952525073017159e-06,
      "loss": 0.4171,
      "step": 9653
    },
    {
      "epoch": 1.183668464933791,
      "grad_norm": 1.8288767430693278,
      "learning_rate": 1.952030547436347e-06,
      "loss": 0.423,
      "step": 9654
    },
    {
      "epoch": 1.1837910740559097,
      "grad_norm": 1.9060230840859533,
      "learning_rate": 1.95153604437805e-06,
      "loss": 0.4316,
      "step": 9655
    },
    {
      "epoch": 1.1839136831780284,
      "grad_norm": 2.0335657779417553,
      "learning_rate": 1.951041563862593e-06,
      "loss": 0.4409,
      "step": 9656
    },
    {
      "epoch": 1.184036292300147,
      "grad_norm": 2.00103435635404,
      "learning_rate": 1.9505471059102994e-06,
      "loss": 0.4883,
      "step": 9657
    },
    {
      "epoch": 1.1841589014222658,
      "grad_norm": 2.052711285176142,
      "learning_rate": 1.950052670541492e-06,
      "loss": 0.4306,
      "step": 9658
    },
    {
      "epoch": 1.1842815105443845,
      "grad_norm": 1.9433723982891298,
      "learning_rate": 1.9495582577764933e-06,
      "loss": 0.3965,
      "step": 9659
    },
    {
      "epoch": 1.1844041196665032,
      "grad_norm": 1.915933654047099,
      "learning_rate": 1.949063867635626e-06,
      "loss": 0.4457,
      "step": 9660
    },
    {
      "epoch": 1.1845267287886219,
      "grad_norm": 1.826555021054483,
      "learning_rate": 1.948569500139208e-06,
      "loss": 0.458,
      "step": 9661
    },
    {
      "epoch": 1.1846493379107406,
      "grad_norm": 1.8928850599941762,
      "learning_rate": 1.948075155307559e-06,
      "loss": 0.4325,
      "step": 9662
    },
    {
      "epoch": 1.1847719470328593,
      "grad_norm": 2.0108816065578545,
      "learning_rate": 1.9475808331609994e-06,
      "loss": 0.4757,
      "step": 9663
    },
    {
      "epoch": 1.184894556154978,
      "grad_norm": 1.8891204982963163,
      "learning_rate": 1.947086533719846e-06,
      "loss": 0.4301,
      "step": 9664
    },
    {
      "epoch": 1.1850171652770967,
      "grad_norm": 2.086624630388824,
      "learning_rate": 1.9465922570044138e-06,
      "loss": 0.4385,
      "step": 9665
    },
    {
      "epoch": 1.1851397743992154,
      "grad_norm": 2.09986946285118,
      "learning_rate": 1.94609800303502e-06,
      "loss": 0.4283,
      "step": 9666
    },
    {
      "epoch": 1.185262383521334,
      "grad_norm": 2.020308313242151,
      "learning_rate": 1.945603771831978e-06,
      "loss": 0.4137,
      "step": 9667
    },
    {
      "epoch": 1.1853849926434528,
      "grad_norm": 1.8834595656079673,
      "learning_rate": 1.945109563415604e-06,
      "loss": 0.4385,
      "step": 9668
    },
    {
      "epoch": 1.1855076017655715,
      "grad_norm": 1.747376565772658,
      "learning_rate": 1.9446153778062087e-06,
      "loss": 0.4282,
      "step": 9669
    },
    {
      "epoch": 1.1856302108876902,
      "grad_norm": 2.079707639545329,
      "learning_rate": 1.944121215024104e-06,
      "loss": 0.4501,
      "step": 9670
    },
    {
      "epoch": 1.1857528200098086,
      "grad_norm": 1.9717503808433936,
      "learning_rate": 1.9436270750896025e-06,
      "loss": 0.4385,
      "step": 9671
    },
    {
      "epoch": 1.1858754291319273,
      "grad_norm": 1.8837729108732937,
      "learning_rate": 1.9431329580230118e-06,
      "loss": 0.4247,
      "step": 9672
    },
    {
      "epoch": 1.185998038254046,
      "grad_norm": 1.984244030793884,
      "learning_rate": 1.9426388638446446e-06,
      "loss": 0.4499,
      "step": 9673
    },
    {
      "epoch": 1.1861206473761647,
      "grad_norm": 2.0341579102264484,
      "learning_rate": 1.9421447925748045e-06,
      "loss": 0.4145,
      "step": 9674
    },
    {
      "epoch": 1.1862432564982834,
      "grad_norm": 1.956304333063651,
      "learning_rate": 1.9416507442338023e-06,
      "loss": 0.4602,
      "step": 9675
    },
    {
      "epoch": 1.1863658656204021,
      "grad_norm": 1.97666644885153,
      "learning_rate": 1.9411567188419423e-06,
      "loss": 0.4367,
      "step": 9676
    },
    {
      "epoch": 1.1864884747425208,
      "grad_norm": 1.8992513370056745,
      "learning_rate": 1.9406627164195318e-06,
      "loss": 0.4388,
      "step": 9677
    },
    {
      "epoch": 1.1866110838646395,
      "grad_norm": 2.0479986979933607,
      "learning_rate": 1.9401687369868727e-06,
      "loss": 0.4889,
      "step": 9678
    },
    {
      "epoch": 1.1867336929867582,
      "grad_norm": 1.9662603693795326,
      "learning_rate": 1.93967478056427e-06,
      "loss": 0.4369,
      "step": 9679
    },
    {
      "epoch": 1.186856302108877,
      "grad_norm": 1.8849474617329725,
      "learning_rate": 1.939180847172025e-06,
      "loss": 0.4138,
      "step": 9680
    },
    {
      "epoch": 1.1869789112309956,
      "grad_norm": 1.9383263029197908,
      "learning_rate": 1.938686936830442e-06,
      "loss": 0.4259,
      "step": 9681
    },
    {
      "epoch": 1.1871015203531143,
      "grad_norm": 1.8747305508724892,
      "learning_rate": 1.9381930495598176e-06,
      "loss": 0.4925,
      "step": 9682
    },
    {
      "epoch": 1.187224129475233,
      "grad_norm": 2.019652491539931,
      "learning_rate": 1.937699185380454e-06,
      "loss": 0.398,
      "step": 9683
    },
    {
      "epoch": 1.1873467385973517,
      "grad_norm": 1.9600562503966557,
      "learning_rate": 1.9372053443126498e-06,
      "loss": 0.4738,
      "step": 9684
    },
    {
      "epoch": 1.1874693477194702,
      "grad_norm": 1.796448871209664,
      "learning_rate": 1.9367115263767024e-06,
      "loss": 0.4283,
      "step": 9685
    },
    {
      "epoch": 1.187591956841589,
      "grad_norm": 2.085574132637398,
      "learning_rate": 1.936217731592908e-06,
      "loss": 0.4436,
      "step": 9686
    },
    {
      "epoch": 1.1877145659637076,
      "grad_norm": 1.7878129855533318,
      "learning_rate": 1.9357239599815627e-06,
      "loss": 0.4423,
      "step": 9687
    },
    {
      "epoch": 1.1878371750858263,
      "grad_norm": 2.0999305619753668,
      "learning_rate": 1.935230211562962e-06,
      "loss": 0.434,
      "step": 9688
    },
    {
      "epoch": 1.187959784207945,
      "grad_norm": 1.9351351876263554,
      "learning_rate": 1.9347364863573995e-06,
      "loss": 0.3987,
      "step": 9689
    },
    {
      "epoch": 1.1880823933300637,
      "grad_norm": 1.9566954054484913,
      "learning_rate": 1.9342427843851686e-06,
      "loss": 0.4375,
      "step": 9690
    },
    {
      "epoch": 1.1882050024521824,
      "grad_norm": 1.8023909872393549,
      "learning_rate": 1.9337491056665602e-06,
      "loss": 0.4519,
      "step": 9691
    },
    {
      "epoch": 1.1883276115743011,
      "grad_norm": 2.0070195340843138,
      "learning_rate": 1.9332554502218656e-06,
      "loss": 0.4199,
      "step": 9692
    },
    {
      "epoch": 1.1884502206964198,
      "grad_norm": 1.9601625001565341,
      "learning_rate": 1.932761818071376e-06,
      "loss": 0.4589,
      "step": 9693
    },
    {
      "epoch": 1.1885728298185385,
      "grad_norm": 1.8288322725870816,
      "learning_rate": 1.93226820923538e-06,
      "loss": 0.4087,
      "step": 9694
    },
    {
      "epoch": 1.1886954389406572,
      "grad_norm": 1.8421555608511995,
      "learning_rate": 1.9317746237341656e-06,
      "loss": 0.4137,
      "step": 9695
    },
    {
      "epoch": 1.188818048062776,
      "grad_norm": 1.8278341990658087,
      "learning_rate": 1.9312810615880203e-06,
      "loss": 0.3866,
      "step": 9696
    },
    {
      "epoch": 1.1889406571848946,
      "grad_norm": 1.9102607169467796,
      "learning_rate": 1.9307875228172297e-06,
      "loss": 0.4113,
      "step": 9697
    },
    {
      "epoch": 1.1890632663070133,
      "grad_norm": 1.8519765452720565,
      "learning_rate": 1.9302940074420804e-06,
      "loss": 0.4093,
      "step": 9698
    },
    {
      "epoch": 1.189185875429132,
      "grad_norm": 1.9870902525207628,
      "learning_rate": 1.9298005154828554e-06,
      "loss": 0.4065,
      "step": 9699
    },
    {
      "epoch": 1.1893084845512507,
      "grad_norm": 1.7761191803362293,
      "learning_rate": 1.929307046959838e-06,
      "loss": 0.4325,
      "step": 9700
    },
    {
      "epoch": 1.1894310936733694,
      "grad_norm": 1.7436877437174207,
      "learning_rate": 1.928813601893312e-06,
      "loss": 0.4124,
      "step": 9701
    },
    {
      "epoch": 1.1895537027954879,
      "grad_norm": 1.760627479477641,
      "learning_rate": 1.9283201803035586e-06,
      "loss": 0.374,
      "step": 9702
    },
    {
      "epoch": 1.1896763119176066,
      "grad_norm": 2.0794030797452985,
      "learning_rate": 1.9278267822108578e-06,
      "loss": 0.4499,
      "step": 9703
    },
    {
      "epoch": 1.1897989210397253,
      "grad_norm": 1.9912446061428009,
      "learning_rate": 1.9273334076354885e-06,
      "loss": 0.4383,
      "step": 9704
    },
    {
      "epoch": 1.189921530161844,
      "grad_norm": 2.0768866891832687,
      "learning_rate": 1.9268400565977295e-06,
      "loss": 0.4439,
      "step": 9705
    },
    {
      "epoch": 1.1900441392839627,
      "grad_norm": 2.0505005844430952,
      "learning_rate": 1.9263467291178613e-06,
      "loss": 0.4204,
      "step": 9706
    },
    {
      "epoch": 1.1901667484060814,
      "grad_norm": 1.9461344464999186,
      "learning_rate": 1.9258534252161556e-06,
      "loss": 0.438,
      "step": 9707
    },
    {
      "epoch": 1.1902893575282,
      "grad_norm": 1.8793164466327772,
      "learning_rate": 1.9253601449128915e-06,
      "loss": 0.3948,
      "step": 9708
    },
    {
      "epoch": 1.1904119666503188,
      "grad_norm": 2.010203406058784,
      "learning_rate": 1.924866888228343e-06,
      "loss": 0.4276,
      "step": 9709
    },
    {
      "epoch": 1.1905345757724375,
      "grad_norm": 1.9449208869729493,
      "learning_rate": 1.9243736551827833e-06,
      "loss": 0.4495,
      "step": 9710
    },
    {
      "epoch": 1.1906571848945562,
      "grad_norm": 2.0916760709206015,
      "learning_rate": 1.9238804457964854e-06,
      "loss": 0.4491,
      "step": 9711
    },
    {
      "epoch": 1.1907797940166749,
      "grad_norm": 1.8385744539658413,
      "learning_rate": 1.9233872600897207e-06,
      "loss": 0.4173,
      "step": 9712
    },
    {
      "epoch": 1.1909024031387936,
      "grad_norm": 2.1013032992388783,
      "learning_rate": 1.922894098082761e-06,
      "loss": 0.46,
      "step": 9713
    },
    {
      "epoch": 1.1910250122609123,
      "grad_norm": 2.073930930352322,
      "learning_rate": 1.9224009597958753e-06,
      "loss": 0.4985,
      "step": 9714
    },
    {
      "epoch": 1.191147621383031,
      "grad_norm": 1.8466045726089524,
      "learning_rate": 1.921907845249333e-06,
      "loss": 0.4446,
      "step": 9715
    },
    {
      "epoch": 1.1912702305051495,
      "grad_norm": 2.099275223710043,
      "learning_rate": 1.9214147544634023e-06,
      "loss": 0.4545,
      "step": 9716
    },
    {
      "epoch": 1.1913928396272682,
      "grad_norm": 1.9731333839517708,
      "learning_rate": 1.9209216874583483e-06,
      "loss": 0.4761,
      "step": 9717
    },
    {
      "epoch": 1.1915154487493869,
      "grad_norm": 2.173175004218874,
      "learning_rate": 1.920428644254438e-06,
      "loss": 0.4493,
      "step": 9718
    },
    {
      "epoch": 1.1916380578715056,
      "grad_norm": 1.8329074669115786,
      "learning_rate": 1.919935624871938e-06,
      "loss": 0.4391,
      "step": 9719
    },
    {
      "epoch": 1.1917606669936243,
      "grad_norm": 1.9028324208967144,
      "learning_rate": 1.9194426293311096e-06,
      "loss": 0.4135,
      "step": 9720
    },
    {
      "epoch": 1.191883276115743,
      "grad_norm": 1.8966771048396476,
      "learning_rate": 1.9189496576522166e-06,
      "loss": 0.397,
      "step": 9721
    },
    {
      "epoch": 1.1920058852378617,
      "grad_norm": 2.1034918554832718,
      "learning_rate": 1.9184567098555222e-06,
      "loss": 0.4192,
      "step": 9722
    },
    {
      "epoch": 1.1921284943599804,
      "grad_norm": 1.8482869049761415,
      "learning_rate": 1.9179637859612866e-06,
      "loss": 0.4663,
      "step": 9723
    },
    {
      "epoch": 1.192251103482099,
      "grad_norm": 1.837644372428737,
      "learning_rate": 1.9174708859897694e-06,
      "loss": 0.4189,
      "step": 9724
    },
    {
      "epoch": 1.1923737126042178,
      "grad_norm": 1.8806607112601765,
      "learning_rate": 1.9169780099612297e-06,
      "loss": 0.3651,
      "step": 9725
    },
    {
      "epoch": 1.1924963217263365,
      "grad_norm": 1.7705086807257886,
      "learning_rate": 1.9164851578959263e-06,
      "loss": 0.4244,
      "step": 9726
    },
    {
      "epoch": 1.1926189308484552,
      "grad_norm": 2.0857435264563002,
      "learning_rate": 1.9159923298141164e-06,
      "loss": 0.4264,
      "step": 9727
    },
    {
      "epoch": 1.1927415399705739,
      "grad_norm": 1.7099793861835733,
      "learning_rate": 1.9154995257360555e-06,
      "loss": 0.4114,
      "step": 9728
    },
    {
      "epoch": 1.1928641490926926,
      "grad_norm": 2.095866425630872,
      "learning_rate": 1.915006745681998e-06,
      "loss": 0.4157,
      "step": 9729
    },
    {
      "epoch": 1.1929867582148113,
      "grad_norm": 1.797958444416914,
      "learning_rate": 1.9145139896721993e-06,
      "loss": 0.4267,
      "step": 9730
    },
    {
      "epoch": 1.19310936733693,
      "grad_norm": 1.8927405331725196,
      "learning_rate": 1.9140212577269137e-06,
      "loss": 0.4516,
      "step": 9731
    },
    {
      "epoch": 1.1932319764590487,
      "grad_norm": 1.8897459579093805,
      "learning_rate": 1.91352854986639e-06,
      "loss": 0.4603,
      "step": 9732
    },
    {
      "epoch": 1.1933545855811674,
      "grad_norm": 2.0611623907001633,
      "learning_rate": 1.9130358661108813e-06,
      "loss": 0.4628,
      "step": 9733
    },
    {
      "epoch": 1.1934771947032858,
      "grad_norm": 1.7409556709019804,
      "learning_rate": 1.9125432064806376e-06,
      "loss": 0.3922,
      "step": 9734
    },
    {
      "epoch": 1.1935998038254045,
      "grad_norm": 2.1059548832432764,
      "learning_rate": 1.912050570995909e-06,
      "loss": 0.4293,
      "step": 9735
    },
    {
      "epoch": 1.1937224129475232,
      "grad_norm": 1.9335745656943035,
      "learning_rate": 1.9115579596769425e-06,
      "loss": 0.4646,
      "step": 9736
    },
    {
      "epoch": 1.193845022069642,
      "grad_norm": 1.9914344524981609,
      "learning_rate": 1.9110653725439853e-06,
      "loss": 0.4087,
      "step": 9737
    },
    {
      "epoch": 1.1939676311917606,
      "grad_norm": 1.8667158114237727,
      "learning_rate": 1.9105728096172836e-06,
      "loss": 0.4424,
      "step": 9738
    },
    {
      "epoch": 1.1940902403138793,
      "grad_norm": 1.9791667078131492,
      "learning_rate": 1.9100802709170834e-06,
      "loss": 0.4214,
      "step": 9739
    },
    {
      "epoch": 1.194212849435998,
      "grad_norm": 1.967443740951436,
      "learning_rate": 1.909587756463629e-06,
      "loss": 0.4554,
      "step": 9740
    },
    {
      "epoch": 1.1943354585581167,
      "grad_norm": 2.0252000535330277,
      "learning_rate": 1.9090952662771624e-06,
      "loss": 0.4171,
      "step": 9741
    },
    {
      "epoch": 1.1944580676802354,
      "grad_norm": 2.021064957087599,
      "learning_rate": 1.908602800377926e-06,
      "loss": 0.4204,
      "step": 9742
    },
    {
      "epoch": 1.1945806768023541,
      "grad_norm": 2.2389183666057417,
      "learning_rate": 1.9081103587861614e-06,
      "loss": 0.4616,
      "step": 9743
    },
    {
      "epoch": 1.1947032859244728,
      "grad_norm": 1.943990123773385,
      "learning_rate": 1.9076179415221106e-06,
      "loss": 0.4365,
      "step": 9744
    },
    {
      "epoch": 1.1948258950465915,
      "grad_norm": 1.8136817290065244,
      "learning_rate": 1.9071255486060098e-06,
      "loss": 0.4432,
      "step": 9745
    },
    {
      "epoch": 1.1949485041687102,
      "grad_norm": 1.9113626932739847,
      "learning_rate": 1.9066331800580986e-06,
      "loss": 0.4741,
      "step": 9746
    },
    {
      "epoch": 1.195071113290829,
      "grad_norm": 1.8807717924725882,
      "learning_rate": 1.9061408358986143e-06,
      "loss": 0.4314,
      "step": 9747
    },
    {
      "epoch": 1.1951937224129474,
      "grad_norm": 1.8072653075409675,
      "learning_rate": 1.905648516147794e-06,
      "loss": 0.4265,
      "step": 9748
    },
    {
      "epoch": 1.1953163315350661,
      "grad_norm": 1.8791903075344034,
      "learning_rate": 1.9051562208258707e-06,
      "loss": 0.4524,
      "step": 9749
    },
    {
      "epoch": 1.1954389406571848,
      "grad_norm": 1.8891632159928193,
      "learning_rate": 1.9046639499530795e-06,
      "loss": 0.4132,
      "step": 9750
    },
    {
      "epoch": 1.1955615497793035,
      "grad_norm": 1.7781661715840102,
      "learning_rate": 1.9041717035496546e-06,
      "loss": 0.4151,
      "step": 9751
    },
    {
      "epoch": 1.1956841589014222,
      "grad_norm": 1.7910628842343328,
      "learning_rate": 1.903679481635828e-06,
      "loss": 0.434,
      "step": 9752
    },
    {
      "epoch": 1.195806768023541,
      "grad_norm": 1.9466540944640998,
      "learning_rate": 1.9031872842318296e-06,
      "loss": 0.3994,
      "step": 9753
    },
    {
      "epoch": 1.1959293771456596,
      "grad_norm": 1.730402553281906,
      "learning_rate": 1.9026951113578906e-06,
      "loss": 0.4071,
      "step": 9754
    },
    {
      "epoch": 1.1960519862677783,
      "grad_norm": 2.070736668528493,
      "learning_rate": 1.90220296303424e-06,
      "loss": 0.3844,
      "step": 9755
    },
    {
      "epoch": 1.196174595389897,
      "grad_norm": 1.876263901560096,
      "learning_rate": 1.9017108392811065e-06,
      "loss": 0.4259,
      "step": 9756
    },
    {
      "epoch": 1.1962972045120157,
      "grad_norm": 1.888982754432265,
      "learning_rate": 1.901218740118716e-06,
      "loss": 0.4282,
      "step": 9757
    },
    {
      "epoch": 1.1964198136341344,
      "grad_norm": 2.002655657549524,
      "learning_rate": 1.900726665567295e-06,
      "loss": 0.4718,
      "step": 9758
    },
    {
      "epoch": 1.196542422756253,
      "grad_norm": 1.808024300550816,
      "learning_rate": 1.9002346156470691e-06,
      "loss": 0.4166,
      "step": 9759
    },
    {
      "epoch": 1.1966650318783718,
      "grad_norm": 1.929148580658636,
      "learning_rate": 1.899742590378263e-06,
      "loss": 0.4123,
      "step": 9760
    },
    {
      "epoch": 1.1967876410004905,
      "grad_norm": 1.986789759081058,
      "learning_rate": 1.899250589781099e-06,
      "loss": 0.4239,
      "step": 9761
    },
    {
      "epoch": 1.1969102501226092,
      "grad_norm": 1.9057848085725628,
      "learning_rate": 1.898758613875799e-06,
      "loss": 0.4133,
      "step": 9762
    },
    {
      "epoch": 1.197032859244728,
      "grad_norm": 1.8484994933177616,
      "learning_rate": 1.8982666626825837e-06,
      "loss": 0.4378,
      "step": 9763
    },
    {
      "epoch": 1.1971554683668466,
      "grad_norm": 2.050745092487904,
      "learning_rate": 1.8977747362216748e-06,
      "loss": 0.4492,
      "step": 9764
    },
    {
      "epoch": 1.197278077488965,
      "grad_norm": 2.027182690895227,
      "learning_rate": 1.8972828345132903e-06,
      "loss": 0.4491,
      "step": 9765
    },
    {
      "epoch": 1.1974006866110838,
      "grad_norm": 2.033510509226849,
      "learning_rate": 1.8967909575776482e-06,
      "loss": 0.4499,
      "step": 9766
    },
    {
      "epoch": 1.1975232957332025,
      "grad_norm": 1.947966953157027,
      "learning_rate": 1.8962991054349658e-06,
      "loss": 0.4177,
      "step": 9767
    },
    {
      "epoch": 1.1976459048553212,
      "grad_norm": 2.0664998177895533,
      "learning_rate": 1.8958072781054582e-06,
      "loss": 0.4844,
      "step": 9768
    },
    {
      "epoch": 1.1977685139774399,
      "grad_norm": 2.0571315892514734,
      "learning_rate": 1.8953154756093428e-06,
      "loss": 0.4631,
      "step": 9769
    },
    {
      "epoch": 1.1978911230995586,
      "grad_norm": 1.855651330808672,
      "learning_rate": 1.8948236979668302e-06,
      "loss": 0.4449,
      "step": 9770
    },
    {
      "epoch": 1.1980137322216773,
      "grad_norm": 1.8432064421743828,
      "learning_rate": 1.8943319451981354e-06,
      "loss": 0.3959,
      "step": 9771
    },
    {
      "epoch": 1.198136341343796,
      "grad_norm": 1.9866568689026118,
      "learning_rate": 1.89384021732347e-06,
      "loss": 0.4534,
      "step": 9772
    },
    {
      "epoch": 1.1982589504659147,
      "grad_norm": 1.9601120175626883,
      "learning_rate": 1.8933485143630453e-06,
      "loss": 0.3957,
      "step": 9773
    },
    {
      "epoch": 1.1983815595880334,
      "grad_norm": 1.8581031669122063,
      "learning_rate": 1.8928568363370702e-06,
      "loss": 0.3891,
      "step": 9774
    },
    {
      "epoch": 1.198504168710152,
      "grad_norm": 1.9250959347484011,
      "learning_rate": 1.8923651832657541e-06,
      "loss": 0.4435,
      "step": 9775
    },
    {
      "epoch": 1.1986267778322708,
      "grad_norm": 2.051653558569849,
      "learning_rate": 1.891873555169304e-06,
      "loss": 0.4788,
      "step": 9776
    },
    {
      "epoch": 1.1987493869543895,
      "grad_norm": 1.990824266896549,
      "learning_rate": 1.8913819520679282e-06,
      "loss": 0.4483,
      "step": 9777
    },
    {
      "epoch": 1.1988719960765082,
      "grad_norm": 1.9854338791692518,
      "learning_rate": 1.8908903739818316e-06,
      "loss": 0.4395,
      "step": 9778
    },
    {
      "epoch": 1.1989946051986267,
      "grad_norm": 1.9007625386760176,
      "learning_rate": 1.8903988209312188e-06,
      "loss": 0.4409,
      "step": 9779
    },
    {
      "epoch": 1.1991172143207454,
      "grad_norm": 2.0025039063393337,
      "learning_rate": 1.8899072929362936e-06,
      "loss": 0.4248,
      "step": 9780
    },
    {
      "epoch": 1.199239823442864,
      "grad_norm": 1.919813363625323,
      "learning_rate": 1.8894157900172593e-06,
      "loss": 0.4695,
      "step": 9781
    },
    {
      "epoch": 1.1993624325649828,
      "grad_norm": 1.8915611376961177,
      "learning_rate": 1.8889243121943163e-06,
      "loss": 0.3657,
      "step": 9782
    },
    {
      "epoch": 1.1994850416871015,
      "grad_norm": 1.8646789388166327,
      "learning_rate": 1.8884328594876655e-06,
      "loss": 0.4249,
      "step": 9783
    },
    {
      "epoch": 1.1996076508092202,
      "grad_norm": 1.7787681348853406,
      "learning_rate": 1.8879414319175076e-06,
      "loss": 0.4215,
      "step": 9784
    },
    {
      "epoch": 1.1997302599313389,
      "grad_norm": 2.0292407136331714,
      "learning_rate": 1.8874500295040401e-06,
      "loss": 0.4231,
      "step": 9785
    },
    {
      "epoch": 1.1998528690534576,
      "grad_norm": 2.0362534539131065,
      "learning_rate": 1.8869586522674615e-06,
      "loss": 0.4236,
      "step": 9786
    },
    {
      "epoch": 1.1999754781755763,
      "grad_norm": 1.9649358016120353,
      "learning_rate": 1.886467300227967e-06,
      "loss": 0.4649,
      "step": 9787
    },
    {
      "epoch": 1.200098087297695,
      "grad_norm": 1.92759637674115,
      "learning_rate": 1.8859759734057523e-06,
      "loss": 0.4234,
      "step": 9788
    },
    {
      "epoch": 1.2002206964198137,
      "grad_norm": 1.9889720430840852,
      "learning_rate": 1.8854846718210123e-06,
      "loss": 0.4168,
      "step": 9789
    },
    {
      "epoch": 1.2003433055419324,
      "grad_norm": 1.758950762819114,
      "learning_rate": 1.884993395493941e-06,
      "loss": 0.4441,
      "step": 9790
    },
    {
      "epoch": 1.200465914664051,
      "grad_norm": 2.213577669836138,
      "learning_rate": 1.8845021444447294e-06,
      "loss": 0.4455,
      "step": 9791
    },
    {
      "epoch": 1.2005885237861698,
      "grad_norm": 1.8796846408324592,
      "learning_rate": 1.8840109186935693e-06,
      "loss": 0.4361,
      "step": 9792
    },
    {
      "epoch": 1.2007111329082885,
      "grad_norm": 1.863082904353583,
      "learning_rate": 1.883519718260651e-06,
      "loss": 0.4474,
      "step": 9793
    },
    {
      "epoch": 1.2008337420304072,
      "grad_norm": 1.6892167350954645,
      "learning_rate": 1.883028543166164e-06,
      "loss": 0.4381,
      "step": 9794
    },
    {
      "epoch": 1.2009563511525259,
      "grad_norm": 1.7734438012618396,
      "learning_rate": 1.8825373934302957e-06,
      "loss": 0.4035,
      "step": 9795
    },
    {
      "epoch": 1.2010789602746446,
      "grad_norm": 2.0285823160827934,
      "learning_rate": 1.8820462690732334e-06,
      "loss": 0.4883,
      "step": 9796
    },
    {
      "epoch": 1.201201569396763,
      "grad_norm": 2.0867099918243195,
      "learning_rate": 1.8815551701151636e-06,
      "loss": 0.4183,
      "step": 9797
    },
    {
      "epoch": 1.2013241785188817,
      "grad_norm": 2.0184367392152693,
      "learning_rate": 1.8810640965762717e-06,
      "loss": 0.4491,
      "step": 9798
    },
    {
      "epoch": 1.2014467876410004,
      "grad_norm": 1.8967242987185509,
      "learning_rate": 1.880573048476741e-06,
      "loss": 0.3942,
      "step": 9799
    },
    {
      "epoch": 1.2015693967631191,
      "grad_norm": 2.0049027936689967,
      "learning_rate": 1.8800820258367541e-06,
      "loss": 0.3866,
      "step": 9800
    },
    {
      "epoch": 1.2016920058852378,
      "grad_norm": 2.0001028346868464,
      "learning_rate": 1.8795910286764935e-06,
      "loss": 0.4197,
      "step": 9801
    },
    {
      "epoch": 1.2018146150073565,
      "grad_norm": 1.9581925210084388,
      "learning_rate": 1.879100057016141e-06,
      "loss": 0.4305,
      "step": 9802
    },
    {
      "epoch": 1.2019372241294752,
      "grad_norm": 1.8853464651857885,
      "learning_rate": 1.8786091108758741e-06,
      "loss": 0.4154,
      "step": 9803
    },
    {
      "epoch": 1.202059833251594,
      "grad_norm": 1.8541217741090683,
      "learning_rate": 1.8781181902758733e-06,
      "loss": 0.4019,
      "step": 9804
    },
    {
      "epoch": 1.2021824423737126,
      "grad_norm": 1.9136617210655933,
      "learning_rate": 1.8776272952363159e-06,
      "loss": 0.4364,
      "step": 9805
    },
    {
      "epoch": 1.2023050514958313,
      "grad_norm": 1.8400719954483113,
      "learning_rate": 1.8771364257773788e-06,
      "loss": 0.4239,
      "step": 9806
    },
    {
      "epoch": 1.20242766061795,
      "grad_norm": 1.8047675581000162,
      "learning_rate": 1.876645581919237e-06,
      "loss": 0.3953,
      "step": 9807
    },
    {
      "epoch": 1.2025502697400687,
      "grad_norm": 1.9405003100394778,
      "learning_rate": 1.8761547636820649e-06,
      "loss": 0.4061,
      "step": 9808
    },
    {
      "epoch": 1.2026728788621874,
      "grad_norm": 1.9858613265314873,
      "learning_rate": 1.8756639710860367e-06,
      "loss": 0.4425,
      "step": 9809
    },
    {
      "epoch": 1.202795487984306,
      "grad_norm": 1.8721452768507136,
      "learning_rate": 1.8751732041513248e-06,
      "loss": 0.3777,
      "step": 9810
    },
    {
      "epoch": 1.2029180971064246,
      "grad_norm": 2.0493364777486303,
      "learning_rate": 1.874682462898101e-06,
      "loss": 0.4088,
      "step": 9811
    },
    {
      "epoch": 1.2030407062285433,
      "grad_norm": 1.792133893640071,
      "learning_rate": 1.8741917473465341e-06,
      "loss": 0.4747,
      "step": 9812
    },
    {
      "epoch": 1.203163315350662,
      "grad_norm": 1.8634266960851444,
      "learning_rate": 1.8737010575167944e-06,
      "loss": 0.3968,
      "step": 9813
    },
    {
      "epoch": 1.2032859244727807,
      "grad_norm": 1.9416629035440078,
      "learning_rate": 1.8732103934290497e-06,
      "loss": 0.4452,
      "step": 9814
    },
    {
      "epoch": 1.2034085335948994,
      "grad_norm": 1.9539820131970287,
      "learning_rate": 1.8727197551034681e-06,
      "loss": 0.4404,
      "step": 9815
    },
    {
      "epoch": 1.203531142717018,
      "grad_norm": 1.9691858114100602,
      "learning_rate": 1.872229142560215e-06,
      "loss": 0.4395,
      "step": 9816
    },
    {
      "epoch": 1.2036537518391368,
      "grad_norm": 1.7991499309729926,
      "learning_rate": 1.871738555819455e-06,
      "loss": 0.427,
      "step": 9817
    },
    {
      "epoch": 1.2037763609612555,
      "grad_norm": 1.8952888046988767,
      "learning_rate": 1.8712479949013532e-06,
      "loss": 0.4756,
      "step": 9818
    },
    {
      "epoch": 1.2038989700833742,
      "grad_norm": 1.9295965015270167,
      "learning_rate": 1.8707574598260722e-06,
      "loss": 0.4994,
      "step": 9819
    },
    {
      "epoch": 1.204021579205493,
      "grad_norm": 1.8022868937147245,
      "learning_rate": 1.8702669506137733e-06,
      "loss": 0.5037,
      "step": 9820
    },
    {
      "epoch": 1.2041441883276116,
      "grad_norm": 1.9143558226713018,
      "learning_rate": 1.8697764672846166e-06,
      "loss": 0.4345,
      "step": 9821
    },
    {
      "epoch": 1.2042667974497303,
      "grad_norm": 2.0321131232935854,
      "learning_rate": 1.8692860098587637e-06,
      "loss": 0.4423,
      "step": 9822
    },
    {
      "epoch": 1.204389406571849,
      "grad_norm": 1.957266754947342,
      "learning_rate": 1.8687955783563727e-06,
      "loss": 0.426,
      "step": 9823
    },
    {
      "epoch": 1.2045120156939677,
      "grad_norm": 1.8671618685131586,
      "learning_rate": 1.8683051727976004e-06,
      "loss": 0.4199,
      "step": 9824
    },
    {
      "epoch": 1.2046346248160864,
      "grad_norm": 1.7744119295185201,
      "learning_rate": 1.8678147932026043e-06,
      "loss": 0.4165,
      "step": 9825
    },
    {
      "epoch": 1.204757233938205,
      "grad_norm": 1.652791153086052,
      "learning_rate": 1.8673244395915379e-06,
      "loss": 0.4355,
      "step": 9826
    },
    {
      "epoch": 1.2048798430603238,
      "grad_norm": 2.043416013710964,
      "learning_rate": 1.8668341119845596e-06,
      "loss": 0.4577,
      "step": 9827
    },
    {
      "epoch": 1.2050024521824423,
      "grad_norm": 1.8844314015628472,
      "learning_rate": 1.8663438104018182e-06,
      "loss": 0.4024,
      "step": 9828
    },
    {
      "epoch": 1.205125061304561,
      "grad_norm": 1.9615777735091369,
      "learning_rate": 1.8658535348634686e-06,
      "loss": 0.478,
      "step": 9829
    },
    {
      "epoch": 1.2052476704266797,
      "grad_norm": 2.057701191878268,
      "learning_rate": 1.8653632853896613e-06,
      "loss": 0.3995,
      "step": 9830
    },
    {
      "epoch": 1.2053702795487984,
      "grad_norm": 1.9354695361254983,
      "learning_rate": 1.8648730620005472e-06,
      "loss": 0.3805,
      "step": 9831
    },
    {
      "epoch": 1.205492888670917,
      "grad_norm": 1.9700662295281628,
      "learning_rate": 1.864382864716274e-06,
      "loss": 0.4038,
      "step": 9832
    },
    {
      "epoch": 1.2056154977930358,
      "grad_norm": 2.022888690428263,
      "learning_rate": 1.8638926935569904e-06,
      "loss": 0.428,
      "step": 9833
    },
    {
      "epoch": 1.2057381069151545,
      "grad_norm": 1.8745433357091137,
      "learning_rate": 1.8634025485428426e-06,
      "loss": 0.4226,
      "step": 9834
    },
    {
      "epoch": 1.2058607160372732,
      "grad_norm": 1.9489456309771322,
      "learning_rate": 1.8629124296939778e-06,
      "loss": 0.4418,
      "step": 9835
    },
    {
      "epoch": 1.2059833251593919,
      "grad_norm": 1.8554630665218272,
      "learning_rate": 1.8624223370305402e-06,
      "loss": 0.3918,
      "step": 9836
    },
    {
      "epoch": 1.2061059342815106,
      "grad_norm": 2.1780161399897717,
      "learning_rate": 1.8619322705726731e-06,
      "loss": 0.4861,
      "step": 9837
    },
    {
      "epoch": 1.2062285434036293,
      "grad_norm": 1.99710954147032,
      "learning_rate": 1.861442230340519e-06,
      "loss": 0.4484,
      "step": 9838
    },
    {
      "epoch": 1.206351152525748,
      "grad_norm": 1.8807209685613366,
      "learning_rate": 1.8609522163542194e-06,
      "loss": 0.407,
      "step": 9839
    },
    {
      "epoch": 1.2064737616478667,
      "grad_norm": 2.1571752187369295,
      "learning_rate": 1.8604622286339167e-06,
      "loss": 0.4398,
      "step": 9840
    },
    {
      "epoch": 1.2065963707699854,
      "grad_norm": 1.861080298678922,
      "learning_rate": 1.8599722671997467e-06,
      "loss": 0.4524,
      "step": 9841
    },
    {
      "epoch": 1.2067189798921039,
      "grad_norm": 2.0173692457840593,
      "learning_rate": 1.8594823320718504e-06,
      "loss": 0.4475,
      "step": 9842
    },
    {
      "epoch": 1.2068415890142226,
      "grad_norm": 1.9213552063469699,
      "learning_rate": 1.8589924232703643e-06,
      "loss": 0.4254,
      "step": 9843
    },
    {
      "epoch": 1.2069641981363413,
      "grad_norm": 1.9193951769978201,
      "learning_rate": 1.8585025408154248e-06,
      "loss": 0.4667,
      "step": 9844
    },
    {
      "epoch": 1.20708680725846,
      "grad_norm": 1.748666573999243,
      "learning_rate": 1.858012684727166e-06,
      "loss": 0.3659,
      "step": 9845
    },
    {
      "epoch": 1.2072094163805787,
      "grad_norm": 1.7815983553969672,
      "learning_rate": 1.8575228550257219e-06,
      "loss": 0.4287,
      "step": 9846
    },
    {
      "epoch": 1.2073320255026974,
      "grad_norm": 2.0579100792027165,
      "learning_rate": 1.8570330517312263e-06,
      "loss": 0.4016,
      "step": 9847
    },
    {
      "epoch": 1.207454634624816,
      "grad_norm": 1.821581793241723,
      "learning_rate": 1.8565432748638112e-06,
      "loss": 0.418,
      "step": 9848
    },
    {
      "epoch": 1.2075772437469348,
      "grad_norm": 1.90836740278613,
      "learning_rate": 1.8560535244436063e-06,
      "loss": 0.4571,
      "step": 9849
    },
    {
      "epoch": 1.2076998528690535,
      "grad_norm": 2.0487187867777834,
      "learning_rate": 1.855563800490741e-06,
      "loss": 0.4176,
      "step": 9850
    },
    {
      "epoch": 1.2078224619911722,
      "grad_norm": 1.7583770878698128,
      "learning_rate": 1.855074103025345e-06,
      "loss": 0.4276,
      "step": 9851
    },
    {
      "epoch": 1.2079450711132909,
      "grad_norm": 1.812737245400013,
      "learning_rate": 1.8545844320675454e-06,
      "loss": 0.4124,
      "step": 9852
    },
    {
      "epoch": 1.2080676802354096,
      "grad_norm": 1.8231410282748841,
      "learning_rate": 1.854094787637467e-06,
      "loss": 0.4441,
      "step": 9853
    },
    {
      "epoch": 1.2081902893575283,
      "grad_norm": 2.7340119671468424,
      "learning_rate": 1.8536051697552372e-06,
      "loss": 0.4248,
      "step": 9854
    },
    {
      "epoch": 1.208312898479647,
      "grad_norm": 2.0633995006510584,
      "learning_rate": 1.853115578440979e-06,
      "loss": 0.4266,
      "step": 9855
    },
    {
      "epoch": 1.2084355076017657,
      "grad_norm": 1.7687761774170867,
      "learning_rate": 1.852626013714816e-06,
      "loss": 0.4637,
      "step": 9856
    },
    {
      "epoch": 1.2085581167238844,
      "grad_norm": 2.0271145394255163,
      "learning_rate": 1.8521364755968701e-06,
      "loss": 0.4245,
      "step": 9857
    },
    {
      "epoch": 1.208680725846003,
      "grad_norm": 2.128941202977411,
      "learning_rate": 1.8516469641072618e-06,
      "loss": 0.5165,
      "step": 9858
    },
    {
      "epoch": 1.2088033349681215,
      "grad_norm": 1.5642188647778146,
      "learning_rate": 1.8511574792661104e-06,
      "loss": 0.4059,
      "step": 9859
    },
    {
      "epoch": 1.2089259440902402,
      "grad_norm": 1.7896728596115277,
      "learning_rate": 1.850668021093536e-06,
      "loss": 0.4463,
      "step": 9860
    },
    {
      "epoch": 1.209048553212359,
      "grad_norm": 1.8835337622189656,
      "learning_rate": 1.8501785896096558e-06,
      "loss": 0.4483,
      "step": 9861
    },
    {
      "epoch": 1.2091711623344776,
      "grad_norm": 2.21563653342365,
      "learning_rate": 1.8496891848345856e-06,
      "loss": 0.4344,
      "step": 9862
    },
    {
      "epoch": 1.2092937714565963,
      "grad_norm": 1.9938620755384886,
      "learning_rate": 1.849199806788441e-06,
      "loss": 0.4207,
      "step": 9863
    },
    {
      "epoch": 1.209416380578715,
      "grad_norm": 2.05944512485212,
      "learning_rate": 1.8487104554913362e-06,
      "loss": 0.4715,
      "step": 9864
    },
    {
      "epoch": 1.2095389897008337,
      "grad_norm": 1.9362242708592234,
      "learning_rate": 1.8482211309633862e-06,
      "loss": 0.3968,
      "step": 9865
    },
    {
      "epoch": 1.2096615988229524,
      "grad_norm": 2.1068241629530995,
      "learning_rate": 1.8477318332247001e-06,
      "loss": 0.4628,
      "step": 9866
    },
    {
      "epoch": 1.2097842079450711,
      "grad_norm": 1.9139172217703315,
      "learning_rate": 1.8472425622953908e-06,
      "loss": 0.4695,
      "step": 9867
    },
    {
      "epoch": 1.2099068170671898,
      "grad_norm": 1.8814619842679845,
      "learning_rate": 1.8467533181955677e-06,
      "loss": 0.433,
      "step": 9868
    },
    {
      "epoch": 1.2100294261893085,
      "grad_norm": 2.117043276382797,
      "learning_rate": 1.8462641009453402e-06,
      "loss": 0.4452,
      "step": 9869
    },
    {
      "epoch": 1.2101520353114272,
      "grad_norm": 1.9994484988260568,
      "learning_rate": 1.8457749105648153e-06,
      "loss": 0.4538,
      "step": 9870
    },
    {
      "epoch": 1.210274644433546,
      "grad_norm": 1.7889633125505888,
      "learning_rate": 1.8452857470740997e-06,
      "loss": 0.4378,
      "step": 9871
    },
    {
      "epoch": 1.2103972535556646,
      "grad_norm": 1.9765872203953134,
      "learning_rate": 1.8447966104932985e-06,
      "loss": 0.4041,
      "step": 9872
    },
    {
      "epoch": 1.210519862677783,
      "grad_norm": 1.9434140498574106,
      "learning_rate": 1.8443075008425174e-06,
      "loss": 0.4117,
      "step": 9873
    },
    {
      "epoch": 1.2106424717999018,
      "grad_norm": 1.8907257519746579,
      "learning_rate": 1.8438184181418583e-06,
      "loss": 0.421,
      "step": 9874
    },
    {
      "epoch": 1.2107650809220205,
      "grad_norm": 1.9400564413395285,
      "learning_rate": 1.843329362411424e-06,
      "loss": 0.4197,
      "step": 9875
    },
    {
      "epoch": 1.2108876900441392,
      "grad_norm": 1.9816224690614936,
      "learning_rate": 1.8428403336713156e-06,
      "loss": 0.4273,
      "step": 9876
    },
    {
      "epoch": 1.211010299166258,
      "grad_norm": 1.8978560985997774,
      "learning_rate": 1.842351331941633e-06,
      "loss": 0.4142,
      "step": 9877
    },
    {
      "epoch": 1.2111329082883766,
      "grad_norm": 2.0248428731033363,
      "learning_rate": 1.8418623572424749e-06,
      "loss": 0.4917,
      "step": 9878
    },
    {
      "epoch": 1.2112555174104953,
      "grad_norm": 1.9747473392335226,
      "learning_rate": 1.841373409593938e-06,
      "loss": 0.4038,
      "step": 9879
    },
    {
      "epoch": 1.211378126532614,
      "grad_norm": 1.743008744925239,
      "learning_rate": 1.840884489016121e-06,
      "loss": 0.4074,
      "step": 9880
    },
    {
      "epoch": 1.2115007356547327,
      "grad_norm": 2.003744523718956,
      "learning_rate": 1.840395595529118e-06,
      "loss": 0.4488,
      "step": 9881
    },
    {
      "epoch": 1.2116233447768514,
      "grad_norm": 1.972767999529552,
      "learning_rate": 1.8399067291530245e-06,
      "loss": 0.4501,
      "step": 9882
    },
    {
      "epoch": 1.21174595389897,
      "grad_norm": 2.0612394757959573,
      "learning_rate": 1.8394178899079324e-06,
      "loss": 0.4514,
      "step": 9883
    },
    {
      "epoch": 1.2118685630210888,
      "grad_norm": 1.820756910523204,
      "learning_rate": 1.838929077813934e-06,
      "loss": 0.4092,
      "step": 9884
    },
    {
      "epoch": 1.2119911721432075,
      "grad_norm": 2.0919846872561,
      "learning_rate": 1.8384402928911215e-06,
      "loss": 0.4604,
      "step": 9885
    },
    {
      "epoch": 1.2121137812653262,
      "grad_norm": 1.9748609797183354,
      "learning_rate": 1.8379515351595845e-06,
      "loss": 0.3904,
      "step": 9886
    },
    {
      "epoch": 1.212236390387445,
      "grad_norm": 1.975800218564764,
      "learning_rate": 1.8374628046394111e-06,
      "loss": 0.4403,
      "step": 9887
    },
    {
      "epoch": 1.2123589995095636,
      "grad_norm": 2.0771149187272515,
      "learning_rate": 1.8369741013506891e-06,
      "loss": 0.4273,
      "step": 9888
    },
    {
      "epoch": 1.2124816086316823,
      "grad_norm": 2.076752209880471,
      "learning_rate": 1.8364854253135056e-06,
      "loss": 0.4451,
      "step": 9889
    },
    {
      "epoch": 1.212604217753801,
      "grad_norm": 1.9465810587098622,
      "learning_rate": 1.8359967765479465e-06,
      "loss": 0.4006,
      "step": 9890
    },
    {
      "epoch": 1.2127268268759195,
      "grad_norm": 1.9087203950154075,
      "learning_rate": 1.8355081550740939e-06,
      "loss": 0.4,
      "step": 9891
    },
    {
      "epoch": 1.2128494359980382,
      "grad_norm": 1.8926549410858897,
      "learning_rate": 1.835019560912033e-06,
      "loss": 0.4116,
      "step": 9892
    },
    {
      "epoch": 1.2129720451201569,
      "grad_norm": 2.001466174239206,
      "learning_rate": 1.8345309940818456e-06,
      "loss": 0.4648,
      "step": 9893
    },
    {
      "epoch": 1.2130946542422756,
      "grad_norm": 2.0176186376856404,
      "learning_rate": 1.834042454603613e-06,
      "loss": 0.4128,
      "step": 9894
    },
    {
      "epoch": 1.2132172633643943,
      "grad_norm": 1.9817296690977027,
      "learning_rate": 1.833553942497414e-06,
      "loss": 0.4533,
      "step": 9895
    },
    {
      "epoch": 1.213339872486513,
      "grad_norm": 1.9230992978267045,
      "learning_rate": 1.8330654577833278e-06,
      "loss": 0.4189,
      "step": 9896
    },
    {
      "epoch": 1.2134624816086317,
      "grad_norm": 2.0028047483952727,
      "learning_rate": 1.8325770004814313e-06,
      "loss": 0.4036,
      "step": 9897
    },
    {
      "epoch": 1.2135850907307504,
      "grad_norm": 2.0962663222960165,
      "learning_rate": 1.8320885706118035e-06,
      "loss": 0.4273,
      "step": 9898
    },
    {
      "epoch": 1.213707699852869,
      "grad_norm": 2.0319148810073817,
      "learning_rate": 1.8316001681945164e-06,
      "loss": 0.425,
      "step": 9899
    },
    {
      "epoch": 1.2138303089749878,
      "grad_norm": 1.9009950106775602,
      "learning_rate": 1.8311117932496458e-06,
      "loss": 0.4628,
      "step": 9900
    },
    {
      "epoch": 1.2139529180971065,
      "grad_norm": 1.9448148058686423,
      "learning_rate": 1.8306234457972652e-06,
      "loss": 0.4509,
      "step": 9901
    },
    {
      "epoch": 1.2140755272192252,
      "grad_norm": 1.9009125215935812,
      "learning_rate": 1.8301351258574463e-06,
      "loss": 0.3856,
      "step": 9902
    },
    {
      "epoch": 1.2141981363413439,
      "grad_norm": 1.939753357460399,
      "learning_rate": 1.829646833450259e-06,
      "loss": 0.4484,
      "step": 9903
    },
    {
      "epoch": 1.2143207454634624,
      "grad_norm": 1.8467228877359276,
      "learning_rate": 1.829158568595773e-06,
      "loss": 0.4183,
      "step": 9904
    },
    {
      "epoch": 1.214443354585581,
      "grad_norm": 1.9994304826563334,
      "learning_rate": 1.828670331314058e-06,
      "loss": 0.4197,
      "step": 9905
    },
    {
      "epoch": 1.2145659637076998,
      "grad_norm": 2.03323918691938,
      "learning_rate": 1.828182121625181e-06,
      "loss": 0.4481,
      "step": 9906
    },
    {
      "epoch": 1.2146885728298185,
      "grad_norm": 2.079455350906223,
      "learning_rate": 1.8276939395492088e-06,
      "loss": 0.4571,
      "step": 9907
    },
    {
      "epoch": 1.2148111819519372,
      "grad_norm": 2.013599448015019,
      "learning_rate": 1.8272057851062052e-06,
      "loss": 0.3966,
      "step": 9908
    },
    {
      "epoch": 1.2149337910740559,
      "grad_norm": 1.7506767203292912,
      "learning_rate": 1.8267176583162346e-06,
      "loss": 0.432,
      "step": 9909
    },
    {
      "epoch": 1.2150564001961746,
      "grad_norm": 2.0194039887385693,
      "learning_rate": 1.8262295591993606e-06,
      "loss": 0.4235,
      "step": 9910
    },
    {
      "epoch": 1.2151790093182933,
      "grad_norm": 2.1369490382937135,
      "learning_rate": 1.8257414877756452e-06,
      "loss": 0.4709,
      "step": 9911
    },
    {
      "epoch": 1.215301618440412,
      "grad_norm": 2.1843235332056374,
      "learning_rate": 1.8252534440651476e-06,
      "loss": 0.4344,
      "step": 9912
    },
    {
      "epoch": 1.2154242275625307,
      "grad_norm": 1.9130651909089698,
      "learning_rate": 1.824765428087928e-06,
      "loss": 0.4313,
      "step": 9913
    },
    {
      "epoch": 1.2155468366846494,
      "grad_norm": 1.932204591998885,
      "learning_rate": 1.8242774398640448e-06,
      "loss": 0.4389,
      "step": 9914
    },
    {
      "epoch": 1.215669445806768,
      "grad_norm": 1.8810730483448599,
      "learning_rate": 1.8237894794135559e-06,
      "loss": 0.4003,
      "step": 9915
    },
    {
      "epoch": 1.2157920549288868,
      "grad_norm": 1.9626006254909354,
      "learning_rate": 1.823301546756515e-06,
      "loss": 0.3914,
      "step": 9916
    },
    {
      "epoch": 1.2159146640510055,
      "grad_norm": 1.8025468567347034,
      "learning_rate": 1.822813641912979e-06,
      "loss": 0.4081,
      "step": 9917
    },
    {
      "epoch": 1.2160372731731242,
      "grad_norm": 1.9567647973938747,
      "learning_rate": 1.8223257649030013e-06,
      "loss": 0.4207,
      "step": 9918
    },
    {
      "epoch": 1.2161598822952429,
      "grad_norm": 1.9273279636557479,
      "learning_rate": 1.8218379157466355e-06,
      "loss": 0.4165,
      "step": 9919
    },
    {
      "epoch": 1.2162824914173616,
      "grad_norm": 2.042708534349464,
      "learning_rate": 1.8213500944639306e-06,
      "loss": 0.4525,
      "step": 9920
    },
    {
      "epoch": 1.2164051005394803,
      "grad_norm": 2.223555676864219,
      "learning_rate": 1.8208623010749389e-06,
      "loss": 0.4658,
      "step": 9921
    },
    {
      "epoch": 1.2165277096615987,
      "grad_norm": 1.8224527322728954,
      "learning_rate": 1.820374535599708e-06,
      "loss": 0.401,
      "step": 9922
    },
    {
      "epoch": 1.2166503187837174,
      "grad_norm": 1.916225397325457,
      "learning_rate": 1.8198867980582885e-06,
      "loss": 0.4219,
      "step": 9923
    },
    {
      "epoch": 1.2167729279058361,
      "grad_norm": 1.9964561025277971,
      "learning_rate": 1.8193990884707241e-06,
      "loss": 0.3965,
      "step": 9924
    },
    {
      "epoch": 1.2168955370279548,
      "grad_norm": 1.8909642485863798,
      "learning_rate": 1.8189114068570626e-06,
      "loss": 0.4338,
      "step": 9925
    },
    {
      "epoch": 1.2170181461500735,
      "grad_norm": 1.7540518812454575,
      "learning_rate": 1.8184237532373483e-06,
      "loss": 0.4272,
      "step": 9926
    },
    {
      "epoch": 1.2171407552721922,
      "grad_norm": 1.9762536825216315,
      "learning_rate": 1.8179361276316245e-06,
      "loss": 0.4296,
      "step": 9927
    },
    {
      "epoch": 1.217263364394311,
      "grad_norm": 2.021833708816464,
      "learning_rate": 1.8174485300599331e-06,
      "loss": 0.404,
      "step": 9928
    },
    {
      "epoch": 1.2173859735164296,
      "grad_norm": 1.7873278445291432,
      "learning_rate": 1.816960960542315e-06,
      "loss": 0.4373,
      "step": 9929
    },
    {
      "epoch": 1.2175085826385483,
      "grad_norm": 2.1299961585853753,
      "learning_rate": 1.816473419098811e-06,
      "loss": 0.4577,
      "step": 9930
    },
    {
      "epoch": 1.217631191760667,
      "grad_norm": 1.845924125012016,
      "learning_rate": 1.8159859057494595e-06,
      "loss": 0.4526,
      "step": 9931
    },
    {
      "epoch": 1.2177538008827857,
      "grad_norm": 1.9600752865435214,
      "learning_rate": 1.815498420514299e-06,
      "loss": 0.4487,
      "step": 9932
    },
    {
      "epoch": 1.2178764100049044,
      "grad_norm": 2.03607581970038,
      "learning_rate": 1.8150109634133644e-06,
      "loss": 0.4439,
      "step": 9933
    },
    {
      "epoch": 1.2179990191270231,
      "grad_norm": 1.917863641138397,
      "learning_rate": 1.8145235344666923e-06,
      "loss": 0.4438,
      "step": 9934
    },
    {
      "epoch": 1.2181216282491418,
      "grad_norm": 1.9772056215783667,
      "learning_rate": 1.8140361336943157e-06,
      "loss": 0.3874,
      "step": 9935
    },
    {
      "epoch": 1.2182442373712603,
      "grad_norm": 1.7829629228091257,
      "learning_rate": 1.81354876111627e-06,
      "loss": 0.4164,
      "step": 9936
    },
    {
      "epoch": 1.218366846493379,
      "grad_norm": 2.059141734157605,
      "learning_rate": 1.813061416752584e-06,
      "loss": 0.4136,
      "step": 9937
    },
    {
      "epoch": 1.2184894556154977,
      "grad_norm": 1.7419029268473523,
      "learning_rate": 1.8125741006232903e-06,
      "loss": 0.4674,
      "step": 9938
    },
    {
      "epoch": 1.2186120647376164,
      "grad_norm": 2.026104096584858,
      "learning_rate": 1.8120868127484182e-06,
      "loss": 0.4366,
      "step": 9939
    },
    {
      "epoch": 1.218734673859735,
      "grad_norm": 1.9456235658382302,
      "learning_rate": 1.8115995531479963e-06,
      "loss": 0.4376,
      "step": 9940
    },
    {
      "epoch": 1.2188572829818538,
      "grad_norm": 2.031412140630869,
      "learning_rate": 1.811112321842051e-06,
      "loss": 0.4596,
      "step": 9941
    },
    {
      "epoch": 1.2189798921039725,
      "grad_norm": 1.7133569851936885,
      "learning_rate": 1.8106251188506082e-06,
      "loss": 0.4289,
      "step": 9942
    },
    {
      "epoch": 1.2191025012260912,
      "grad_norm": 1.7741319115204055,
      "learning_rate": 1.8101379441936942e-06,
      "loss": 0.4533,
      "step": 9943
    },
    {
      "epoch": 1.21922511034821,
      "grad_norm": 1.96790100792542,
      "learning_rate": 1.8096507978913325e-06,
      "loss": 0.4012,
      "step": 9944
    },
    {
      "epoch": 1.2193477194703286,
      "grad_norm": 1.7378141575208474,
      "learning_rate": 1.8091636799635445e-06,
      "loss": 0.4175,
      "step": 9945
    },
    {
      "epoch": 1.2194703285924473,
      "grad_norm": 1.8930614456363455,
      "learning_rate": 1.8086765904303524e-06,
      "loss": 0.4025,
      "step": 9946
    },
    {
      "epoch": 1.219592937714566,
      "grad_norm": 2.2724081852165536,
      "learning_rate": 1.8081895293117758e-06,
      "loss": 0.391,
      "step": 9947
    },
    {
      "epoch": 1.2197155468366847,
      "grad_norm": 1.905419930092055,
      "learning_rate": 1.8077024966278359e-06,
      "loss": 0.4089,
      "step": 9948
    },
    {
      "epoch": 1.2198381559588034,
      "grad_norm": 2.0033456306048083,
      "learning_rate": 1.8072154923985473e-06,
      "loss": 0.3857,
      "step": 9949
    },
    {
      "epoch": 1.219960765080922,
      "grad_norm": 2.089000204015401,
      "learning_rate": 1.8067285166439292e-06,
      "loss": 0.4375,
      "step": 9950
    },
    {
      "epoch": 1.2200833742030408,
      "grad_norm": 1.7974064641264784,
      "learning_rate": 1.8062415693839963e-06,
      "loss": 0.449,
      "step": 9951
    },
    {
      "epoch": 1.2202059833251595,
      "grad_norm": 2.0844739739457134,
      "learning_rate": 1.805754650638763e-06,
      "loss": 0.4258,
      "step": 9952
    },
    {
      "epoch": 1.220328592447278,
      "grad_norm": 1.783641540903737,
      "learning_rate": 1.8052677604282432e-06,
      "loss": 0.4037,
      "step": 9953
    },
    {
      "epoch": 1.2204512015693967,
      "grad_norm": 1.821558298678057,
      "learning_rate": 1.8047808987724477e-06,
      "loss": 0.4537,
      "step": 9954
    },
    {
      "epoch": 1.2205738106915154,
      "grad_norm": 1.8603408870153864,
      "learning_rate": 1.8042940656913877e-06,
      "loss": 0.4526,
      "step": 9955
    },
    {
      "epoch": 1.220696419813634,
      "grad_norm": 1.8609623484374471,
      "learning_rate": 1.8038072612050738e-06,
      "loss": 0.3896,
      "step": 9956
    },
    {
      "epoch": 1.2208190289357528,
      "grad_norm": 2.1070331953344343,
      "learning_rate": 1.8033204853335146e-06,
      "loss": 0.4199,
      "step": 9957
    },
    {
      "epoch": 1.2209416380578715,
      "grad_norm": 1.9573920493572707,
      "learning_rate": 1.8028337380967158e-06,
      "loss": 0.4598,
      "step": 9958
    },
    {
      "epoch": 1.2210642471799902,
      "grad_norm": 1.9901429867733011,
      "learning_rate": 1.8023470195146853e-06,
      "loss": 0.4467,
      "step": 9959
    },
    {
      "epoch": 1.2211868563021089,
      "grad_norm": 2.0137442247532005,
      "learning_rate": 1.8018603296074266e-06,
      "loss": 0.4223,
      "step": 9960
    },
    {
      "epoch": 1.2213094654242276,
      "grad_norm": 2.0149055264475013,
      "learning_rate": 1.8013736683949456e-06,
      "loss": 0.4629,
      "step": 9961
    },
    {
      "epoch": 1.2214320745463463,
      "grad_norm": 2.080137061463665,
      "learning_rate": 1.8008870358972424e-06,
      "loss": 0.4198,
      "step": 9962
    },
    {
      "epoch": 1.221554683668465,
      "grad_norm": 1.7803672073481063,
      "learning_rate": 1.80040043213432e-06,
      "loss": 0.3741,
      "step": 9963
    },
    {
      "epoch": 1.2216772927905837,
      "grad_norm": 1.6505958013666548,
      "learning_rate": 1.7999138571261788e-06,
      "loss": 0.4014,
      "step": 9964
    },
    {
      "epoch": 1.2217999019127024,
      "grad_norm": 1.9750042467880722,
      "learning_rate": 1.7994273108928177e-06,
      "loss": 0.424,
      "step": 9965
    },
    {
      "epoch": 1.221922511034821,
      "grad_norm": 1.7503970785596394,
      "learning_rate": 1.7989407934542336e-06,
      "loss": 0.4121,
      "step": 9966
    },
    {
      "epoch": 1.2220451201569396,
      "grad_norm": 1.9130743248129263,
      "learning_rate": 1.7984543048304238e-06,
      "loss": 0.4469,
      "step": 9967
    },
    {
      "epoch": 1.2221677292790583,
      "grad_norm": 1.9738795859039124,
      "learning_rate": 1.7979678450413845e-06,
      "loss": 0.4303,
      "step": 9968
    },
    {
      "epoch": 1.222290338401177,
      "grad_norm": 1.8460301584892593,
      "learning_rate": 1.7974814141071104e-06,
      "loss": 0.4853,
      "step": 9969
    },
    {
      "epoch": 1.2224129475232957,
      "grad_norm": 1.7999748344436002,
      "learning_rate": 1.7969950120475931e-06,
      "loss": 0.4086,
      "step": 9970
    },
    {
      "epoch": 1.2225355566454144,
      "grad_norm": 1.8905514165733994,
      "learning_rate": 1.7965086388828255e-06,
      "loss": 0.4617,
      "step": 9971
    },
    {
      "epoch": 1.222658165767533,
      "grad_norm": 1.8109693267186369,
      "learning_rate": 1.7960222946327982e-06,
      "loss": 0.4312,
      "step": 9972
    },
    {
      "epoch": 1.2227807748896518,
      "grad_norm": 1.9739812554358875,
      "learning_rate": 1.7955359793175014e-06,
      "loss": 0.4465,
      "step": 9973
    },
    {
      "epoch": 1.2229033840117705,
      "grad_norm": 1.8580237966291948,
      "learning_rate": 1.7950496929569226e-06,
      "loss": 0.4283,
      "step": 9974
    },
    {
      "epoch": 1.2230259931338892,
      "grad_norm": 1.8971082622151816,
      "learning_rate": 1.7945634355710486e-06,
      "loss": 0.4311,
      "step": 9975
    },
    {
      "epoch": 1.2231486022560079,
      "grad_norm": 2.1059488650579636,
      "learning_rate": 1.7940772071798671e-06,
      "loss": 0.4288,
      "step": 9976
    },
    {
      "epoch": 1.2232712113781266,
      "grad_norm": 1.9293267228011508,
      "learning_rate": 1.793591007803362e-06,
      "loss": 0.441,
      "step": 9977
    },
    {
      "epoch": 1.2233938205002453,
      "grad_norm": 1.8938104946526684,
      "learning_rate": 1.7931048374615179e-06,
      "loss": 0.4267,
      "step": 9978
    },
    {
      "epoch": 1.223516429622364,
      "grad_norm": 1.8008988429432373,
      "learning_rate": 1.7926186961743152e-06,
      "loss": 0.4223,
      "step": 9979
    },
    {
      "epoch": 1.2236390387444827,
      "grad_norm": 2.0217781746562697,
      "learning_rate": 1.7921325839617366e-06,
      "loss": 0.4259,
      "step": 9980
    },
    {
      "epoch": 1.2237616478666014,
      "grad_norm": 1.995389410953172,
      "learning_rate": 1.7916465008437622e-06,
      "loss": 0.4097,
      "step": 9981
    },
    {
      "epoch": 1.22388425698872,
      "grad_norm": 1.8560904193554277,
      "learning_rate": 1.7911604468403714e-06,
      "loss": 0.4101,
      "step": 9982
    },
    {
      "epoch": 1.2240068661108388,
      "grad_norm": 1.895668524634168,
      "learning_rate": 1.7906744219715406e-06,
      "loss": 0.4341,
      "step": 9983
    },
    {
      "epoch": 1.2241294752329575,
      "grad_norm": 1.7664206285456765,
      "learning_rate": 1.7901884262572464e-06,
      "loss": 0.4096,
      "step": 9984
    },
    {
      "epoch": 1.224252084355076,
      "grad_norm": 1.8190659763323813,
      "learning_rate": 1.7897024597174641e-06,
      "loss": 0.408,
      "step": 9985
    },
    {
      "epoch": 1.2243746934771946,
      "grad_norm": 2.068855490548775,
      "learning_rate": 1.78921652237217e-06,
      "loss": 0.4615,
      "step": 9986
    },
    {
      "epoch": 1.2244973025993133,
      "grad_norm": 1.961121629481307,
      "learning_rate": 1.7887306142413335e-06,
      "loss": 0.4296,
      "step": 9987
    },
    {
      "epoch": 1.224619911721432,
      "grad_norm": 1.9372205277992112,
      "learning_rate": 1.7882447353449284e-06,
      "loss": 0.4399,
      "step": 9988
    },
    {
      "epoch": 1.2247425208435507,
      "grad_norm": 1.9002173990408577,
      "learning_rate": 1.7877588857029248e-06,
      "loss": 0.3751,
      "step": 9989
    },
    {
      "epoch": 1.2248651299656694,
      "grad_norm": 1.9470533676510762,
      "learning_rate": 1.7872730653352927e-06,
      "loss": 0.4279,
      "step": 9990
    },
    {
      "epoch": 1.2249877390877881,
      "grad_norm": 2.0573305420357033,
      "learning_rate": 1.7867872742619985e-06,
      "loss": 0.4199,
      "step": 9991
    },
    {
      "epoch": 1.2251103482099068,
      "grad_norm": 1.9808078264789069,
      "learning_rate": 1.7863015125030102e-06,
      "loss": 0.4347,
      "step": 9992
    },
    {
      "epoch": 1.2252329573320255,
      "grad_norm": 1.7913743382432374,
      "learning_rate": 1.7858157800782927e-06,
      "loss": 0.4529,
      "step": 9993
    },
    {
      "epoch": 1.2253555664541442,
      "grad_norm": 1.897785170207114,
      "learning_rate": 1.7853300770078124e-06,
      "loss": 0.3984,
      "step": 9994
    },
    {
      "epoch": 1.225478175576263,
      "grad_norm": 1.9686427723795805,
      "learning_rate": 1.7848444033115297e-06,
      "loss": 0.4277,
      "step": 9995
    },
    {
      "epoch": 1.2256007846983816,
      "grad_norm": 1.7473396103630754,
      "learning_rate": 1.7843587590094087e-06,
      "loss": 0.4032,
      "step": 9996
    },
    {
      "epoch": 1.2257233938205003,
      "grad_norm": 1.8895558002413029,
      "learning_rate": 1.7838731441214096e-06,
      "loss": 0.4018,
      "step": 9997
    },
    {
      "epoch": 1.2258460029426188,
      "grad_norm": 1.9417753189228342,
      "learning_rate": 1.7833875586674926e-06,
      "loss": 0.4282,
      "step": 9998
    },
    {
      "epoch": 1.2259686120647375,
      "grad_norm": 2.010838494109737,
      "learning_rate": 1.782902002667615e-06,
      "loss": 0.4196,
      "step": 9999
    },
    {
      "epoch": 1.2260912211868562,
      "grad_norm": 1.9429478614086964,
      "learning_rate": 1.782416476141734e-06,
      "loss": 0.4903,
      "step": 10000
    },
    {
      "epoch": 1.226213830308975,
      "grad_norm": 1.887956597513716,
      "learning_rate": 1.7819309791098069e-06,
      "loss": 0.4076,
      "step": 10001
    },
    {
      "epoch": 1.2263364394310936,
      "grad_norm": 1.9341187291849782,
      "learning_rate": 1.7814455115917879e-06,
      "loss": 0.4388,
      "step": 10002
    },
    {
      "epoch": 1.2264590485532123,
      "grad_norm": 1.9760950459377526,
      "learning_rate": 1.7809600736076308e-06,
      "loss": 0.4298,
      "step": 10003
    },
    {
      "epoch": 1.226581657675331,
      "grad_norm": 1.9220829469959746,
      "learning_rate": 1.7804746651772872e-06,
      "loss": 0.429,
      "step": 10004
    },
    {
      "epoch": 1.2267042667974497,
      "grad_norm": 1.9351079485803018,
      "learning_rate": 1.779989286320708e-06,
      "loss": 0.3937,
      "step": 10005
    },
    {
      "epoch": 1.2268268759195684,
      "grad_norm": 1.8825208785082914,
      "learning_rate": 1.7795039370578446e-06,
      "loss": 0.4405,
      "step": 10006
    },
    {
      "epoch": 1.226949485041687,
      "grad_norm": 1.9148614698134352,
      "learning_rate": 1.7790186174086455e-06,
      "loss": 0.4043,
      "step": 10007
    },
    {
      "epoch": 1.2270720941638058,
      "grad_norm": 2.013442369361243,
      "learning_rate": 1.7785333273930566e-06,
      "loss": 0.4292,
      "step": 10008
    },
    {
      "epoch": 1.2271947032859245,
      "grad_norm": 1.8671111081794631,
      "learning_rate": 1.7780480670310257e-06,
      "loss": 0.4292,
      "step": 10009
    },
    {
      "epoch": 1.2273173124080432,
      "grad_norm": 1.7649214150277024,
      "learning_rate": 1.7775628363424974e-06,
      "loss": 0.4155,
      "step": 10010
    },
    {
      "epoch": 1.227439921530162,
      "grad_norm": 2.0931731163727902,
      "learning_rate": 1.7770776353474156e-06,
      "loss": 0.4548,
      "step": 10011
    },
    {
      "epoch": 1.2275625306522806,
      "grad_norm": 1.9519569859010593,
      "learning_rate": 1.7765924640657226e-06,
      "loss": 0.4685,
      "step": 10012
    },
    {
      "epoch": 1.2276851397743993,
      "grad_norm": 1.9606575327034337,
      "learning_rate": 1.7761073225173592e-06,
      "loss": 0.4474,
      "step": 10013
    },
    {
      "epoch": 1.227807748896518,
      "grad_norm": 1.8600558104643359,
      "learning_rate": 1.775622210722267e-06,
      "loss": 0.4382,
      "step": 10014
    },
    {
      "epoch": 1.2279303580186367,
      "grad_norm": 1.8028735828877382,
      "learning_rate": 1.7751371287003852e-06,
      "loss": 0.4127,
      "step": 10015
    },
    {
      "epoch": 1.2280529671407552,
      "grad_norm": 1.9770914856111679,
      "learning_rate": 1.7746520764716501e-06,
      "loss": 0.4607,
      "step": 10016
    },
    {
      "epoch": 1.2281755762628739,
      "grad_norm": 1.9009661063535457,
      "learning_rate": 1.7741670540559986e-06,
      "loss": 0.4032,
      "step": 10017
    },
    {
      "epoch": 1.2282981853849926,
      "grad_norm": 1.8593309957199664,
      "learning_rate": 1.7736820614733657e-06,
      "loss": 0.4339,
      "step": 10018
    },
    {
      "epoch": 1.2284207945071113,
      "grad_norm": 2.0081700789930594,
      "learning_rate": 1.7731970987436876e-06,
      "loss": 0.4459,
      "step": 10019
    },
    {
      "epoch": 1.22854340362923,
      "grad_norm": 2.0626114287307216,
      "learning_rate": 1.7727121658868934e-06,
      "loss": 0.4376,
      "step": 10020
    },
    {
      "epoch": 1.2286660127513487,
      "grad_norm": 2.134565060828012,
      "learning_rate": 1.7722272629229175e-06,
      "loss": 0.4428,
      "step": 10021
    },
    {
      "epoch": 1.2287886218734674,
      "grad_norm": 2.0452344433803633,
      "learning_rate": 1.77174238987169e-06,
      "loss": 0.4636,
      "step": 10022
    },
    {
      "epoch": 1.228911230995586,
      "grad_norm": 1.8925812891120395,
      "learning_rate": 1.7712575467531385e-06,
      "loss": 0.4409,
      "step": 10023
    },
    {
      "epoch": 1.2290338401177048,
      "grad_norm": 1.6982887327695504,
      "learning_rate": 1.7707727335871939e-06,
      "loss": 0.3603,
      "step": 10024
    },
    {
      "epoch": 1.2291564492398235,
      "grad_norm": 2.0484502271116467,
      "learning_rate": 1.770287950393779e-06,
      "loss": 0.433,
      "step": 10025
    },
    {
      "epoch": 1.2292790583619422,
      "grad_norm": 2.046492201599539,
      "learning_rate": 1.7698031971928218e-06,
      "loss": 0.4277,
      "step": 10026
    },
    {
      "epoch": 1.2294016674840609,
      "grad_norm": 1.7639214003634647,
      "learning_rate": 1.769318474004246e-06,
      "loss": 0.4122,
      "step": 10027
    },
    {
      "epoch": 1.2295242766061796,
      "grad_norm": 2.0495400480769623,
      "learning_rate": 1.7688337808479749e-06,
      "loss": 0.4427,
      "step": 10028
    },
    {
      "epoch": 1.2296468857282983,
      "grad_norm": 1.950597923144575,
      "learning_rate": 1.7683491177439294e-06,
      "loss": 0.3809,
      "step": 10029
    },
    {
      "epoch": 1.2297694948504168,
      "grad_norm": 1.8722840282185167,
      "learning_rate": 1.7678644847120305e-06,
      "loss": 0.4109,
      "step": 10030
    },
    {
      "epoch": 1.2298921039725355,
      "grad_norm": 2.111670522064236,
      "learning_rate": 1.7673798817721971e-06,
      "loss": 0.4311,
      "step": 10031
    },
    {
      "epoch": 1.2300147130946542,
      "grad_norm": 1.781410110176503,
      "learning_rate": 1.7668953089443487e-06,
      "loss": 0.4104,
      "step": 10032
    },
    {
      "epoch": 1.2301373222167729,
      "grad_norm": 2.0065223659875087,
      "learning_rate": 1.7664107662483994e-06,
      "loss": 0.4411,
      "step": 10033
    },
    {
      "epoch": 1.2302599313388916,
      "grad_norm": 1.899238377448539,
      "learning_rate": 1.765926253704267e-06,
      "loss": 0.4544,
      "step": 10034
    },
    {
      "epoch": 1.2303825404610103,
      "grad_norm": 1.8963343713175966,
      "learning_rate": 1.7654417713318655e-06,
      "loss": 0.4352,
      "step": 10035
    },
    {
      "epoch": 1.230505149583129,
      "grad_norm": 2.2024560410287446,
      "learning_rate": 1.7649573191511076e-06,
      "loss": 0.4511,
      "step": 10036
    },
    {
      "epoch": 1.2306277587052477,
      "grad_norm": 1.8416233463401979,
      "learning_rate": 1.764472897181905e-06,
      "loss": 0.4085,
      "step": 10037
    },
    {
      "epoch": 1.2307503678273664,
      "grad_norm": 2.122254029563955,
      "learning_rate": 1.763988505444168e-06,
      "loss": 0.4515,
      "step": 10038
    },
    {
      "epoch": 1.230872976949485,
      "grad_norm": 1.7000097698540542,
      "learning_rate": 1.7635041439578065e-06,
      "loss": 0.453,
      "step": 10039
    },
    {
      "epoch": 1.2309955860716038,
      "grad_norm": 1.9235412208542217,
      "learning_rate": 1.7630198127427295e-06,
      "loss": 0.4512,
      "step": 10040
    },
    {
      "epoch": 1.2311181951937225,
      "grad_norm": 1.893487744905097,
      "learning_rate": 1.7625355118188422e-06,
      "loss": 0.4182,
      "step": 10041
    },
    {
      "epoch": 1.2312408043158412,
      "grad_norm": 1.9302711297053712,
      "learning_rate": 1.7620512412060508e-06,
      "loss": 0.436,
      "step": 10042
    },
    {
      "epoch": 1.2313634134379599,
      "grad_norm": 1.9384696194606854,
      "learning_rate": 1.7615670009242596e-06,
      "loss": 0.4412,
      "step": 10043
    },
    {
      "epoch": 1.2314860225600786,
      "grad_norm": 1.88223177947568,
      "learning_rate": 1.7610827909933736e-06,
      "loss": 0.4533,
      "step": 10044
    },
    {
      "epoch": 1.2316086316821973,
      "grad_norm": 1.8345999613767343,
      "learning_rate": 1.7605986114332913e-06,
      "loss": 0.4404,
      "step": 10045
    },
    {
      "epoch": 1.231731240804316,
      "grad_norm": 1.8508894613035585,
      "learning_rate": 1.7601144622639154e-06,
      "loss": 0.3923,
      "step": 10046
    },
    {
      "epoch": 1.2318538499264344,
      "grad_norm": 2.0159531041622993,
      "learning_rate": 1.7596303435051454e-06,
      "loss": 0.4342,
      "step": 10047
    },
    {
      "epoch": 1.2319764590485531,
      "grad_norm": 2.1029496425623426,
      "learning_rate": 1.7591462551768785e-06,
      "loss": 0.4687,
      "step": 10048
    },
    {
      "epoch": 1.2320990681706718,
      "grad_norm": 1.9728114811135413,
      "learning_rate": 1.7586621972990132e-06,
      "loss": 0.4285,
      "step": 10049
    },
    {
      "epoch": 1.2322216772927905,
      "grad_norm": 1.8052547001404897,
      "learning_rate": 1.758178169891443e-06,
      "loss": 0.4176,
      "step": 10050
    },
    {
      "epoch": 1.2323442864149092,
      "grad_norm": 2.091150904621165,
      "learning_rate": 1.7576941729740629e-06,
      "loss": 0.4258,
      "step": 10051
    },
    {
      "epoch": 1.232466895537028,
      "grad_norm": 1.910084633928383,
      "learning_rate": 1.757210206566767e-06,
      "loss": 0.4267,
      "step": 10052
    },
    {
      "epoch": 1.2325895046591466,
      "grad_norm": 1.9826490543439177,
      "learning_rate": 1.7567262706894472e-06,
      "loss": 0.4369,
      "step": 10053
    },
    {
      "epoch": 1.2327121137812653,
      "grad_norm": 2.0715521799220964,
      "learning_rate": 1.7562423653619931e-06,
      "loss": 0.4179,
      "step": 10054
    },
    {
      "epoch": 1.232834722903384,
      "grad_norm": 1.9099968869155064,
      "learning_rate": 1.755758490604294e-06,
      "loss": 0.4409,
      "step": 10055
    },
    {
      "epoch": 1.2329573320255027,
      "grad_norm": 1.9225641861527623,
      "learning_rate": 1.7552746464362386e-06,
      "loss": 0.3905,
      "step": 10056
    },
    {
      "epoch": 1.2330799411476214,
      "grad_norm": 1.9709578160989059,
      "learning_rate": 1.754790832877715e-06,
      "loss": 0.4466,
      "step": 10057
    },
    {
      "epoch": 1.2332025502697401,
      "grad_norm": 1.9091086748806163,
      "learning_rate": 1.7543070499486059e-06,
      "loss": 0.4124,
      "step": 10058
    },
    {
      "epoch": 1.2333251593918588,
      "grad_norm": 1.8598001392533883,
      "learning_rate": 1.7538232976687977e-06,
      "loss": 0.4172,
      "step": 10059
    },
    {
      "epoch": 1.2334477685139775,
      "grad_norm": 1.9053435791971585,
      "learning_rate": 1.7533395760581727e-06,
      "loss": 0.4375,
      "step": 10060
    },
    {
      "epoch": 1.233570377636096,
      "grad_norm": 1.893093585058641,
      "learning_rate": 1.752855885136614e-06,
      "loss": 0.4765,
      "step": 10061
    },
    {
      "epoch": 1.2336929867582147,
      "grad_norm": 2.160993450927698,
      "learning_rate": 1.7523722249240005e-06,
      "loss": 0.4615,
      "step": 10062
    },
    {
      "epoch": 1.2338155958803334,
      "grad_norm": 1.7250816226811883,
      "learning_rate": 1.7518885954402114e-06,
      "loss": 0.4246,
      "step": 10063
    },
    {
      "epoch": 1.233938205002452,
      "grad_norm": 1.8967255937207603,
      "learning_rate": 1.7514049967051261e-06,
      "loss": 0.4704,
      "step": 10064
    },
    {
      "epoch": 1.2340608141245708,
      "grad_norm": 1.9942285952785066,
      "learning_rate": 1.750921428738621e-06,
      "loss": 0.4291,
      "step": 10065
    },
    {
      "epoch": 1.2341834232466895,
      "grad_norm": 1.8712062013799229,
      "learning_rate": 1.7504378915605714e-06,
      "loss": 0.3991,
      "step": 10066
    },
    {
      "epoch": 1.2343060323688082,
      "grad_norm": 1.9890426829834595,
      "learning_rate": 1.7499543851908507e-06,
      "loss": 0.4943,
      "step": 10067
    },
    {
      "epoch": 1.234428641490927,
      "grad_norm": 1.8705370423858438,
      "learning_rate": 1.749470909649333e-06,
      "loss": 0.3992,
      "step": 10068
    },
    {
      "epoch": 1.2345512506130456,
      "grad_norm": 1.8295745040764666,
      "learning_rate": 1.7489874649558902e-06,
      "loss": 0.4545,
      "step": 10069
    },
    {
      "epoch": 1.2346738597351643,
      "grad_norm": 2.2525621563858933,
      "learning_rate": 1.7485040511303917e-06,
      "loss": 0.4629,
      "step": 10070
    },
    {
      "epoch": 1.234796468857283,
      "grad_norm": 2.1403016093132985,
      "learning_rate": 1.7480206681927064e-06,
      "loss": 0.4402,
      "step": 10071
    },
    {
      "epoch": 1.2349190779794017,
      "grad_norm": 1.7994696468133822,
      "learning_rate": 1.7475373161627034e-06,
      "loss": 0.4327,
      "step": 10072
    },
    {
      "epoch": 1.2350416871015204,
      "grad_norm": 1.8799784047723886,
      "learning_rate": 1.7470539950602489e-06,
      "loss": 0.4067,
      "step": 10073
    },
    {
      "epoch": 1.235164296223639,
      "grad_norm": 1.999207285279998,
      "learning_rate": 1.7465707049052088e-06,
      "loss": 0.437,
      "step": 10074
    },
    {
      "epoch": 1.2352869053457578,
      "grad_norm": 1.8879836480376402,
      "learning_rate": 1.746087445717446e-06,
      "loss": 0.493,
      "step": 10075
    },
    {
      "epoch": 1.2354095144678765,
      "grad_norm": 1.882674943515463,
      "learning_rate": 1.7456042175168233e-06,
      "loss": 0.427,
      "step": 10076
    },
    {
      "epoch": 1.2355321235899952,
      "grad_norm": 2.0216932587299534,
      "learning_rate": 1.7451210203232033e-06,
      "loss": 0.428,
      "step": 10077
    },
    {
      "epoch": 1.235654732712114,
      "grad_norm": 1.9474898526780906,
      "learning_rate": 1.7446378541564464e-06,
      "loss": 0.4429,
      "step": 10078
    },
    {
      "epoch": 1.2357773418342324,
      "grad_norm": 1.872698067791567,
      "learning_rate": 1.7441547190364105e-06,
      "loss": 0.4174,
      "step": 10079
    },
    {
      "epoch": 1.235899950956351,
      "grad_norm": 1.7603334614618147,
      "learning_rate": 1.7436716149829537e-06,
      "loss": 0.4137,
      "step": 10080
    },
    {
      "epoch": 1.2360225600784698,
      "grad_norm": 2.0267326192643393,
      "learning_rate": 1.7431885420159319e-06,
      "loss": 0.4366,
      "step": 10081
    },
    {
      "epoch": 1.2361451692005885,
      "grad_norm": 1.9647369006004547,
      "learning_rate": 1.7427055001552024e-06,
      "loss": 0.448,
      "step": 10082
    },
    {
      "epoch": 1.2362677783227072,
      "grad_norm": 1.8732039799545437,
      "learning_rate": 1.742222489420616e-06,
      "loss": 0.3923,
      "step": 10083
    },
    {
      "epoch": 1.2363903874448259,
      "grad_norm": 1.8966129714348852,
      "learning_rate": 1.7417395098320276e-06,
      "loss": 0.4342,
      "step": 10084
    },
    {
      "epoch": 1.2365129965669446,
      "grad_norm": 2.019959933016654,
      "learning_rate": 1.7412565614092874e-06,
      "loss": 0.4292,
      "step": 10085
    },
    {
      "epoch": 1.2366356056890633,
      "grad_norm": 1.8796528731328597,
      "learning_rate": 1.7407736441722465e-06,
      "loss": 0.4557,
      "step": 10086
    },
    {
      "epoch": 1.236758214811182,
      "grad_norm": 1.7740870478514636,
      "learning_rate": 1.7402907581407521e-06,
      "loss": 0.4175,
      "step": 10087
    },
    {
      "epoch": 1.2368808239333007,
      "grad_norm": 2.014774408137351,
      "learning_rate": 1.7398079033346526e-06,
      "loss": 0.4608,
      "step": 10088
    },
    {
      "epoch": 1.2370034330554194,
      "grad_norm": 1.9962526725722982,
      "learning_rate": 1.7393250797737936e-06,
      "loss": 0.5126,
      "step": 10089
    },
    {
      "epoch": 1.237126042177538,
      "grad_norm": 1.8338723417874068,
      "learning_rate": 1.7388422874780226e-06,
      "loss": 0.4245,
      "step": 10090
    },
    {
      "epoch": 1.2372486512996568,
      "grad_norm": 2.082509374698488,
      "learning_rate": 1.7383595264671788e-06,
      "loss": 0.4749,
      "step": 10091
    },
    {
      "epoch": 1.2373712604217755,
      "grad_norm": 2.074305439649711,
      "learning_rate": 1.7378767967611076e-06,
      "loss": 0.4317,
      "step": 10092
    },
    {
      "epoch": 1.237493869543894,
      "grad_norm": 1.8635716236712776,
      "learning_rate": 1.7373940983796495e-06,
      "loss": 0.4519,
      "step": 10093
    },
    {
      "epoch": 1.2376164786660127,
      "grad_norm": 1.8540057548841677,
      "learning_rate": 1.7369114313426444e-06,
      "loss": 0.4591,
      "step": 10094
    },
    {
      "epoch": 1.2377390877881314,
      "grad_norm": 2.0166623588761787,
      "learning_rate": 1.73642879566993e-06,
      "loss": 0.46,
      "step": 10095
    },
    {
      "epoch": 1.23786169691025,
      "grad_norm": 1.8687088615613965,
      "learning_rate": 1.735946191381343e-06,
      "loss": 0.4449,
      "step": 10096
    },
    {
      "epoch": 1.2379843060323688,
      "grad_norm": 2.0971900050382946,
      "learning_rate": 1.735463618496721e-06,
      "loss": 0.412,
      "step": 10097
    },
    {
      "epoch": 1.2381069151544875,
      "grad_norm": 2.1159166026881566,
      "learning_rate": 1.7349810770358977e-06,
      "loss": 0.4058,
      "step": 10098
    },
    {
      "epoch": 1.2382295242766062,
      "grad_norm": 1.9136956471073945,
      "learning_rate": 1.7344985670187072e-06,
      "loss": 0.4063,
      "step": 10099
    },
    {
      "epoch": 1.2383521333987249,
      "grad_norm": 1.7874563941647053,
      "learning_rate": 1.7340160884649804e-06,
      "loss": 0.3902,
      "step": 10100
    },
    {
      "epoch": 1.2384747425208436,
      "grad_norm": 1.763969437608381,
      "learning_rate": 1.7335336413945475e-06,
      "loss": 0.4166,
      "step": 10101
    },
    {
      "epoch": 1.2385973516429623,
      "grad_norm": 1.8415279689459347,
      "learning_rate": 1.7330512258272394e-06,
      "loss": 0.3679,
      "step": 10102
    },
    {
      "epoch": 1.238719960765081,
      "grad_norm": 2.0478121933751847,
      "learning_rate": 1.7325688417828845e-06,
      "loss": 0.4573,
      "step": 10103
    },
    {
      "epoch": 1.2388425698871997,
      "grad_norm": 1.8189809862994764,
      "learning_rate": 1.7320864892813082e-06,
      "loss": 0.4237,
      "step": 10104
    },
    {
      "epoch": 1.2389651790093184,
      "grad_norm": 1.828921470115686,
      "learning_rate": 1.7316041683423368e-06,
      "loss": 0.3897,
      "step": 10105
    },
    {
      "epoch": 1.239087788131437,
      "grad_norm": 1.840775467765995,
      "learning_rate": 1.7311218789857943e-06,
      "loss": 0.4425,
      "step": 10106
    },
    {
      "epoch": 1.2392103972535558,
      "grad_norm": 1.8886550576910728,
      "learning_rate": 1.730639621231504e-06,
      "loss": 0.4573,
      "step": 10107
    },
    {
      "epoch": 1.2393330063756745,
      "grad_norm": 1.8580476541993698,
      "learning_rate": 1.7301573950992872e-06,
      "loss": 0.4325,
      "step": 10108
    },
    {
      "epoch": 1.2394556154977932,
      "grad_norm": 1.9627197998771742,
      "learning_rate": 1.7296752006089635e-06,
      "loss": 0.4445,
      "step": 10109
    },
    {
      "epoch": 1.2395782246199116,
      "grad_norm": 1.9415326430516968,
      "learning_rate": 1.7291930377803534e-06,
      "loss": 0.4354,
      "step": 10110
    },
    {
      "epoch": 1.2397008337420303,
      "grad_norm": 1.9948177934651001,
      "learning_rate": 1.7287109066332747e-06,
      "loss": 0.4645,
      "step": 10111
    },
    {
      "epoch": 1.239823442864149,
      "grad_norm": 1.8054533540153392,
      "learning_rate": 1.728228807187543e-06,
      "loss": 0.4402,
      "step": 10112
    },
    {
      "epoch": 1.2399460519862677,
      "grad_norm": 1.8549649312246732,
      "learning_rate": 1.7277467394629732e-06,
      "loss": 0.4227,
      "step": 10113
    },
    {
      "epoch": 1.2400686611083864,
      "grad_norm": 2.1589363249174793,
      "learning_rate": 1.727264703479379e-06,
      "loss": 0.454,
      "step": 10114
    },
    {
      "epoch": 1.2401912702305051,
      "grad_norm": 1.9062206612439603,
      "learning_rate": 1.7267826992565753e-06,
      "loss": 0.4341,
      "step": 10115
    },
    {
      "epoch": 1.2403138793526238,
      "grad_norm": 2.0414203828639934,
      "learning_rate": 1.72630072681437e-06,
      "loss": 0.4533,
      "step": 10116
    },
    {
      "epoch": 1.2404364884747425,
      "grad_norm": 1.9065710945951502,
      "learning_rate": 1.725818786172575e-06,
      "loss": 0.4245,
      "step": 10117
    },
    {
      "epoch": 1.2405590975968612,
      "grad_norm": 2.039822073867584,
      "learning_rate": 1.7253368773509982e-06,
      "loss": 0.4617,
      "step": 10118
    },
    {
      "epoch": 1.24068170671898,
      "grad_norm": 1.9455500175192169,
      "learning_rate": 1.724855000369447e-06,
      "loss": 0.4653,
      "step": 10119
    },
    {
      "epoch": 1.2408043158410986,
      "grad_norm": 1.8706176989521102,
      "learning_rate": 1.724373155247729e-06,
      "loss": 0.429,
      "step": 10120
    },
    {
      "epoch": 1.2409269249632173,
      "grad_norm": 1.7626436179253215,
      "learning_rate": 1.723891342005646e-06,
      "loss": 0.4298,
      "step": 10121
    },
    {
      "epoch": 1.241049534085336,
      "grad_norm": 1.9053267688421085,
      "learning_rate": 1.7234095606630032e-06,
      "loss": 0.4239,
      "step": 10122
    },
    {
      "epoch": 1.2411721432074547,
      "grad_norm": 1.9072664726216555,
      "learning_rate": 1.7229278112396026e-06,
      "loss": 0.4541,
      "step": 10123
    },
    {
      "epoch": 1.2412947523295732,
      "grad_norm": 1.993958960027741,
      "learning_rate": 1.7224460937552449e-06,
      "loss": 0.434,
      "step": 10124
    },
    {
      "epoch": 1.241417361451692,
      "grad_norm": 1.9184342404769124,
      "learning_rate": 1.7219644082297287e-06,
      "loss": 0.4192,
      "step": 10125
    },
    {
      "epoch": 1.2415399705738106,
      "grad_norm": 1.9581113963420775,
      "learning_rate": 1.7214827546828528e-06,
      "loss": 0.4424,
      "step": 10126
    },
    {
      "epoch": 1.2416625796959293,
      "grad_norm": 1.742898314209368,
      "learning_rate": 1.7210011331344134e-06,
      "loss": 0.422,
      "step": 10127
    },
    {
      "epoch": 1.241785188818048,
      "grad_norm": 1.7352936426601953,
      "learning_rate": 1.7205195436042082e-06,
      "loss": 0.4282,
      "step": 10128
    },
    {
      "epoch": 1.2419077979401667,
      "grad_norm": 1.7796906789495344,
      "learning_rate": 1.7200379861120284e-06,
      "loss": 0.4433,
      "step": 10129
    },
    {
      "epoch": 1.2420304070622854,
      "grad_norm": 1.9728354488152542,
      "learning_rate": 1.7195564606776683e-06,
      "loss": 0.5043,
      "step": 10130
    },
    {
      "epoch": 1.242153016184404,
      "grad_norm": 1.9117562392088618,
      "learning_rate": 1.7190749673209195e-06,
      "loss": 0.3985,
      "step": 10131
    },
    {
      "epoch": 1.2422756253065228,
      "grad_norm": 1.7909338181215018,
      "learning_rate": 1.7185935060615726e-06,
      "loss": 0.4145,
      "step": 10132
    },
    {
      "epoch": 1.2423982344286415,
      "grad_norm": 1.753550855831828,
      "learning_rate": 1.7181120769194154e-06,
      "loss": 0.3867,
      "step": 10133
    },
    {
      "epoch": 1.2425208435507602,
      "grad_norm": 1.8348516098929233,
      "learning_rate": 1.7176306799142356e-06,
      "loss": 0.425,
      "step": 10134
    },
    {
      "epoch": 1.242643452672879,
      "grad_norm": 1.7335046095069717,
      "learning_rate": 1.7171493150658203e-06,
      "loss": 0.4402,
      "step": 10135
    },
    {
      "epoch": 1.2427660617949976,
      "grad_norm": 2.1785564655796015,
      "learning_rate": 1.7166679823939547e-06,
      "loss": 0.4082,
      "step": 10136
    },
    {
      "epoch": 1.2428886709171163,
      "grad_norm": 1.6337220068227158,
      "learning_rate": 1.7161866819184214e-06,
      "loss": 0.45,
      "step": 10137
    },
    {
      "epoch": 1.243011280039235,
      "grad_norm": 1.8048126599349095,
      "learning_rate": 1.7157054136590028e-06,
      "loss": 0.4433,
      "step": 10138
    },
    {
      "epoch": 1.2431338891613537,
      "grad_norm": 2.048421885173375,
      "learning_rate": 1.71522417763548e-06,
      "loss": 0.4146,
      "step": 10139
    },
    {
      "epoch": 1.2432564982834724,
      "grad_norm": 1.6873969366125607,
      "learning_rate": 1.7147429738676345e-06,
      "loss": 0.4712,
      "step": 10140
    },
    {
      "epoch": 1.243379107405591,
      "grad_norm": 2.0307055763619926,
      "learning_rate": 1.714261802375241e-06,
      "loss": 0.4198,
      "step": 10141
    },
    {
      "epoch": 1.2435017165277096,
      "grad_norm": 1.86553826540649,
      "learning_rate": 1.713780663178079e-06,
      "loss": 0.4492,
      "step": 10142
    },
    {
      "epoch": 1.2436243256498283,
      "grad_norm": 1.815024940608984,
      "learning_rate": 1.7132995562959237e-06,
      "loss": 0.4055,
      "step": 10143
    },
    {
      "epoch": 1.243746934771947,
      "grad_norm": 1.9140234830839407,
      "learning_rate": 1.7128184817485496e-06,
      "loss": 0.3983,
      "step": 10144
    },
    {
      "epoch": 1.2438695438940657,
      "grad_norm": 1.8948645200838607,
      "learning_rate": 1.71233743955573e-06,
      "loss": 0.4462,
      "step": 10145
    },
    {
      "epoch": 1.2439921530161844,
      "grad_norm": 1.9182405345199252,
      "learning_rate": 1.7118564297372353e-06,
      "loss": 0.4376,
      "step": 10146
    },
    {
      "epoch": 1.244114762138303,
      "grad_norm": 1.882167375688723,
      "learning_rate": 1.711375452312836e-06,
      "loss": 0.4134,
      "step": 10147
    },
    {
      "epoch": 1.2442373712604218,
      "grad_norm": 1.8157689057939879,
      "learning_rate": 1.7108945073023028e-06,
      "loss": 0.4372,
      "step": 10148
    },
    {
      "epoch": 1.2443599803825405,
      "grad_norm": 1.931170327506357,
      "learning_rate": 1.7104135947254026e-06,
      "loss": 0.439,
      "step": 10149
    },
    {
      "epoch": 1.2444825895046592,
      "grad_norm": 1.950239140646935,
      "learning_rate": 1.709932714601901e-06,
      "loss": 0.415,
      "step": 10150
    },
    {
      "epoch": 1.2446051986267779,
      "grad_norm": 1.8528493778873634,
      "learning_rate": 1.7094518669515636e-06,
      "loss": 0.4023,
      "step": 10151
    },
    {
      "epoch": 1.2447278077488966,
      "grad_norm": 2.0009411793919876,
      "learning_rate": 1.7089710517941533e-06,
      "loss": 0.4422,
      "step": 10152
    },
    {
      "epoch": 1.2448504168710153,
      "grad_norm": 1.9412876865472752,
      "learning_rate": 1.7084902691494348e-06,
      "loss": 0.3981,
      "step": 10153
    },
    {
      "epoch": 1.244973025993134,
      "grad_norm": 1.778254468114847,
      "learning_rate": 1.708009519037166e-06,
      "loss": 0.3787,
      "step": 10154
    },
    {
      "epoch": 1.2450956351152525,
      "grad_norm": 2.0323695652843234,
      "learning_rate": 1.7075288014771086e-06,
      "loss": 0.3931,
      "step": 10155
    },
    {
      "epoch": 1.2452182442373712,
      "grad_norm": 1.9740548854159314,
      "learning_rate": 1.7070481164890207e-06,
      "loss": 0.421,
      "step": 10156
    },
    {
      "epoch": 1.2453408533594899,
      "grad_norm": 1.823177189138229,
      "learning_rate": 1.706567464092659e-06,
      "loss": 0.3959,
      "step": 10157
    },
    {
      "epoch": 1.2454634624816086,
      "grad_norm": 1.8555066745687427,
      "learning_rate": 1.7060868443077787e-06,
      "loss": 0.4671,
      "step": 10158
    },
    {
      "epoch": 1.2455860716037273,
      "grad_norm": 1.875907944076417,
      "learning_rate": 1.7056062571541345e-06,
      "loss": 0.4247,
      "step": 10159
    },
    {
      "epoch": 1.245708680725846,
      "grad_norm": 1.8282762359038554,
      "learning_rate": 1.70512570265148e-06,
      "loss": 0.4025,
      "step": 10160
    },
    {
      "epoch": 1.2458312898479647,
      "grad_norm": 1.981571027150637,
      "learning_rate": 1.7046451808195666e-06,
      "loss": 0.4503,
      "step": 10161
    },
    {
      "epoch": 1.2459538989700834,
      "grad_norm": 1.8894486620282183,
      "learning_rate": 1.704164691678144e-06,
      "loss": 0.4448,
      "step": 10162
    },
    {
      "epoch": 1.246076508092202,
      "grad_norm": 1.6678962767824925,
      "learning_rate": 1.7036842352469613e-06,
      "loss": 0.4223,
      "step": 10163
    },
    {
      "epoch": 1.2461991172143208,
      "grad_norm": 1.9997851894252674,
      "learning_rate": 1.7032038115457664e-06,
      "loss": 0.383,
      "step": 10164
    },
    {
      "epoch": 1.2463217263364395,
      "grad_norm": 2.1460300895623177,
      "learning_rate": 1.7027234205943061e-06,
      "loss": 0.4264,
      "step": 10165
    },
    {
      "epoch": 1.2464443354585582,
      "grad_norm": 1.8713516163344308,
      "learning_rate": 1.7022430624123237e-06,
      "loss": 0.4103,
      "step": 10166
    },
    {
      "epoch": 1.2465669445806769,
      "grad_norm": 1.9444036835585665,
      "learning_rate": 1.701762737019564e-06,
      "loss": 0.4656,
      "step": 10167
    },
    {
      "epoch": 1.2466895537027955,
      "grad_norm": 1.7860132209853592,
      "learning_rate": 1.701282444435769e-06,
      "loss": 0.4865,
      "step": 10168
    },
    {
      "epoch": 1.2468121628249142,
      "grad_norm": 2.0490610783473127,
      "learning_rate": 1.7008021846806795e-06,
      "loss": 0.4265,
      "step": 10169
    },
    {
      "epoch": 1.246934771947033,
      "grad_norm": 2.1843183530933694,
      "learning_rate": 1.7003219577740356e-06,
      "loss": 0.44,
      "step": 10170
    },
    {
      "epoch": 1.2470573810691516,
      "grad_norm": 1.956130536777006,
      "learning_rate": 1.6998417637355746e-06,
      "loss": 0.4272,
      "step": 10171
    },
    {
      "epoch": 1.2471799901912703,
      "grad_norm": 1.931510067490861,
      "learning_rate": 1.699361602585033e-06,
      "loss": 0.3891,
      "step": 10172
    },
    {
      "epoch": 1.2473025993133888,
      "grad_norm": 1.8909139975160205,
      "learning_rate": 1.6988814743421478e-06,
      "loss": 0.452,
      "step": 10173
    },
    {
      "epoch": 1.2474252084355075,
      "grad_norm": 2.0440996145214454,
      "learning_rate": 1.6984013790266525e-06,
      "loss": 0.4342,
      "step": 10174
    },
    {
      "epoch": 1.2475478175576262,
      "grad_norm": 1.8251774096749747,
      "learning_rate": 1.6979213166582792e-06,
      "loss": 0.3975,
      "step": 10175
    },
    {
      "epoch": 1.247670426679745,
      "grad_norm": 2.071234801737569,
      "learning_rate": 1.6974412872567598e-06,
      "loss": 0.4736,
      "step": 10176
    },
    {
      "epoch": 1.2477930358018636,
      "grad_norm": 1.888650816166245,
      "learning_rate": 1.6969612908418237e-06,
      "loss": 0.3776,
      "step": 10177
    },
    {
      "epoch": 1.2479156449239823,
      "grad_norm": 1.8954420427609775,
      "learning_rate": 1.6964813274332018e-06,
      "loss": 0.4307,
      "step": 10178
    },
    {
      "epoch": 1.248038254046101,
      "grad_norm": 1.8459142288431951,
      "learning_rate": 1.6960013970506183e-06,
      "loss": 0.4465,
      "step": 10179
    },
    {
      "epoch": 1.2481608631682197,
      "grad_norm": 1.8278770328137381,
      "learning_rate": 1.695521499713801e-06,
      "loss": 0.4683,
      "step": 10180
    },
    {
      "epoch": 1.2482834722903384,
      "grad_norm": 1.9408460853437135,
      "learning_rate": 1.6950416354424748e-06,
      "loss": 0.4397,
      "step": 10181
    },
    {
      "epoch": 1.2484060814124571,
      "grad_norm": 1.8375141063389446,
      "learning_rate": 1.6945618042563628e-06,
      "loss": 0.4691,
      "step": 10182
    },
    {
      "epoch": 1.2485286905345758,
      "grad_norm": 2.023477835311206,
      "learning_rate": 1.6940820061751859e-06,
      "loss": 0.4523,
      "step": 10183
    },
    {
      "epoch": 1.2486512996566945,
      "grad_norm": 1.8905817228357067,
      "learning_rate": 1.6936022412186653e-06,
      "loss": 0.4256,
      "step": 10184
    },
    {
      "epoch": 1.2487739087788132,
      "grad_norm": 1.9343124438803994,
      "learning_rate": 1.6931225094065197e-06,
      "loss": 0.4078,
      "step": 10185
    },
    {
      "epoch": 1.248896517900932,
      "grad_norm": 2.0950480699853293,
      "learning_rate": 1.692642810758469e-06,
      "loss": 0.5006,
      "step": 10186
    },
    {
      "epoch": 1.2490191270230504,
      "grad_norm": 1.854348000213881,
      "learning_rate": 1.6921631452942267e-06,
      "loss": 0.3984,
      "step": 10187
    },
    {
      "epoch": 1.249141736145169,
      "grad_norm": 1.9641226672634038,
      "learning_rate": 1.6916835130335097e-06,
      "loss": 0.4624,
      "step": 10188
    },
    {
      "epoch": 1.2492643452672878,
      "grad_norm": 1.7900900963294932,
      "learning_rate": 1.6912039139960312e-06,
      "loss": 0.4299,
      "step": 10189
    },
    {
      "epoch": 1.2493869543894065,
      "grad_norm": 1.9766837137766735,
      "learning_rate": 1.6907243482015045e-06,
      "loss": 0.4603,
      "step": 10190
    },
    {
      "epoch": 1.2495095635115252,
      "grad_norm": 2.1188201343402024,
      "learning_rate": 1.690244815669639e-06,
      "loss": 0.4454,
      "step": 10191
    },
    {
      "epoch": 1.249632172633644,
      "grad_norm": 1.8771017606524238,
      "learning_rate": 1.689765316420145e-06,
      "loss": 0.4075,
      "step": 10192
    },
    {
      "epoch": 1.2497547817557626,
      "grad_norm": 1.9235444254295204,
      "learning_rate": 1.689285850472731e-06,
      "loss": 0.4031,
      "step": 10193
    },
    {
      "epoch": 1.2498773908778813,
      "grad_norm": 1.9710469192278426,
      "learning_rate": 1.688806417847104e-06,
      "loss": 0.4206,
      "step": 10194
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9516788856610368,
      "learning_rate": 1.6883270185629696e-06,
      "loss": 0.4518,
      "step": 10195
    },
    {
      "epoch": 1.2501226091221187,
      "grad_norm": 1.7991231322907906,
      "learning_rate": 1.6878476526400315e-06,
      "loss": 0.3896,
      "step": 10196
    },
    {
      "epoch": 1.2502452182442374,
      "grad_norm": 1.9280931662360516,
      "learning_rate": 1.6873683200979918e-06,
      "loss": 0.4429,
      "step": 10197
    },
    {
      "epoch": 1.250367827366356,
      "grad_norm": 1.959093887497656,
      "learning_rate": 1.6868890209565535e-06,
      "loss": 0.4008,
      "step": 10198
    },
    {
      "epoch": 1.2504904364884748,
      "grad_norm": 1.770589782508337,
      "learning_rate": 1.686409755235417e-06,
      "loss": 0.4247,
      "step": 10199
    },
    {
      "epoch": 1.2506130456105935,
      "grad_norm": 1.8471944836648142,
      "learning_rate": 1.6859305229542788e-06,
      "loss": 0.4661,
      "step": 10200
    },
    {
      "epoch": 1.2507356547327122,
      "grad_norm": 2.0004784104348525,
      "learning_rate": 1.6854513241328375e-06,
      "loss": 0.4591,
      "step": 10201
    },
    {
      "epoch": 1.250858263854831,
      "grad_norm": 1.952898487839193,
      "learning_rate": 1.6849721587907887e-06,
      "loss": 0.4261,
      "step": 10202
    },
    {
      "epoch": 1.2509808729769496,
      "grad_norm": 1.8056627917205383,
      "learning_rate": 1.6844930269478274e-06,
      "loss": 0.4516,
      "step": 10203
    },
    {
      "epoch": 1.2511034820990683,
      "grad_norm": 1.9935981044621194,
      "learning_rate": 1.6840139286236462e-06,
      "loss": 0.4319,
      "step": 10204
    },
    {
      "epoch": 1.2512260912211868,
      "grad_norm": 2.051550671248021,
      "learning_rate": 1.6835348638379364e-06,
      "loss": 0.4633,
      "step": 10205
    },
    {
      "epoch": 1.2513487003433055,
      "grad_norm": 1.8808072471559651,
      "learning_rate": 1.6830558326103896e-06,
      "loss": 0.4677,
      "step": 10206
    },
    {
      "epoch": 1.2514713094654242,
      "grad_norm": 2.1595106523094145,
      "learning_rate": 1.6825768349606953e-06,
      "loss": 0.4607,
      "step": 10207
    },
    {
      "epoch": 1.2515939185875429,
      "grad_norm": 1.8530146737272937,
      "learning_rate": 1.6820978709085395e-06,
      "loss": 0.4473,
      "step": 10208
    },
    {
      "epoch": 1.2517165277096616,
      "grad_norm": 1.8698890528792393,
      "learning_rate": 1.6816189404736092e-06,
      "loss": 0.4228,
      "step": 10209
    },
    {
      "epoch": 1.2518391368317803,
      "grad_norm": 1.9639835421821532,
      "learning_rate": 1.6811400436755888e-06,
      "loss": 0.4711,
      "step": 10210
    },
    {
      "epoch": 1.251961745953899,
      "grad_norm": 1.7440498893814724,
      "learning_rate": 1.6806611805341636e-06,
      "loss": 0.4363,
      "step": 10211
    },
    {
      "epoch": 1.2520843550760177,
      "grad_norm": 1.8622364695110851,
      "learning_rate": 1.680182351069013e-06,
      "loss": 0.4093,
      "step": 10212
    },
    {
      "epoch": 1.2522069641981364,
      "grad_norm": 1.8802689456256623,
      "learning_rate": 1.6797035552998197e-06,
      "loss": 0.4248,
      "step": 10213
    },
    {
      "epoch": 1.252329573320255,
      "grad_norm": 1.9906214308315253,
      "learning_rate": 1.6792247932462623e-06,
      "loss": 0.437,
      "step": 10214
    },
    {
      "epoch": 1.2524521824423738,
      "grad_norm": 1.8818582385630294,
      "learning_rate": 1.6787460649280185e-06,
      "loss": 0.4234,
      "step": 10215
    },
    {
      "epoch": 1.2525747915644925,
      "grad_norm": 1.7803357989442103,
      "learning_rate": 1.6782673703647672e-06,
      "loss": 0.4373,
      "step": 10216
    },
    {
      "epoch": 1.252697400686611,
      "grad_norm": 1.9693953351381384,
      "learning_rate": 1.67778870957618e-06,
      "loss": 0.4634,
      "step": 10217
    },
    {
      "epoch": 1.2528200098087297,
      "grad_norm": 1.7755810038681563,
      "learning_rate": 1.6773100825819328e-06,
      "loss": 0.3482,
      "step": 10218
    },
    {
      "epoch": 1.2529426189308484,
      "grad_norm": 2.0270958645903527,
      "learning_rate": 1.676831489401698e-06,
      "loss": 0.4552,
      "step": 10219
    },
    {
      "epoch": 1.253065228052967,
      "grad_norm": 1.9373156400484646,
      "learning_rate": 1.6763529300551464e-06,
      "loss": 0.3855,
      "step": 10220
    },
    {
      "epoch": 1.2531878371750858,
      "grad_norm": 1.8264347022326326,
      "learning_rate": 1.6758744045619473e-06,
      "loss": 0.4255,
      "step": 10221
    },
    {
      "epoch": 1.2533104462972045,
      "grad_norm": 1.8387381401249385,
      "learning_rate": 1.675395912941769e-06,
      "loss": 0.4783,
      "step": 10222
    },
    {
      "epoch": 1.2534330554193231,
      "grad_norm": 1.84337252348498,
      "learning_rate": 1.6749174552142783e-06,
      "loss": 0.4225,
      "step": 10223
    },
    {
      "epoch": 1.2535556645414418,
      "grad_norm": 2.0059946516769145,
      "learning_rate": 1.674439031399142e-06,
      "loss": 0.4522,
      "step": 10224
    },
    {
      "epoch": 1.2536782736635605,
      "grad_norm": 1.9124729600558545,
      "learning_rate": 1.673960641516022e-06,
      "loss": 0.4289,
      "step": 10225
    },
    {
      "epoch": 1.2538008827856792,
      "grad_norm": 1.9185439200379846,
      "learning_rate": 1.6734822855845823e-06,
      "loss": 0.431,
      "step": 10226
    },
    {
      "epoch": 1.253923491907798,
      "grad_norm": 1.9608478767562871,
      "learning_rate": 1.6730039636244837e-06,
      "loss": 0.4653,
      "step": 10227
    },
    {
      "epoch": 1.2540461010299166,
      "grad_norm": 2.0091459099042552,
      "learning_rate": 1.672525675655387e-06,
      "loss": 0.4364,
      "step": 10228
    },
    {
      "epoch": 1.2541687101520353,
      "grad_norm": 1.9371195129422483,
      "learning_rate": 1.6720474216969495e-06,
      "loss": 0.4444,
      "step": 10229
    },
    {
      "epoch": 1.254291319274154,
      "grad_norm": 1.866353396321514,
      "learning_rate": 1.6715692017688285e-06,
      "loss": 0.4296,
      "step": 10230
    },
    {
      "epoch": 1.2544139283962727,
      "grad_norm": 1.923999087395895,
      "learning_rate": 1.6710910158906798e-06,
      "loss": 0.4116,
      "step": 10231
    },
    {
      "epoch": 1.2545365375183914,
      "grad_norm": 1.8360003516130237,
      "learning_rate": 1.6706128640821586e-06,
      "loss": 0.3828,
      "step": 10232
    },
    {
      "epoch": 1.2546591466405101,
      "grad_norm": 1.9775830631614082,
      "learning_rate": 1.6701347463629167e-06,
      "loss": 0.426,
      "step": 10233
    },
    {
      "epoch": 1.2547817557626288,
      "grad_norm": 2.085313608299195,
      "learning_rate": 1.6696566627526057e-06,
      "loss": 0.389,
      "step": 10234
    },
    {
      "epoch": 1.2549043648847475,
      "grad_norm": 1.9752391479970168,
      "learning_rate": 1.6691786132708753e-06,
      "loss": 0.4389,
      "step": 10235
    },
    {
      "epoch": 1.2550269740068662,
      "grad_norm": 1.9733324982936398,
      "learning_rate": 1.6687005979373764e-06,
      "loss": 0.4192,
      "step": 10236
    },
    {
      "epoch": 1.2551495831289847,
      "grad_norm": 1.9644227985648939,
      "learning_rate": 1.668222616771753e-06,
      "loss": 0.4345,
      "step": 10237
    },
    {
      "epoch": 1.2552721922511034,
      "grad_norm": 1.978010360335402,
      "learning_rate": 1.667744669793653e-06,
      "loss": 0.4267,
      "step": 10238
    },
    {
      "epoch": 1.2553948013732221,
      "grad_norm": 1.9069496329092794,
      "learning_rate": 1.6672667570227207e-06,
      "loss": 0.4221,
      "step": 10239
    },
    {
      "epoch": 1.2555174104953408,
      "grad_norm": 2.0293943489110715,
      "learning_rate": 1.6667888784785988e-06,
      "loss": 0.4429,
      "step": 10240
    },
    {
      "epoch": 1.2556400196174595,
      "grad_norm": 2.01406851493118,
      "learning_rate": 1.6663110341809297e-06,
      "loss": 0.4334,
      "step": 10241
    },
    {
      "epoch": 1.2557626287395782,
      "grad_norm": 1.7622515752435075,
      "learning_rate": 1.6658332241493525e-06,
      "loss": 0.4317,
      "step": 10242
    },
    {
      "epoch": 1.255885237861697,
      "grad_norm": 1.8965560798007601,
      "learning_rate": 1.6653554484035057e-06,
      "loss": 0.3885,
      "step": 10243
    },
    {
      "epoch": 1.2560078469838156,
      "grad_norm": 1.851671792169237,
      "learning_rate": 1.6648777069630283e-06,
      "loss": 0.4003,
      "step": 10244
    },
    {
      "epoch": 1.2561304561059343,
      "grad_norm": 1.88362140212542,
      "learning_rate": 1.6643999998475562e-06,
      "loss": 0.4474,
      "step": 10245
    },
    {
      "epoch": 1.256253065228053,
      "grad_norm": 1.9499500196061927,
      "learning_rate": 1.663922327076723e-06,
      "loss": 0.472,
      "step": 10246
    },
    {
      "epoch": 1.2563756743501717,
      "grad_norm": 2.0556915199313326,
      "learning_rate": 1.6634446886701622e-06,
      "loss": 0.4856,
      "step": 10247
    },
    {
      "epoch": 1.2564982834722902,
      "grad_norm": 1.9082216007187809,
      "learning_rate": 1.6629670846475055e-06,
      "loss": 0.4926,
      "step": 10248
    },
    {
      "epoch": 1.256620892594409,
      "grad_norm": 2.0055436635194006,
      "learning_rate": 1.6624895150283847e-06,
      "loss": 0.4509,
      "step": 10249
    },
    {
      "epoch": 1.2567435017165276,
      "grad_norm": 1.9246808929593107,
      "learning_rate": 1.6620119798324257e-06,
      "loss": 0.4576,
      "step": 10250
    },
    {
      "epoch": 1.2568661108386463,
      "grad_norm": 1.8587426364805604,
      "learning_rate": 1.661534479079259e-06,
      "loss": 0.3976,
      "step": 10251
    },
    {
      "epoch": 1.256988719960765,
      "grad_norm": 1.8089975960841158,
      "learning_rate": 1.6610570127885093e-06,
      "loss": 0.427,
      "step": 10252
    },
    {
      "epoch": 1.2571113290828837,
      "grad_norm": 1.9108718907760556,
      "learning_rate": 1.660579580979802e-06,
      "loss": 0.4604,
      "step": 10253
    },
    {
      "epoch": 1.2572339382050024,
      "grad_norm": 1.7121586935345134,
      "learning_rate": 1.6601021836727597e-06,
      "loss": 0.3954,
      "step": 10254
    },
    {
      "epoch": 1.257356547327121,
      "grad_norm": 1.8665023319482674,
      "learning_rate": 1.6596248208870038e-06,
      "loss": 0.4638,
      "step": 10255
    },
    {
      "epoch": 1.2574791564492398,
      "grad_norm": 1.8969455317446595,
      "learning_rate": 1.6591474926421563e-06,
      "loss": 0.3842,
      "step": 10256
    },
    {
      "epoch": 1.2576017655713585,
      "grad_norm": 1.9119227821073002,
      "learning_rate": 1.6586701989578359e-06,
      "loss": 0.4379,
      "step": 10257
    },
    {
      "epoch": 1.2577243746934772,
      "grad_norm": 2.0351194388109546,
      "learning_rate": 1.6581929398536593e-06,
      "loss": 0.4162,
      "step": 10258
    },
    {
      "epoch": 1.257846983815596,
      "grad_norm": 1.9449687127824677,
      "learning_rate": 1.6577157153492429e-06,
      "loss": 0.459,
      "step": 10259
    },
    {
      "epoch": 1.2579695929377146,
      "grad_norm": 1.9920905521433723,
      "learning_rate": 1.657238525464202e-06,
      "loss": 0.4447,
      "step": 10260
    },
    {
      "epoch": 1.2580922020598333,
      "grad_norm": 1.7252360825171529,
      "learning_rate": 1.6567613702181504e-06,
      "loss": 0.4286,
      "step": 10261
    },
    {
      "epoch": 1.258214811181952,
      "grad_norm": 1.891651296047943,
      "learning_rate": 1.6562842496306985e-06,
      "loss": 0.4378,
      "step": 10262
    },
    {
      "epoch": 1.2583374203040707,
      "grad_norm": 1.899298135058244,
      "learning_rate": 1.6558071637214573e-06,
      "loss": 0.4672,
      "step": 10263
    },
    {
      "epoch": 1.2584600294261894,
      "grad_norm": 1.811227009718225,
      "learning_rate": 1.6553301125100365e-06,
      "loss": 0.4652,
      "step": 10264
    },
    {
      "epoch": 1.258582638548308,
      "grad_norm": 1.7363653934034988,
      "learning_rate": 1.6548530960160436e-06,
      "loss": 0.4456,
      "step": 10265
    },
    {
      "epoch": 1.2587052476704268,
      "grad_norm": 1.9948797800039904,
      "learning_rate": 1.6543761142590853e-06,
      "loss": 0.4525,
      "step": 10266
    },
    {
      "epoch": 1.2588278567925455,
      "grad_norm": 1.9106367104051702,
      "learning_rate": 1.6538991672587648e-06,
      "loss": 0.4238,
      "step": 10267
    },
    {
      "epoch": 1.258950465914664,
      "grad_norm": 1.942539196479234,
      "learning_rate": 1.6534222550346863e-06,
      "loss": 0.4553,
      "step": 10268
    },
    {
      "epoch": 1.2590730750367827,
      "grad_norm": 1.8715733582001908,
      "learning_rate": 1.6529453776064521e-06,
      "loss": 0.3953,
      "step": 10269
    },
    {
      "epoch": 1.2591956841589014,
      "grad_norm": 1.8142970896121267,
      "learning_rate": 1.652468534993663e-06,
      "loss": 0.4103,
      "step": 10270
    },
    {
      "epoch": 1.25931829328102,
      "grad_norm": 1.95089773494751,
      "learning_rate": 1.6519917272159176e-06,
      "loss": 0.4214,
      "step": 10271
    },
    {
      "epoch": 1.2594409024031388,
      "grad_norm": 1.914776310045468,
      "learning_rate": 1.651514954292813e-06,
      "loss": 0.4611,
      "step": 10272
    },
    {
      "epoch": 1.2595635115252575,
      "grad_norm": 1.7781251202221142,
      "learning_rate": 1.6510382162439454e-06,
      "loss": 0.4461,
      "step": 10273
    },
    {
      "epoch": 1.2596861206473762,
      "grad_norm": 1.956970626413874,
      "learning_rate": 1.650561513088912e-06,
      "loss": 0.4113,
      "step": 10274
    },
    {
      "epoch": 1.2598087297694949,
      "grad_norm": 2.143752624142924,
      "learning_rate": 1.6500848448473023e-06,
      "loss": 0.4391,
      "step": 10275
    },
    {
      "epoch": 1.2599313388916136,
      "grad_norm": 1.9782545593437952,
      "learning_rate": 1.6496082115387107e-06,
      "loss": 0.3865,
      "step": 10276
    },
    {
      "epoch": 1.2600539480137323,
      "grad_norm": 1.9521838667719151,
      "learning_rate": 1.649131613182727e-06,
      "loss": 0.4598,
      "step": 10277
    },
    {
      "epoch": 1.260176557135851,
      "grad_norm": 2.1972291260220707,
      "learning_rate": 1.6486550497989413e-06,
      "loss": 0.432,
      "step": 10278
    },
    {
      "epoch": 1.2602991662579697,
      "grad_norm": 1.9592804094994838,
      "learning_rate": 1.648178521406939e-06,
      "loss": 0.4483,
      "step": 10279
    },
    {
      "epoch": 1.2604217753800881,
      "grad_norm": 1.8867393601352356,
      "learning_rate": 1.647702028026308e-06,
      "loss": 0.3765,
      "step": 10280
    },
    {
      "epoch": 1.2605443845022068,
      "grad_norm": 1.980662309828109,
      "learning_rate": 1.6472255696766321e-06,
      "loss": 0.4575,
      "step": 10281
    },
    {
      "epoch": 1.2606669936243255,
      "grad_norm": 1.8950925538984742,
      "learning_rate": 1.6467491463774966e-06,
      "loss": 0.4375,
      "step": 10282
    },
    {
      "epoch": 1.2607896027464442,
      "grad_norm": 1.8538723494855238,
      "learning_rate": 1.6462727581484797e-06,
      "loss": 0.4411,
      "step": 10283
    },
    {
      "epoch": 1.260912211868563,
      "grad_norm": 1.9077291528406495,
      "learning_rate": 1.6457964050091646e-06,
      "loss": 0.4653,
      "step": 10284
    },
    {
      "epoch": 1.2610348209906816,
      "grad_norm": 1.898360645313844,
      "learning_rate": 1.6453200869791296e-06,
      "loss": 0.455,
      "step": 10285
    },
    {
      "epoch": 1.2611574301128003,
      "grad_norm": 1.9454959684194844,
      "learning_rate": 1.6448438040779516e-06,
      "loss": 0.4445,
      "step": 10286
    },
    {
      "epoch": 1.261280039234919,
      "grad_norm": 1.915422646344522,
      "learning_rate": 1.6443675563252087e-06,
      "loss": 0.3918,
      "step": 10287
    },
    {
      "epoch": 1.2614026483570377,
      "grad_norm": 2.022466476232195,
      "learning_rate": 1.6438913437404725e-06,
      "loss": 0.4446,
      "step": 10288
    },
    {
      "epoch": 1.2615252574791564,
      "grad_norm": 2.010762601268627,
      "learning_rate": 1.6434151663433179e-06,
      "loss": 0.463,
      "step": 10289
    },
    {
      "epoch": 1.2616478666012751,
      "grad_norm": 1.7220016833038394,
      "learning_rate": 1.6429390241533166e-06,
      "loss": 0.4566,
      "step": 10290
    },
    {
      "epoch": 1.2617704757233938,
      "grad_norm": 2.138002124558919,
      "learning_rate": 1.6424629171900397e-06,
      "loss": 0.456,
      "step": 10291
    },
    {
      "epoch": 1.2618930848455125,
      "grad_norm": 1.957694628137156,
      "learning_rate": 1.6419868454730539e-06,
      "loss": 0.4341,
      "step": 10292
    },
    {
      "epoch": 1.2620156939676312,
      "grad_norm": 1.760573609974831,
      "learning_rate": 1.6415108090219276e-06,
      "loss": 0.4546,
      "step": 10293
    },
    {
      "epoch": 1.26213830308975,
      "grad_norm": 1.9916405853778552,
      "learning_rate": 1.641034807856227e-06,
      "loss": 0.4154,
      "step": 10294
    },
    {
      "epoch": 1.2622609122118686,
      "grad_norm": 1.9678559077996671,
      "learning_rate": 1.6405588419955177e-06,
      "loss": 0.4618,
      "step": 10295
    },
    {
      "epoch": 1.2623835213339873,
      "grad_norm": 1.8961485464971177,
      "learning_rate": 1.6400829114593603e-06,
      "loss": 0.374,
      "step": 10296
    },
    {
      "epoch": 1.262506130456106,
      "grad_norm": 2.049172005401446,
      "learning_rate": 1.6396070162673179e-06,
      "loss": 0.4094,
      "step": 10297
    },
    {
      "epoch": 1.2626287395782247,
      "grad_norm": 1.8594256929634765,
      "learning_rate": 1.6391311564389503e-06,
      "loss": 0.4164,
      "step": 10298
    },
    {
      "epoch": 1.2627513487003432,
      "grad_norm": 1.9992396828911645,
      "learning_rate": 1.638655331993817e-06,
      "loss": 0.4351,
      "step": 10299
    },
    {
      "epoch": 1.262873957822462,
      "grad_norm": 1.982179005351445,
      "learning_rate": 1.6381795429514735e-06,
      "loss": 0.4374,
      "step": 10300
    },
    {
      "epoch": 1.2629965669445806,
      "grad_norm": 1.819715039669406,
      "learning_rate": 1.6377037893314763e-06,
      "loss": 0.4122,
      "step": 10301
    },
    {
      "epoch": 1.2631191760666993,
      "grad_norm": 1.957119207030772,
      "learning_rate": 1.63722807115338e-06,
      "loss": 0.4203,
      "step": 10302
    },
    {
      "epoch": 1.263241785188818,
      "grad_norm": 2.1555636836869816,
      "learning_rate": 1.6367523884367385e-06,
      "loss": 0.4044,
      "step": 10303
    },
    {
      "epoch": 1.2633643943109367,
      "grad_norm": 1.9412260645750266,
      "learning_rate": 1.636276741201101e-06,
      "loss": 0.4878,
      "step": 10304
    },
    {
      "epoch": 1.2634870034330554,
      "grad_norm": 2.065818365472094,
      "learning_rate": 1.6358011294660191e-06,
      "loss": 0.4461,
      "step": 10305
    },
    {
      "epoch": 1.2636096125551741,
      "grad_norm": 2.101728136902036,
      "learning_rate": 1.63532555325104e-06,
      "loss": 0.4351,
      "step": 10306
    },
    {
      "epoch": 1.2637322216772928,
      "grad_norm": 2.125064312020328,
      "learning_rate": 1.6348500125757128e-06,
      "loss": 0.4658,
      "step": 10307
    },
    {
      "epoch": 1.2638548307994115,
      "grad_norm": 2.1887953506415023,
      "learning_rate": 1.6343745074595805e-06,
      "loss": 0.3975,
      "step": 10308
    },
    {
      "epoch": 1.2639774399215302,
      "grad_norm": 2.0044679781660046,
      "learning_rate": 1.6338990379221886e-06,
      "loss": 0.4619,
      "step": 10309
    },
    {
      "epoch": 1.264100049043649,
      "grad_norm": 2.0849612394193744,
      "learning_rate": 1.6334236039830799e-06,
      "loss": 0.4358,
      "step": 10310
    },
    {
      "epoch": 1.2642226581657674,
      "grad_norm": 1.794809168184676,
      "learning_rate": 1.6329482056617946e-06,
      "loss": 0.4489,
      "step": 10311
    },
    {
      "epoch": 1.264345267287886,
      "grad_norm": 2.014329363911146,
      "learning_rate": 1.6324728429778741e-06,
      "loss": 0.451,
      "step": 10312
    },
    {
      "epoch": 1.2644678764100048,
      "grad_norm": 1.982591007694564,
      "learning_rate": 1.6319975159508544e-06,
      "loss": 0.4102,
      "step": 10313
    },
    {
      "epoch": 1.2645904855321235,
      "grad_norm": 1.9131807540740338,
      "learning_rate": 1.6315222246002744e-06,
      "loss": 0.4262,
      "step": 10314
    },
    {
      "epoch": 1.2647130946542422,
      "grad_norm": 2.292052636150806,
      "learning_rate": 1.631046968945668e-06,
      "loss": 0.4146,
      "step": 10315
    },
    {
      "epoch": 1.264835703776361,
      "grad_norm": 2.0294955556767142,
      "learning_rate": 1.6305717490065702e-06,
      "loss": 0.4681,
      "step": 10316
    },
    {
      "epoch": 1.2649583128984796,
      "grad_norm": 1.7608096267501465,
      "learning_rate": 1.6300965648025121e-06,
      "loss": 0.4005,
      "step": 10317
    },
    {
      "epoch": 1.2650809220205983,
      "grad_norm": 1.8893139520570092,
      "learning_rate": 1.6296214163530255e-06,
      "loss": 0.417,
      "step": 10318
    },
    {
      "epoch": 1.265203531142717,
      "grad_norm": 1.7190784133396408,
      "learning_rate": 1.6291463036776386e-06,
      "loss": 0.3971,
      "step": 10319
    },
    {
      "epoch": 1.2653261402648357,
      "grad_norm": 1.8069552019889654,
      "learning_rate": 1.628671226795882e-06,
      "loss": 0.3961,
      "step": 10320
    },
    {
      "epoch": 1.2654487493869544,
      "grad_norm": 1.8547743020401657,
      "learning_rate": 1.6281961857272794e-06,
      "loss": 0.3944,
      "step": 10321
    },
    {
      "epoch": 1.265571358509073,
      "grad_norm": 1.8106952945447325,
      "learning_rate": 1.6277211804913573e-06,
      "loss": 0.4164,
      "step": 10322
    },
    {
      "epoch": 1.2656939676311918,
      "grad_norm": 1.9234676720336918,
      "learning_rate": 1.6272462111076387e-06,
      "loss": 0.4132,
      "step": 10323
    },
    {
      "epoch": 1.2658165767533105,
      "grad_norm": 1.9428871696962708,
      "learning_rate": 1.6267712775956468e-06,
      "loss": 0.4245,
      "step": 10324
    },
    {
      "epoch": 1.2659391858754292,
      "grad_norm": 1.8388595327362913,
      "learning_rate": 1.6262963799749005e-06,
      "loss": 0.4255,
      "step": 10325
    },
    {
      "epoch": 1.266061794997548,
      "grad_norm": 2.0630985838204294,
      "learning_rate": 1.6258215182649196e-06,
      "loss": 0.4172,
      "step": 10326
    },
    {
      "epoch": 1.2661844041196666,
      "grad_norm": 2.0178260874029736,
      "learning_rate": 1.6253466924852219e-06,
      "loss": 0.4149,
      "step": 10327
    },
    {
      "epoch": 1.2663070132417853,
      "grad_norm": 1.945836928453348,
      "learning_rate": 1.6248719026553244e-06,
      "loss": 0.4473,
      "step": 10328
    },
    {
      "epoch": 1.266429622363904,
      "grad_norm": 1.8601803106985857,
      "learning_rate": 1.6243971487947404e-06,
      "loss": 0.3978,
      "step": 10329
    },
    {
      "epoch": 1.2665522314860227,
      "grad_norm": 2.0329396561975073,
      "learning_rate": 1.623922430922984e-06,
      "loss": 0.4388,
      "step": 10330
    },
    {
      "epoch": 1.2666748406081412,
      "grad_norm": 1.8023556437286166,
      "learning_rate": 1.6234477490595658e-06,
      "loss": 0.4261,
      "step": 10331
    },
    {
      "epoch": 1.2667974497302599,
      "grad_norm": 1.980627114224291,
      "learning_rate": 1.6229731032239986e-06,
      "loss": 0.3933,
      "step": 10332
    },
    {
      "epoch": 1.2669200588523786,
      "grad_norm": 2.1177065298119557,
      "learning_rate": 1.622498493435788e-06,
      "loss": 0.4558,
      "step": 10333
    },
    {
      "epoch": 1.2670426679744973,
      "grad_norm": 1.9018258361727836,
      "learning_rate": 1.6220239197144432e-06,
      "loss": 0.4127,
      "step": 10334
    },
    {
      "epoch": 1.267165277096616,
      "grad_norm": 1.8815633061182724,
      "learning_rate": 1.6215493820794697e-06,
      "loss": 0.4285,
      "step": 10335
    },
    {
      "epoch": 1.2672878862187347,
      "grad_norm": 1.9639243195529148,
      "learning_rate": 1.6210748805503717e-06,
      "loss": 0.4091,
      "step": 10336
    },
    {
      "epoch": 1.2674104953408534,
      "grad_norm": 1.9990047671144748,
      "learning_rate": 1.6206004151466529e-06,
      "loss": 0.4276,
      "step": 10337
    },
    {
      "epoch": 1.267533104462972,
      "grad_norm": 1.7738782317549913,
      "learning_rate": 1.6201259858878132e-06,
      "loss": 0.4504,
      "step": 10338
    },
    {
      "epoch": 1.2676557135850908,
      "grad_norm": 2.0322379993996393,
      "learning_rate": 1.6196515927933525e-06,
      "loss": 0.4457,
      "step": 10339
    },
    {
      "epoch": 1.2677783227072095,
      "grad_norm": 1.765913478229395,
      "learning_rate": 1.6191772358827707e-06,
      "loss": 0.4336,
      "step": 10340
    },
    {
      "epoch": 1.2679009318293282,
      "grad_norm": 1.911647384720148,
      "learning_rate": 1.618702915175564e-06,
      "loss": 0.3819,
      "step": 10341
    },
    {
      "epoch": 1.2680235409514469,
      "grad_norm": 1.725412614102264,
      "learning_rate": 1.6182286306912275e-06,
      "loss": 0.385,
      "step": 10342
    },
    {
      "epoch": 1.2681461500735653,
      "grad_norm": 1.91852386145087,
      "learning_rate": 1.6177543824492554e-06,
      "loss": 0.4174,
      "step": 10343
    },
    {
      "epoch": 1.268268759195684,
      "grad_norm": 2.1458141341700836,
      "learning_rate": 1.6172801704691394e-06,
      "loss": 0.4717,
      "step": 10344
    },
    {
      "epoch": 1.2683913683178027,
      "grad_norm": 2.048073495085659,
      "learning_rate": 1.6168059947703727e-06,
      "loss": 0.4656,
      "step": 10345
    },
    {
      "epoch": 1.2685139774399214,
      "grad_norm": 1.8641229966113848,
      "learning_rate": 1.6163318553724416e-06,
      "loss": 0.4321,
      "step": 10346
    },
    {
      "epoch": 1.2686365865620401,
      "grad_norm": 1.8462389023225323,
      "learning_rate": 1.6158577522948362e-06,
      "loss": 0.4272,
      "step": 10347
    },
    {
      "epoch": 1.2687591956841588,
      "grad_norm": 1.9159069312154606,
      "learning_rate": 1.6153836855570426e-06,
      "loss": 0.4237,
      "step": 10348
    },
    {
      "epoch": 1.2688818048062775,
      "grad_norm": 2.0940444219265877,
      "learning_rate": 1.6149096551785458e-06,
      "loss": 0.446,
      "step": 10349
    },
    {
      "epoch": 1.2690044139283962,
      "grad_norm": 1.9951537146628149,
      "learning_rate": 1.6144356611788286e-06,
      "loss": 0.4085,
      "step": 10350
    },
    {
      "epoch": 1.269127023050515,
      "grad_norm": 1.8844050538008466,
      "learning_rate": 1.6139617035773732e-06,
      "loss": 0.4221,
      "step": 10351
    },
    {
      "epoch": 1.2692496321726336,
      "grad_norm": 1.8889397545908162,
      "learning_rate": 1.613487782393661e-06,
      "loss": 0.4353,
      "step": 10352
    },
    {
      "epoch": 1.2693722412947523,
      "grad_norm": 2.0783806757213563,
      "learning_rate": 1.6130138976471706e-06,
      "loss": 0.4229,
      "step": 10353
    },
    {
      "epoch": 1.269494850416871,
      "grad_norm": 2.037954202342598,
      "learning_rate": 1.612540049357379e-06,
      "loss": 0.4962,
      "step": 10354
    },
    {
      "epoch": 1.2696174595389897,
      "grad_norm": 1.785501917009773,
      "learning_rate": 1.6120662375437623e-06,
      "loss": 0.4117,
      "step": 10355
    },
    {
      "epoch": 1.2697400686611084,
      "grad_norm": 1.97059014504888,
      "learning_rate": 1.6115924622257956e-06,
      "loss": 0.4453,
      "step": 10356
    },
    {
      "epoch": 1.2698626777832271,
      "grad_norm": 1.7761971741922316,
      "learning_rate": 1.6111187234229515e-06,
      "loss": 0.4224,
      "step": 10357
    },
    {
      "epoch": 1.2699852869053458,
      "grad_norm": 1.9226916920987354,
      "learning_rate": 1.6106450211547015e-06,
      "loss": 0.4336,
      "step": 10358
    },
    {
      "epoch": 1.2701078960274645,
      "grad_norm": 1.8505616081422886,
      "learning_rate": 1.6101713554405145e-06,
      "loss": 0.4235,
      "step": 10359
    },
    {
      "epoch": 1.2702305051495832,
      "grad_norm": 2.0832764251988163,
      "learning_rate": 1.609697726299861e-06,
      "loss": 0.4307,
      "step": 10360
    },
    {
      "epoch": 1.270353114271702,
      "grad_norm": 1.8892411605908037,
      "learning_rate": 1.6092241337522071e-06,
      "loss": 0.4122,
      "step": 10361
    },
    {
      "epoch": 1.2704757233938204,
      "grad_norm": 2.172013925917402,
      "learning_rate": 1.6087505778170187e-06,
      "loss": 0.3788,
      "step": 10362
    },
    {
      "epoch": 1.2705983325159391,
      "grad_norm": 1.8046945644868275,
      "learning_rate": 1.608277058513759e-06,
      "loss": 0.4097,
      "step": 10363
    },
    {
      "epoch": 1.2707209416380578,
      "grad_norm": 1.9560957421533611,
      "learning_rate": 1.6078035758618904e-06,
      "loss": 0.4135,
      "step": 10364
    },
    {
      "epoch": 1.2708435507601765,
      "grad_norm": 1.8991319081384335,
      "learning_rate": 1.6073301298808746e-06,
      "loss": 0.429,
      "step": 10365
    },
    {
      "epoch": 1.2709661598822952,
      "grad_norm": 1.801884589067912,
      "learning_rate": 1.6068567205901714e-06,
      "loss": 0.4002,
      "step": 10366
    },
    {
      "epoch": 1.271088769004414,
      "grad_norm": 1.968131867372523,
      "learning_rate": 1.6063833480092379e-06,
      "loss": 0.4306,
      "step": 10367
    },
    {
      "epoch": 1.2712113781265326,
      "grad_norm": 1.8328933404868737,
      "learning_rate": 1.6059100121575306e-06,
      "loss": 0.4218,
      "step": 10368
    },
    {
      "epoch": 1.2713339872486513,
      "grad_norm": 2.05607509058105,
      "learning_rate": 1.605436713054504e-06,
      "loss": 0.4889,
      "step": 10369
    },
    {
      "epoch": 1.27145659637077,
      "grad_norm": 2.0527386101245932,
      "learning_rate": 1.604963450719614e-06,
      "loss": 0.4217,
      "step": 10370
    },
    {
      "epoch": 1.2715792054928887,
      "grad_norm": 1.8780407241279082,
      "learning_rate": 1.6044902251723094e-06,
      "loss": 0.4537,
      "step": 10371
    },
    {
      "epoch": 1.2717018146150074,
      "grad_norm": 1.6995817473835677,
      "learning_rate": 1.6040170364320418e-06,
      "loss": 0.4289,
      "step": 10372
    },
    {
      "epoch": 1.2718244237371261,
      "grad_norm": 2.1771263012269153,
      "learning_rate": 1.6035438845182604e-06,
      "loss": 0.4572,
      "step": 10373
    },
    {
      "epoch": 1.2719470328592446,
      "grad_norm": 2.1764883406826336,
      "learning_rate": 1.6030707694504133e-06,
      "loss": 0.4301,
      "step": 10374
    },
    {
      "epoch": 1.2720696419813633,
      "grad_norm": 1.9170714640938453,
      "learning_rate": 1.6025976912479447e-06,
      "loss": 0.4482,
      "step": 10375
    },
    {
      "epoch": 1.272192251103482,
      "grad_norm": 1.7996237780000768,
      "learning_rate": 1.6021246499302997e-06,
      "loss": 0.4293,
      "step": 10376
    },
    {
      "epoch": 1.2723148602256007,
      "grad_norm": 1.9001301404908906,
      "learning_rate": 1.6016516455169206e-06,
      "loss": 0.4343,
      "step": 10377
    },
    {
      "epoch": 1.2724374693477194,
      "grad_norm": 2.033363496147357,
      "learning_rate": 1.601178678027251e-06,
      "loss": 0.4412,
      "step": 10378
    },
    {
      "epoch": 1.272560078469838,
      "grad_norm": 1.9017005034901897,
      "learning_rate": 1.6007057474807272e-06,
      "loss": 0.4211,
      "step": 10379
    },
    {
      "epoch": 1.2726826875919568,
      "grad_norm": 1.8099880330986235,
      "learning_rate": 1.6002328538967898e-06,
      "loss": 0.4502,
      "step": 10380
    },
    {
      "epoch": 1.2728052967140755,
      "grad_norm": 1.927154251996966,
      "learning_rate": 1.5997599972948752e-06,
      "loss": 0.4132,
      "step": 10381
    },
    {
      "epoch": 1.2729279058361942,
      "grad_norm": 1.9436286920008936,
      "learning_rate": 1.599287177694418e-06,
      "loss": 0.4373,
      "step": 10382
    },
    {
      "epoch": 1.273050514958313,
      "grad_norm": 2.067201645020609,
      "learning_rate": 1.5988143951148535e-06,
      "loss": 0.4023,
      "step": 10383
    },
    {
      "epoch": 1.2731731240804316,
      "grad_norm": 1.8506241647832407,
      "learning_rate": 1.5983416495756116e-06,
      "loss": 0.3983,
      "step": 10384
    },
    {
      "epoch": 1.2732957332025503,
      "grad_norm": 1.898639421886969,
      "learning_rate": 1.5978689410961246e-06,
      "loss": 0.4105,
      "step": 10385
    },
    {
      "epoch": 1.273418342324669,
      "grad_norm": 1.8698382855592366,
      "learning_rate": 1.5973962696958211e-06,
      "loss": 0.408,
      "step": 10386
    },
    {
      "epoch": 1.2735409514467877,
      "grad_norm": 1.921995508853287,
      "learning_rate": 1.5969236353941295e-06,
      "loss": 0.4406,
      "step": 10387
    },
    {
      "epoch": 1.2736635605689064,
      "grad_norm": 1.9115160639968438,
      "learning_rate": 1.5964510382104747e-06,
      "loss": 0.4237,
      "step": 10388
    },
    {
      "epoch": 1.273786169691025,
      "grad_norm": 1.912227472851072,
      "learning_rate": 1.595978478164281e-06,
      "loss": 0.4232,
      "step": 10389
    },
    {
      "epoch": 1.2739087788131438,
      "grad_norm": 2.067892688152965,
      "learning_rate": 1.5955059552749735e-06,
      "loss": 0.4622,
      "step": 10390
    },
    {
      "epoch": 1.2740313879352625,
      "grad_norm": 1.8994757961416127,
      "learning_rate": 1.5950334695619726e-06,
      "loss": 0.3899,
      "step": 10391
    },
    {
      "epoch": 1.2741539970573812,
      "grad_norm": 1.8221227784338414,
      "learning_rate": 1.5945610210446976e-06,
      "loss": 0.4348,
      "step": 10392
    },
    {
      "epoch": 1.2742766061795,
      "grad_norm": 2.0586176127810174,
      "learning_rate": 1.594088609742568e-06,
      "loss": 0.3901,
      "step": 10393
    },
    {
      "epoch": 1.2743992153016184,
      "grad_norm": 2.03065396207809,
      "learning_rate": 1.5936162356750001e-06,
      "loss": 0.4527,
      "step": 10394
    },
    {
      "epoch": 1.274521824423737,
      "grad_norm": 1.9401135176009465,
      "learning_rate": 1.59314389886141e-06,
      "loss": 0.4846,
      "step": 10395
    },
    {
      "epoch": 1.2746444335458558,
      "grad_norm": 1.877778109763121,
      "learning_rate": 1.5926715993212106e-06,
      "loss": 0.514,
      "step": 10396
    },
    {
      "epoch": 1.2747670426679745,
      "grad_norm": 1.9712118448509022,
      "learning_rate": 1.5921993370738143e-06,
      "loss": 0.432,
      "step": 10397
    },
    {
      "epoch": 1.2748896517900932,
      "grad_norm": 1.925526262291843,
      "learning_rate": 1.5917271121386329e-06,
      "loss": 0.4264,
      "step": 10398
    },
    {
      "epoch": 1.2750122609122119,
      "grad_norm": 2.002705716984728,
      "learning_rate": 1.5912549245350757e-06,
      "loss": 0.4368,
      "step": 10399
    },
    {
      "epoch": 1.2751348700343306,
      "grad_norm": 1.9015518685339943,
      "learning_rate": 1.5907827742825493e-06,
      "loss": 0.4652,
      "step": 10400
    },
    {
      "epoch": 1.2752574791564493,
      "grad_norm": 1.7918042420626574,
      "learning_rate": 1.5903106614004607e-06,
      "loss": 0.4248,
      "step": 10401
    },
    {
      "epoch": 1.275380088278568,
      "grad_norm": 2.0903528617506164,
      "learning_rate": 1.5898385859082138e-06,
      "loss": 0.4966,
      "step": 10402
    },
    {
      "epoch": 1.2755026974006867,
      "grad_norm": 1.8670924772329092,
      "learning_rate": 1.5893665478252139e-06,
      "loss": 0.4783,
      "step": 10403
    },
    {
      "epoch": 1.2756253065228054,
      "grad_norm": 2.151809576681785,
      "learning_rate": 1.5888945471708594e-06,
      "loss": 0.4881,
      "step": 10404
    },
    {
      "epoch": 1.2757479156449238,
      "grad_norm": 1.9657200777174768,
      "learning_rate": 1.5884225839645524e-06,
      "loss": 0.439,
      "step": 10405
    },
    {
      "epoch": 1.2758705247670425,
      "grad_norm": 1.9575512212069346,
      "learning_rate": 1.5879506582256913e-06,
      "loss": 0.4119,
      "step": 10406
    },
    {
      "epoch": 1.2759931338891612,
      "grad_norm": 1.7955327193215707,
      "learning_rate": 1.5874787699736722e-06,
      "loss": 0.4276,
      "step": 10407
    },
    {
      "epoch": 1.27611574301128,
      "grad_norm": 2.0447092250540235,
      "learning_rate": 1.5870069192278925e-06,
      "loss": 0.4371,
      "step": 10408
    },
    {
      "epoch": 1.2762383521333986,
      "grad_norm": 2.0185953579258085,
      "learning_rate": 1.5865351060077432e-06,
      "loss": 0.4695,
      "step": 10409
    },
    {
      "epoch": 1.2763609612555173,
      "grad_norm": 1.726221569176056,
      "learning_rate": 1.586063330332619e-06,
      "loss": 0.423,
      "step": 10410
    },
    {
      "epoch": 1.276483570377636,
      "grad_norm": 1.8435844863411375,
      "learning_rate": 1.5855915922219098e-06,
      "loss": 0.4347,
      "step": 10411
    },
    {
      "epoch": 1.2766061794997547,
      "grad_norm": 2.0391116386945494,
      "learning_rate": 1.5851198916950055e-06,
      "loss": 0.3795,
      "step": 10412
    },
    {
      "epoch": 1.2767287886218734,
      "grad_norm": 1.8613179179370107,
      "learning_rate": 1.5846482287712928e-06,
      "loss": 0.4446,
      "step": 10413
    },
    {
      "epoch": 1.2768513977439921,
      "grad_norm": 1.8102516051845974,
      "learning_rate": 1.5841766034701583e-06,
      "loss": 0.4308,
      "step": 10414
    },
    {
      "epoch": 1.2769740068661108,
      "grad_norm": 2.019854915655876,
      "learning_rate": 1.5837050158109862e-06,
      "loss": 0.4231,
      "step": 10415
    },
    {
      "epoch": 1.2770966159882295,
      "grad_norm": 2.0477598760557636,
      "learning_rate": 1.583233465813162e-06,
      "loss": 0.4174,
      "step": 10416
    },
    {
      "epoch": 1.2772192251103482,
      "grad_norm": 1.860527987752115,
      "learning_rate": 1.5827619534960636e-06,
      "loss": 0.4016,
      "step": 10417
    },
    {
      "epoch": 1.277341834232467,
      "grad_norm": 1.9604765622463194,
      "learning_rate": 1.5822904788790733e-06,
      "loss": 0.4156,
      "step": 10418
    },
    {
      "epoch": 1.2774644433545856,
      "grad_norm": 1.956734658305388,
      "learning_rate": 1.5818190419815689e-06,
      "loss": 0.38,
      "step": 10419
    },
    {
      "epoch": 1.2775870524767043,
      "grad_norm": 1.7993724936858673,
      "learning_rate": 1.581347642822928e-06,
      "loss": 0.4188,
      "step": 10420
    },
    {
      "epoch": 1.277709661598823,
      "grad_norm": 1.9230031117932334,
      "learning_rate": 1.5808762814225248e-06,
      "loss": 0.46,
      "step": 10421
    },
    {
      "epoch": 1.2778322707209417,
      "grad_norm": 1.8106888220852648,
      "learning_rate": 1.580404957799733e-06,
      "loss": 0.4197,
      "step": 10422
    },
    {
      "epoch": 1.2779548798430604,
      "grad_norm": 1.7088757904657095,
      "learning_rate": 1.579933671973926e-06,
      "loss": 0.461,
      "step": 10423
    },
    {
      "epoch": 1.2780774889651791,
      "grad_norm": 1.9294714293902246,
      "learning_rate": 1.5794624239644745e-06,
      "loss": 0.4279,
      "step": 10424
    },
    {
      "epoch": 1.2782000980872976,
      "grad_norm": 2.034412698175529,
      "learning_rate": 1.5789912137907466e-06,
      "loss": 0.4563,
      "step": 10425
    },
    {
      "epoch": 1.2783227072094163,
      "grad_norm": 1.9695017383431983,
      "learning_rate": 1.5785200414721103e-06,
      "loss": 0.4756,
      "step": 10426
    },
    {
      "epoch": 1.278445316331535,
      "grad_norm": 1.9327992694922331,
      "learning_rate": 1.578048907027931e-06,
      "loss": 0.4033,
      "step": 10427
    },
    {
      "epoch": 1.2785679254536537,
      "grad_norm": 1.9378797882289518,
      "learning_rate": 1.5775778104775755e-06,
      "loss": 0.401,
      "step": 10428
    },
    {
      "epoch": 1.2786905345757724,
      "grad_norm": 2.002405107324638,
      "learning_rate": 1.5771067518404032e-06,
      "loss": 0.4491,
      "step": 10429
    },
    {
      "epoch": 1.2788131436978911,
      "grad_norm": 1.9364075224273611,
      "learning_rate": 1.5766357311357783e-06,
      "loss": 0.4143,
      "step": 10430
    },
    {
      "epoch": 1.2789357528200098,
      "grad_norm": 2.1471890718159603,
      "learning_rate": 1.5761647483830589e-06,
      "loss": 0.4331,
      "step": 10431
    },
    {
      "epoch": 1.2790583619421285,
      "grad_norm": 1.848849567524375,
      "learning_rate": 1.5756938036016046e-06,
      "loss": 0.433,
      "step": 10432
    },
    {
      "epoch": 1.2791809710642472,
      "grad_norm": 2.1558075579017077,
      "learning_rate": 1.5752228968107713e-06,
      "loss": 0.47,
      "step": 10433
    },
    {
      "epoch": 1.279303580186366,
      "grad_norm": 1.8640719913157449,
      "learning_rate": 1.5747520280299136e-06,
      "loss": 0.4501,
      "step": 10434
    },
    {
      "epoch": 1.2794261893084846,
      "grad_norm": 1.982845363853828,
      "learning_rate": 1.574281197278385e-06,
      "loss": 0.4127,
      "step": 10435
    },
    {
      "epoch": 1.2795487984306033,
      "grad_norm": 1.9507442568266113,
      "learning_rate": 1.573810404575539e-06,
      "loss": 0.4136,
      "step": 10436
    },
    {
      "epoch": 1.2796714075527218,
      "grad_norm": 2.044473219203943,
      "learning_rate": 1.5733396499407256e-06,
      "loss": 0.4363,
      "step": 10437
    },
    {
      "epoch": 1.2797940166748405,
      "grad_norm": 1.990400094343297,
      "learning_rate": 1.572868933393292e-06,
      "loss": 0.4367,
      "step": 10438
    },
    {
      "epoch": 1.2799166257969592,
      "grad_norm": 2.1236154119765183,
      "learning_rate": 1.5723982549525868e-06,
      "loss": 0.52,
      "step": 10439
    },
    {
      "epoch": 1.280039234919078,
      "grad_norm": 1.7123305324422466,
      "learning_rate": 1.5719276146379553e-06,
      "loss": 0.4262,
      "step": 10440
    },
    {
      "epoch": 1.2801618440411966,
      "grad_norm": 1.9821149140020116,
      "learning_rate": 1.5714570124687434e-06,
      "loss": 0.4654,
      "step": 10441
    },
    {
      "epoch": 1.2802844531633153,
      "grad_norm": 2.006955619182133,
      "learning_rate": 1.5709864484642901e-06,
      "loss": 0.3959,
      "step": 10442
    },
    {
      "epoch": 1.280407062285434,
      "grad_norm": 1.9594037859829772,
      "learning_rate": 1.5705159226439392e-06,
      "loss": 0.4132,
      "step": 10443
    },
    {
      "epoch": 1.2805296714075527,
      "grad_norm": 1.7985004067130579,
      "learning_rate": 1.5700454350270296e-06,
      "loss": 0.445,
      "step": 10444
    },
    {
      "epoch": 1.2806522805296714,
      "grad_norm": 1.791356362267658,
      "learning_rate": 1.5695749856328996e-06,
      "loss": 0.4428,
      "step": 10445
    },
    {
      "epoch": 1.28077488965179,
      "grad_norm": 1.9708176461703624,
      "learning_rate": 1.5691045744808841e-06,
      "loss": 0.4262,
      "step": 10446
    },
    {
      "epoch": 1.2808974987739088,
      "grad_norm": 1.9284366237611728,
      "learning_rate": 1.5686342015903184e-06,
      "loss": 0.4183,
      "step": 10447
    },
    {
      "epoch": 1.2810201078960275,
      "grad_norm": 1.7961841255192421,
      "learning_rate": 1.568163866980536e-06,
      "loss": 0.4042,
      "step": 10448
    },
    {
      "epoch": 1.2811427170181462,
      "grad_norm": 1.7926780708947256,
      "learning_rate": 1.567693570670869e-06,
      "loss": 0.4199,
      "step": 10449
    },
    {
      "epoch": 1.281265326140265,
      "grad_norm": 1.9043412240836621,
      "learning_rate": 1.5672233126806463e-06,
      "loss": 0.4104,
      "step": 10450
    },
    {
      "epoch": 1.2813879352623836,
      "grad_norm": 2.0318363772514765,
      "learning_rate": 1.5667530930291966e-06,
      "loss": 0.4472,
      "step": 10451
    },
    {
      "epoch": 1.2815105443845023,
      "grad_norm": 1.9615280550078968,
      "learning_rate": 1.5662829117358468e-06,
      "loss": 0.4624,
      "step": 10452
    },
    {
      "epoch": 1.281633153506621,
      "grad_norm": 1.8827981177715996,
      "learning_rate": 1.5658127688199235e-06,
      "loss": 0.4264,
      "step": 10453
    },
    {
      "epoch": 1.2817557626287397,
      "grad_norm": 1.7235246753892517,
      "learning_rate": 1.565342664300748e-06,
      "loss": 0.4183,
      "step": 10454
    },
    {
      "epoch": 1.2818783717508584,
      "grad_norm": 2.0259108624254725,
      "learning_rate": 1.5648725981976437e-06,
      "loss": 0.4558,
      "step": 10455
    },
    {
      "epoch": 1.2820009808729769,
      "grad_norm": 2.057743734749939,
      "learning_rate": 1.5644025705299318e-06,
      "loss": 0.4214,
      "step": 10456
    },
    {
      "epoch": 1.2821235899950956,
      "grad_norm": 1.8902770524457042,
      "learning_rate": 1.5639325813169304e-06,
      "loss": 0.4137,
      "step": 10457
    },
    {
      "epoch": 1.2822461991172143,
      "grad_norm": 1.984029166222234,
      "learning_rate": 1.5634626305779574e-06,
      "loss": 0.4103,
      "step": 10458
    },
    {
      "epoch": 1.282368808239333,
      "grad_norm": 2.1260907529375244,
      "learning_rate": 1.5629927183323278e-06,
      "loss": 0.5283,
      "step": 10459
    },
    {
      "epoch": 1.2824914173614517,
      "grad_norm": 1.9088594860708499,
      "learning_rate": 1.562522844599356e-06,
      "loss": 0.3749,
      "step": 10460
    },
    {
      "epoch": 1.2826140264835704,
      "grad_norm": 1.9967335310410133,
      "learning_rate": 1.562053009398355e-06,
      "loss": 0.4527,
      "step": 10461
    },
    {
      "epoch": 1.282736635605689,
      "grad_norm": 1.972895393355578,
      "learning_rate": 1.5615832127486368e-06,
      "loss": 0.454,
      "step": 10462
    },
    {
      "epoch": 1.2828592447278078,
      "grad_norm": 1.8457308966237405,
      "learning_rate": 1.5611134546695095e-06,
      "loss": 0.4305,
      "step": 10463
    },
    {
      "epoch": 1.2829818538499265,
      "grad_norm": 1.8954844719662387,
      "learning_rate": 1.560643735180281e-06,
      "loss": 0.4258,
      "step": 10464
    },
    {
      "epoch": 1.2831044629720452,
      "grad_norm": 1.8721143331741574,
      "learning_rate": 1.5601740543002575e-06,
      "loss": 0.4812,
      "step": 10465
    },
    {
      "epoch": 1.2832270720941639,
      "grad_norm": 1.8446743930972462,
      "learning_rate": 1.5597044120487459e-06,
      "loss": 0.4461,
      "step": 10466
    },
    {
      "epoch": 1.2833496812162826,
      "grad_norm": 1.943073519720839,
      "learning_rate": 1.5592348084450462e-06,
      "loss": 0.4861,
      "step": 10467
    },
    {
      "epoch": 1.283472290338401,
      "grad_norm": 1.8601311190012326,
      "learning_rate": 1.5587652435084616e-06,
      "loss": 0.4273,
      "step": 10468
    },
    {
      "epoch": 1.2835948994605197,
      "grad_norm": 1.8416698812352834,
      "learning_rate": 1.558295717258292e-06,
      "loss": 0.4334,
      "step": 10469
    },
    {
      "epoch": 1.2837175085826384,
      "grad_norm": 1.9719044096190976,
      "learning_rate": 1.557826229713836e-06,
      "loss": 0.4228,
      "step": 10470
    },
    {
      "epoch": 1.2838401177047571,
      "grad_norm": 2.071068117277784,
      "learning_rate": 1.5573567808943895e-06,
      "loss": 0.4909,
      "step": 10471
    },
    {
      "epoch": 1.2839627268268758,
      "grad_norm": 1.8777038571298181,
      "learning_rate": 1.5568873708192482e-06,
      "loss": 0.3994,
      "step": 10472
    },
    {
      "epoch": 1.2840853359489945,
      "grad_norm": 1.922572716916648,
      "learning_rate": 1.556417999507705e-06,
      "loss": 0.4272,
      "step": 10473
    },
    {
      "epoch": 1.2842079450711132,
      "grad_norm": 1.8235418317570604,
      "learning_rate": 1.5559486669790536e-06,
      "loss": 0.4362,
      "step": 10474
    },
    {
      "epoch": 1.284330554193232,
      "grad_norm": 2.152173208171396,
      "learning_rate": 1.5554793732525825e-06,
      "loss": 0.4846,
      "step": 10475
    },
    {
      "epoch": 1.2844531633153506,
      "grad_norm": 1.8185581762617697,
      "learning_rate": 1.5550101183475817e-06,
      "loss": 0.4016,
      "step": 10476
    },
    {
      "epoch": 1.2845757724374693,
      "grad_norm": 1.9086641157751028,
      "learning_rate": 1.5545409022833376e-06,
      "loss": 0.4295,
      "step": 10477
    },
    {
      "epoch": 1.284698381559588,
      "grad_norm": 1.8909200493683693,
      "learning_rate": 1.5540717250791359e-06,
      "loss": 0.4015,
      "step": 10478
    },
    {
      "epoch": 1.2848209906817067,
      "grad_norm": 2.2599284635435177,
      "learning_rate": 1.5536025867542626e-06,
      "loss": 0.5172,
      "step": 10479
    },
    {
      "epoch": 1.2849435998038254,
      "grad_norm": 2.0694953206383615,
      "learning_rate": 1.5531334873279968e-06,
      "loss": 0.4165,
      "step": 10480
    },
    {
      "epoch": 1.2850662089259441,
      "grad_norm": 1.9675509173904007,
      "learning_rate": 1.5526644268196214e-06,
      "loss": 0.406,
      "step": 10481
    },
    {
      "epoch": 1.2851888180480628,
      "grad_norm": 1.855898226257467,
      "learning_rate": 1.5521954052484154e-06,
      "loss": 0.4331,
      "step": 10482
    },
    {
      "epoch": 1.2853114271701815,
      "grad_norm": 1.9175002160354535,
      "learning_rate": 1.5517264226336565e-06,
      "loss": 0.4551,
      "step": 10483
    },
    {
      "epoch": 1.2854340362923002,
      "grad_norm": 1.9772567947610142,
      "learning_rate": 1.55125747899462e-06,
      "loss": 0.4095,
      "step": 10484
    },
    {
      "epoch": 1.285556645414419,
      "grad_norm": 1.918987119348559,
      "learning_rate": 1.5507885743505803e-06,
      "loss": 0.4536,
      "step": 10485
    },
    {
      "epoch": 1.2856792545365376,
      "grad_norm": 1.7865224219250229,
      "learning_rate": 1.550319708720811e-06,
      "loss": 0.4123,
      "step": 10486
    },
    {
      "epoch": 1.2858018636586563,
      "grad_norm": 1.9723197160895716,
      "learning_rate": 1.5498508821245834e-06,
      "loss": 0.4426,
      "step": 10487
    },
    {
      "epoch": 1.2859244727807748,
      "grad_norm": 1.9687575375427118,
      "learning_rate": 1.549382094581166e-06,
      "loss": 0.4718,
      "step": 10488
    },
    {
      "epoch": 1.2860470819028935,
      "grad_norm": 2.020196536136972,
      "learning_rate": 1.548913346109828e-06,
      "loss": 0.4067,
      "step": 10489
    },
    {
      "epoch": 1.2861696910250122,
      "grad_norm": 1.8853165512770669,
      "learning_rate": 1.548444636729835e-06,
      "loss": 0.4093,
      "step": 10490
    },
    {
      "epoch": 1.286292300147131,
      "grad_norm": 2.019615412641986,
      "learning_rate": 1.5479759664604523e-06,
      "loss": 0.4644,
      "step": 10491
    },
    {
      "epoch": 1.2864149092692496,
      "grad_norm": 1.9185825031607688,
      "learning_rate": 1.5475073353209419e-06,
      "loss": 0.4192,
      "step": 10492
    },
    {
      "epoch": 1.2865375183913683,
      "grad_norm": 2.020557159521674,
      "learning_rate": 1.547038743330567e-06,
      "loss": 0.4454,
      "step": 10493
    },
    {
      "epoch": 1.286660127513487,
      "grad_norm": 1.8725623479757836,
      "learning_rate": 1.5465701905085868e-06,
      "loss": 0.4383,
      "step": 10494
    },
    {
      "epoch": 1.2867827366356057,
      "grad_norm": 2.205274632360215,
      "learning_rate": 1.5461016768742598e-06,
      "loss": 0.4729,
      "step": 10495
    },
    {
      "epoch": 1.2869053457577244,
      "grad_norm": 1.784323956484448,
      "learning_rate": 1.5456332024468423e-06,
      "loss": 0.3898,
      "step": 10496
    },
    {
      "epoch": 1.2870279548798431,
      "grad_norm": 2.039877862296934,
      "learning_rate": 1.54516476724559e-06,
      "loss": 0.433,
      "step": 10497
    },
    {
      "epoch": 1.2871505640019618,
      "grad_norm": 1.9695965293196542,
      "learning_rate": 1.5446963712897557e-06,
      "loss": 0.4318,
      "step": 10498
    },
    {
      "epoch": 1.2872731731240803,
      "grad_norm": 1.9865460724934774,
      "learning_rate": 1.544228014598593e-06,
      "loss": 0.4626,
      "step": 10499
    },
    {
      "epoch": 1.287395782246199,
      "grad_norm": 2.0714589023506504,
      "learning_rate": 1.5437596971913494e-06,
      "loss": 0.4394,
      "step": 10500
    },
    {
      "epoch": 1.2875183913683177,
      "grad_norm": 1.953635704633351,
      "learning_rate": 1.5432914190872757e-06,
      "loss": 0.3811,
      "step": 10501
    },
    {
      "epoch": 1.2876410004904364,
      "grad_norm": 2.001572752202231,
      "learning_rate": 1.5428231803056187e-06,
      "loss": 0.4042,
      "step": 10502
    },
    {
      "epoch": 1.287763609612555,
      "grad_norm": 1.922234580425819,
      "learning_rate": 1.5423549808656228e-06,
      "loss": 0.4114,
      "step": 10503
    },
    {
      "epoch": 1.2878862187346738,
      "grad_norm": 1.969787384439761,
      "learning_rate": 1.5418868207865339e-06,
      "loss": 0.4421,
      "step": 10504
    },
    {
      "epoch": 1.2880088278567925,
      "grad_norm": 2.1192785678136095,
      "learning_rate": 1.541418700087592e-06,
      "loss": 0.4432,
      "step": 10505
    },
    {
      "epoch": 1.2881314369789112,
      "grad_norm": 1.8571659222644494,
      "learning_rate": 1.5409506187880383e-06,
      "loss": 0.3835,
      "step": 10506
    },
    {
      "epoch": 1.28825404610103,
      "grad_norm": 2.0704949657352216,
      "learning_rate": 1.5404825769071124e-06,
      "loss": 0.4445,
      "step": 10507
    },
    {
      "epoch": 1.2883766552231486,
      "grad_norm": 1.916469469151982,
      "learning_rate": 1.5400145744640517e-06,
      "loss": 0.4386,
      "step": 10508
    },
    {
      "epoch": 1.2884992643452673,
      "grad_norm": 1.9999763668976156,
      "learning_rate": 1.539546611478091e-06,
      "loss": 0.4297,
      "step": 10509
    },
    {
      "epoch": 1.288621873467386,
      "grad_norm": 1.8710855694258388,
      "learning_rate": 1.539078687968465e-06,
      "loss": 0.4013,
      "step": 10510
    },
    {
      "epoch": 1.2887444825895047,
      "grad_norm": 1.8923376942599455,
      "learning_rate": 1.5386108039544057e-06,
      "loss": 0.3921,
      "step": 10511
    },
    {
      "epoch": 1.2888670917116234,
      "grad_norm": 1.8399647917614583,
      "learning_rate": 1.5381429594551451e-06,
      "loss": 0.4303,
      "step": 10512
    },
    {
      "epoch": 1.288989700833742,
      "grad_norm": 1.8342006454551587,
      "learning_rate": 1.5376751544899113e-06,
      "loss": 0.4223,
      "step": 10513
    },
    {
      "epoch": 1.2891123099558608,
      "grad_norm": 1.8244698688860188,
      "learning_rate": 1.537207389077932e-06,
      "loss": 0.3954,
      "step": 10514
    },
    {
      "epoch": 1.2892349190779795,
      "grad_norm": 1.9118224128956396,
      "learning_rate": 1.536739663238434e-06,
      "loss": 0.4501,
      "step": 10515
    },
    {
      "epoch": 1.2893575282000982,
      "grad_norm": 1.8799514342771322,
      "learning_rate": 1.5362719769906415e-06,
      "loss": 0.4036,
      "step": 10516
    },
    {
      "epoch": 1.289480137322217,
      "grad_norm": 1.65476370850344,
      "learning_rate": 1.5358043303537765e-06,
      "loss": 0.456,
      "step": 10517
    },
    {
      "epoch": 1.2896027464443356,
      "grad_norm": 1.9272503163836516,
      "learning_rate": 1.5353367233470597e-06,
      "loss": 0.4637,
      "step": 10518
    },
    {
      "epoch": 1.289725355566454,
      "grad_norm": 1.925170316663788,
      "learning_rate": 1.5348691559897122e-06,
      "loss": 0.3795,
      "step": 10519
    },
    {
      "epoch": 1.2898479646885728,
      "grad_norm": 1.811272839375592,
      "learning_rate": 1.5344016283009513e-06,
      "loss": 0.383,
      "step": 10520
    },
    {
      "epoch": 1.2899705738106915,
      "grad_norm": 2.1736319760621057,
      "learning_rate": 1.5339341402999924e-06,
      "loss": 0.4444,
      "step": 10521
    },
    {
      "epoch": 1.2900931829328102,
      "grad_norm": 1.9249320104732552,
      "learning_rate": 1.5334666920060509e-06,
      "loss": 0.3732,
      "step": 10522
    },
    {
      "epoch": 1.2902157920549289,
      "grad_norm": 1.9687588811436973,
      "learning_rate": 1.532999283438339e-06,
      "loss": 0.4294,
      "step": 10523
    },
    {
      "epoch": 1.2903384011770476,
      "grad_norm": 1.9937295582375851,
      "learning_rate": 1.5325319146160694e-06,
      "loss": 0.4454,
      "step": 10524
    },
    {
      "epoch": 1.2904610102991663,
      "grad_norm": 1.9862007825114953,
      "learning_rate": 1.5320645855584498e-06,
      "loss": 0.4407,
      "step": 10525
    },
    {
      "epoch": 1.290583619421285,
      "grad_norm": 1.6811819829918857,
      "learning_rate": 1.5315972962846897e-06,
      "loss": 0.4407,
      "step": 10526
    },
    {
      "epoch": 1.2907062285434037,
      "grad_norm": 1.8647605929245146,
      "learning_rate": 1.5311300468139957e-06,
      "loss": 0.3896,
      "step": 10527
    },
    {
      "epoch": 1.2908288376655224,
      "grad_norm": 2.006407883448202,
      "learning_rate": 1.5306628371655712e-06,
      "loss": 0.4369,
      "step": 10528
    },
    {
      "epoch": 1.290951446787641,
      "grad_norm": 2.13016570902873,
      "learning_rate": 1.530195667358621e-06,
      "loss": 0.4466,
      "step": 10529
    },
    {
      "epoch": 1.2910740559097598,
      "grad_norm": 1.9515155121845218,
      "learning_rate": 1.529728537412345e-06,
      "loss": 0.4442,
      "step": 10530
    },
    {
      "epoch": 1.2911966650318782,
      "grad_norm": 1.842948841044871,
      "learning_rate": 1.5292614473459439e-06,
      "loss": 0.4379,
      "step": 10531
    },
    {
      "epoch": 1.291319274153997,
      "grad_norm": 2.0160060572623557,
      "learning_rate": 1.5287943971786161e-06,
      "loss": 0.3993,
      "step": 10532
    },
    {
      "epoch": 1.2914418832761156,
      "grad_norm": 2.080487224253795,
      "learning_rate": 1.5283273869295586e-06,
      "loss": 0.4357,
      "step": 10533
    },
    {
      "epoch": 1.2915644923982343,
      "grad_norm": 2.2846488954158515,
      "learning_rate": 1.527860416617965e-06,
      "loss": 0.409,
      "step": 10534
    },
    {
      "epoch": 1.291687101520353,
      "grad_norm": 1.9578066256817033,
      "learning_rate": 1.5273934862630296e-06,
      "loss": 0.4136,
      "step": 10535
    },
    {
      "epoch": 1.2918097106424717,
      "grad_norm": 1.8561271651624107,
      "learning_rate": 1.526926595883943e-06,
      "loss": 0.442,
      "step": 10536
    },
    {
      "epoch": 1.2919323197645904,
      "grad_norm": 1.918999658579956,
      "learning_rate": 1.5264597454998976e-06,
      "loss": 0.4602,
      "step": 10537
    },
    {
      "epoch": 1.2920549288867091,
      "grad_norm": 2.003498308225778,
      "learning_rate": 1.5259929351300786e-06,
      "loss": 0.4859,
      "step": 10538
    },
    {
      "epoch": 1.2921775380088278,
      "grad_norm": 1.834672526577544,
      "learning_rate": 1.5255261647936748e-06,
      "loss": 0.433,
      "step": 10539
    },
    {
      "epoch": 1.2923001471309465,
      "grad_norm": 2.0149866402636563,
      "learning_rate": 1.5250594345098709e-06,
      "loss": 0.4692,
      "step": 10540
    },
    {
      "epoch": 1.2924227562530652,
      "grad_norm": 2.011937013532791,
      "learning_rate": 1.5245927442978508e-06,
      "loss": 0.4376,
      "step": 10541
    },
    {
      "epoch": 1.292545365375184,
      "grad_norm": 1.9659041458858268,
      "learning_rate": 1.524126094176795e-06,
      "loss": 0.4366,
      "step": 10542
    },
    {
      "epoch": 1.2926679744973026,
      "grad_norm": 1.9438203638209912,
      "learning_rate": 1.523659484165884e-06,
      "loss": 0.4462,
      "step": 10543
    },
    {
      "epoch": 1.2927905836194213,
      "grad_norm": 1.7361898135219858,
      "learning_rate": 1.523192914284297e-06,
      "loss": 0.4065,
      "step": 10544
    },
    {
      "epoch": 1.29291319274154,
      "grad_norm": 1.863407318808668,
      "learning_rate": 1.5227263845512114e-06,
      "loss": 0.4066,
      "step": 10545
    },
    {
      "epoch": 1.2930358018636587,
      "grad_norm": 1.9823037970849893,
      "learning_rate": 1.5222598949858009e-06,
      "loss": 0.4265,
      "step": 10546
    },
    {
      "epoch": 1.2931584109857774,
      "grad_norm": 1.7876361879697835,
      "learning_rate": 1.5217934456072396e-06,
      "loss": 0.4312,
      "step": 10547
    },
    {
      "epoch": 1.2932810201078961,
      "grad_norm": 1.89925283014909,
      "learning_rate": 1.5213270364346994e-06,
      "loss": 0.37,
      "step": 10548
    },
    {
      "epoch": 1.2934036292300148,
      "grad_norm": 2.156083989729787,
      "learning_rate": 1.5208606674873502e-06,
      "loss": 0.4091,
      "step": 10549
    },
    {
      "epoch": 1.2935262383521333,
      "grad_norm": 2.0473142174800345,
      "learning_rate": 1.520394338784362e-06,
      "loss": 0.4738,
      "step": 10550
    },
    {
      "epoch": 1.293648847474252,
      "grad_norm": 1.7818100304760391,
      "learning_rate": 1.5199280503449003e-06,
      "loss": 0.4162,
      "step": 10551
    },
    {
      "epoch": 1.2937714565963707,
      "grad_norm": 1.8705401709669742,
      "learning_rate": 1.5194618021881308e-06,
      "loss": 0.4649,
      "step": 10552
    },
    {
      "epoch": 1.2938940657184894,
      "grad_norm": 1.8863659046021093,
      "learning_rate": 1.518995594333217e-06,
      "loss": 0.4436,
      "step": 10553
    },
    {
      "epoch": 1.2940166748406081,
      "grad_norm": 1.9319898171322805,
      "learning_rate": 1.5185294267993214e-06,
      "loss": 0.4147,
      "step": 10554
    },
    {
      "epoch": 1.2941392839627268,
      "grad_norm": 1.7915535479355253,
      "learning_rate": 1.5180632996056037e-06,
      "loss": 0.4255,
      "step": 10555
    },
    {
      "epoch": 1.2942618930848455,
      "grad_norm": 1.884226529417556,
      "learning_rate": 1.517597212771222e-06,
      "loss": 0.4391,
      "step": 10556
    },
    {
      "epoch": 1.2943845022069642,
      "grad_norm": 1.7902028670185424,
      "learning_rate": 1.517131166315335e-06,
      "loss": 0.4468,
      "step": 10557
    },
    {
      "epoch": 1.294507111329083,
      "grad_norm": 1.6540820444627118,
      "learning_rate": 1.516665160257097e-06,
      "loss": 0.4133,
      "step": 10558
    },
    {
      "epoch": 1.2946297204512016,
      "grad_norm": 1.9901869371736851,
      "learning_rate": 1.5161991946156617e-06,
      "loss": 0.4138,
      "step": 10559
    },
    {
      "epoch": 1.2947523295733203,
      "grad_norm": 1.9411727299062502,
      "learning_rate": 1.5157332694101812e-06,
      "loss": 0.4339,
      "step": 10560
    },
    {
      "epoch": 1.294874938695439,
      "grad_norm": 1.9860973205128656,
      "learning_rate": 1.5152673846598048e-06,
      "loss": 0.4499,
      "step": 10561
    },
    {
      "epoch": 1.2949975478175575,
      "grad_norm": 1.894697626218328,
      "learning_rate": 1.514801540383684e-06,
      "loss": 0.4425,
      "step": 10562
    },
    {
      "epoch": 1.2951201569396762,
      "grad_norm": 2.081545389014243,
      "learning_rate": 1.5143357366009632e-06,
      "loss": 0.3804,
      "step": 10563
    },
    {
      "epoch": 1.295242766061795,
      "grad_norm": 2.018299584634764,
      "learning_rate": 1.5138699733307882e-06,
      "loss": 0.4226,
      "step": 10564
    },
    {
      "epoch": 1.2953653751839136,
      "grad_norm": 1.8969308056984078,
      "learning_rate": 1.5134042505923036e-06,
      "loss": 0.4455,
      "step": 10565
    },
    {
      "epoch": 1.2954879843060323,
      "grad_norm": 1.9087477444148517,
      "learning_rate": 1.5129385684046512e-06,
      "loss": 0.4195,
      "step": 10566
    },
    {
      "epoch": 1.295610593428151,
      "grad_norm": 1.7977311238264402,
      "learning_rate": 1.5124729267869709e-06,
      "loss": 0.4183,
      "step": 10567
    },
    {
      "epoch": 1.2957332025502697,
      "grad_norm": 1.9509470511408167,
      "learning_rate": 1.5120073257584006e-06,
      "loss": 0.4079,
      "step": 10568
    },
    {
      "epoch": 1.2958558116723884,
      "grad_norm": 1.9126009649287556,
      "learning_rate": 1.5115417653380793e-06,
      "loss": 0.4542,
      "step": 10569
    },
    {
      "epoch": 1.295978420794507,
      "grad_norm": 2.03907111203802,
      "learning_rate": 1.5110762455451417e-06,
      "loss": 0.4417,
      "step": 10570
    },
    {
      "epoch": 1.2961010299166258,
      "grad_norm": 2.113930359841901,
      "learning_rate": 1.5106107663987205e-06,
      "loss": 0.4312,
      "step": 10571
    },
    {
      "epoch": 1.2962236390387445,
      "grad_norm": 1.9238693756085425,
      "learning_rate": 1.5101453279179484e-06,
      "loss": 0.453,
      "step": 10572
    },
    {
      "epoch": 1.2963462481608632,
      "grad_norm": 1.8068748195760547,
      "learning_rate": 1.5096799301219558e-06,
      "loss": 0.4427,
      "step": 10573
    },
    {
      "epoch": 1.296468857282982,
      "grad_norm": 1.8956643890066511,
      "learning_rate": 1.5092145730298707e-06,
      "loss": 0.4224,
      "step": 10574
    },
    {
      "epoch": 1.2965914664051006,
      "grad_norm": 1.9627687002239291,
      "learning_rate": 1.5087492566608225e-06,
      "loss": 0.4225,
      "step": 10575
    },
    {
      "epoch": 1.2967140755272193,
      "grad_norm": 1.871493207464716,
      "learning_rate": 1.508283981033933e-06,
      "loss": 0.4084,
      "step": 10576
    },
    {
      "epoch": 1.296836684649338,
      "grad_norm": 1.907676148452872,
      "learning_rate": 1.5078187461683281e-06,
      "loss": 0.4018,
      "step": 10577
    },
    {
      "epoch": 1.2969592937714567,
      "grad_norm": 1.9768685677563511,
      "learning_rate": 1.5073535520831295e-06,
      "loss": 0.4496,
      "step": 10578
    },
    {
      "epoch": 1.2970819028935754,
      "grad_norm": 2.1057608482853447,
      "learning_rate": 1.5068883987974576e-06,
      "loss": 0.4481,
      "step": 10579
    },
    {
      "epoch": 1.297204512015694,
      "grad_norm": 2.0070336624271876,
      "learning_rate": 1.5064232863304297e-06,
      "loss": 0.4048,
      "step": 10580
    },
    {
      "epoch": 1.2973271211378128,
      "grad_norm": 1.9346679204451127,
      "learning_rate": 1.5059582147011636e-06,
      "loss": 0.4456,
      "step": 10581
    },
    {
      "epoch": 1.2974497302599313,
      "grad_norm": 1.915849402843404,
      "learning_rate": 1.5054931839287754e-06,
      "loss": 0.4454,
      "step": 10582
    },
    {
      "epoch": 1.29757233938205,
      "grad_norm": 2.0223954492047063,
      "learning_rate": 1.5050281940323782e-06,
      "loss": 0.418,
      "step": 10583
    },
    {
      "epoch": 1.2976949485041687,
      "grad_norm": 1.8844603182003763,
      "learning_rate": 1.5045632450310833e-06,
      "loss": 0.4171,
      "step": 10584
    },
    {
      "epoch": 1.2978175576262874,
      "grad_norm": 2.2397543314040793,
      "learning_rate": 1.5040983369440013e-06,
      "loss": 0.4426,
      "step": 10585
    },
    {
      "epoch": 1.297940166748406,
      "grad_norm": 1.986718463391932,
      "learning_rate": 1.5036334697902403e-06,
      "loss": 0.4181,
      "step": 10586
    },
    {
      "epoch": 1.2980627758705248,
      "grad_norm": 1.8537212347338328,
      "learning_rate": 1.503168643588909e-06,
      "loss": 0.4717,
      "step": 10587
    },
    {
      "epoch": 1.2981853849926435,
      "grad_norm": 2.0263204602053806,
      "learning_rate": 1.5027038583591096e-06,
      "loss": 0.4161,
      "step": 10588
    },
    {
      "epoch": 1.2983079941147622,
      "grad_norm": 1.7656467447106468,
      "learning_rate": 1.502239114119948e-06,
      "loss": 0.4066,
      "step": 10589
    },
    {
      "epoch": 1.2984306032368809,
      "grad_norm": 1.911954256079451,
      "learning_rate": 1.501774410890525e-06,
      "loss": 0.4564,
      "step": 10590
    },
    {
      "epoch": 1.2985532123589996,
      "grad_norm": 1.9318054702365983,
      "learning_rate": 1.5013097486899413e-06,
      "loss": 0.4259,
      "step": 10591
    },
    {
      "epoch": 1.2986758214811183,
      "grad_norm": 1.9489377602800604,
      "learning_rate": 1.5008451275372948e-06,
      "loss": 0.4305,
      "step": 10592
    },
    {
      "epoch": 1.2987984306032367,
      "grad_norm": 2.0952964284337683,
      "learning_rate": 1.5003805474516826e-06,
      "loss": 0.4625,
      "step": 10593
    },
    {
      "epoch": 1.2989210397253554,
      "grad_norm": 1.7406753011019518,
      "learning_rate": 1.4999160084521986e-06,
      "loss": 0.4432,
      "step": 10594
    },
    {
      "epoch": 1.2990436488474741,
      "grad_norm": 1.8012878182949368,
      "learning_rate": 1.4994515105579388e-06,
      "loss": 0.4045,
      "step": 10595
    },
    {
      "epoch": 1.2991662579695928,
      "grad_norm": 2.00841991731565,
      "learning_rate": 1.4989870537879917e-06,
      "loss": 0.4469,
      "step": 10596
    },
    {
      "epoch": 1.2992888670917115,
      "grad_norm": 2.0163765875349475,
      "learning_rate": 1.4985226381614492e-06,
      "loss": 0.4679,
      "step": 10597
    },
    {
      "epoch": 1.2994114762138302,
      "grad_norm": 1.954309821726893,
      "learning_rate": 1.4980582636973995e-06,
      "loss": 0.4831,
      "step": 10598
    },
    {
      "epoch": 1.299534085335949,
      "grad_norm": 1.8618147647361194,
      "learning_rate": 1.4975939304149285e-06,
      "loss": 0.3964,
      "step": 10599
    },
    {
      "epoch": 1.2996566944580676,
      "grad_norm": 1.671825889247512,
      "learning_rate": 1.497129638333123e-06,
      "loss": 0.3817,
      "step": 10600
    },
    {
      "epoch": 1.2997793035801863,
      "grad_norm": 1.7410980248418233,
      "learning_rate": 1.4966653874710635e-06,
      "loss": 0.4342,
      "step": 10601
    },
    {
      "epoch": 1.299901912702305,
      "grad_norm": 1.9573212118985075,
      "learning_rate": 1.496201177847833e-06,
      "loss": 0.428,
      "step": 10602
    },
    {
      "epoch": 1.3000245218244237,
      "grad_norm": 1.9439582410917127,
      "learning_rate": 1.4957370094825113e-06,
      "loss": 0.4465,
      "step": 10603
    },
    {
      "epoch": 1.3001471309465424,
      "grad_norm": 1.9674070093911638,
      "learning_rate": 1.495272882394177e-06,
      "loss": 0.4727,
      "step": 10604
    },
    {
      "epoch": 1.3002697400686611,
      "grad_norm": 1.8700911340785462,
      "learning_rate": 1.4948087966019054e-06,
      "loss": 0.4214,
      "step": 10605
    },
    {
      "epoch": 1.3003923491907798,
      "grad_norm": 1.8436587808190519,
      "learning_rate": 1.4943447521247715e-06,
      "loss": 0.4356,
      "step": 10606
    },
    {
      "epoch": 1.3005149583128985,
      "grad_norm": 2.122885133941642,
      "learning_rate": 1.4938807489818489e-06,
      "loss": 0.4331,
      "step": 10607
    },
    {
      "epoch": 1.3006375674350172,
      "grad_norm": 1.8578630099629276,
      "learning_rate": 1.4934167871922093e-06,
      "loss": 0.3742,
      "step": 10608
    },
    {
      "epoch": 1.300760176557136,
      "grad_norm": 1.8446263101457347,
      "learning_rate": 1.492952866774921e-06,
      "loss": 0.4147,
      "step": 10609
    },
    {
      "epoch": 1.3008827856792546,
      "grad_norm": 1.6739272117945156,
      "learning_rate": 1.4924889877490531e-06,
      "loss": 0.4065,
      "step": 10610
    },
    {
      "epoch": 1.3010053948013733,
      "grad_norm": 1.8322788167333748,
      "learning_rate": 1.4920251501336714e-06,
      "loss": 0.4114,
      "step": 10611
    },
    {
      "epoch": 1.301128003923492,
      "grad_norm": 2.1449135496499965,
      "learning_rate": 1.4915613539478408e-06,
      "loss": 0.4707,
      "step": 10612
    },
    {
      "epoch": 1.3012506130456105,
      "grad_norm": 1.964485187166347,
      "learning_rate": 1.4910975992106235e-06,
      "loss": 0.4205,
      "step": 10613
    },
    {
      "epoch": 1.3013732221677292,
      "grad_norm": 1.8813630769312792,
      "learning_rate": 1.4906338859410807e-06,
      "loss": 0.446,
      "step": 10614
    },
    {
      "epoch": 1.301495831289848,
      "grad_norm": 1.9143378058472966,
      "learning_rate": 1.4901702141582724e-06,
      "loss": 0.4499,
      "step": 10615
    },
    {
      "epoch": 1.3016184404119666,
      "grad_norm": 1.8061822311815805,
      "learning_rate": 1.4897065838812564e-06,
      "loss": 0.4342,
      "step": 10616
    },
    {
      "epoch": 1.3017410495340853,
      "grad_norm": 1.8937599957777507,
      "learning_rate": 1.4892429951290881e-06,
      "loss": 0.4005,
      "step": 10617
    },
    {
      "epoch": 1.301863658656204,
      "grad_norm": 1.8906847168792618,
      "learning_rate": 1.4887794479208224e-06,
      "loss": 0.4048,
      "step": 10618
    },
    {
      "epoch": 1.3019862677783227,
      "grad_norm": 1.7446418132363992,
      "learning_rate": 1.4883159422755106e-06,
      "loss": 0.36,
      "step": 10619
    },
    {
      "epoch": 1.3021088769004414,
      "grad_norm": 2.140080113904166,
      "learning_rate": 1.487852478212206e-06,
      "loss": 0.4531,
      "step": 10620
    },
    {
      "epoch": 1.3022314860225601,
      "grad_norm": 1.9597237631267028,
      "learning_rate": 1.4873890557499555e-06,
      "loss": 0.428,
      "step": 10621
    },
    {
      "epoch": 1.3023540951446788,
      "grad_norm": 1.94068957581759,
      "learning_rate": 1.4869256749078076e-06,
      "loss": 0.4005,
      "step": 10622
    },
    {
      "epoch": 1.3024767042667975,
      "grad_norm": 1.9583430388313132,
      "learning_rate": 1.486462335704808e-06,
      "loss": 0.446,
      "step": 10623
    },
    {
      "epoch": 1.3025993133889162,
      "grad_norm": 1.9779300079631559,
      "learning_rate": 1.4859990381600004e-06,
      "loss": 0.4574,
      "step": 10624
    },
    {
      "epoch": 1.3027219225110347,
      "grad_norm": 1.9523338576580935,
      "learning_rate": 1.4855357822924288e-06,
      "loss": 0.4304,
      "step": 10625
    },
    {
      "epoch": 1.3028445316331534,
      "grad_norm": 2.036157168521065,
      "learning_rate": 1.4850725681211312e-06,
      "loss": 0.41,
      "step": 10626
    },
    {
      "epoch": 1.302967140755272,
      "grad_norm": 1.9811296455472605,
      "learning_rate": 1.4846093956651481e-06,
      "loss": 0.431,
      "step": 10627
    },
    {
      "epoch": 1.3030897498773908,
      "grad_norm": 1.6152841338663133,
      "learning_rate": 1.4841462649435168e-06,
      "loss": 0.4157,
      "step": 10628
    },
    {
      "epoch": 1.3032123589995095,
      "grad_norm": 1.8809350498534851,
      "learning_rate": 1.4836831759752726e-06,
      "loss": 0.4116,
      "step": 10629
    },
    {
      "epoch": 1.3033349681216282,
      "grad_norm": 2.249445864937077,
      "learning_rate": 1.4832201287794486e-06,
      "loss": 0.4765,
      "step": 10630
    },
    {
      "epoch": 1.303457577243747,
      "grad_norm": 1.840910883896281,
      "learning_rate": 1.4827571233750776e-06,
      "loss": 0.4169,
      "step": 10631
    },
    {
      "epoch": 1.3035801863658656,
      "grad_norm": 1.8872314528266132,
      "learning_rate": 1.4822941597811891e-06,
      "loss": 0.4507,
      "step": 10632
    },
    {
      "epoch": 1.3037027954879843,
      "grad_norm": 2.036578373287276,
      "learning_rate": 1.481831238016814e-06,
      "loss": 0.4211,
      "step": 10633
    },
    {
      "epoch": 1.303825404610103,
      "grad_norm": 1.9569999339176094,
      "learning_rate": 1.4813683581009758e-06,
      "loss": 0.4472,
      "step": 10634
    },
    {
      "epoch": 1.3039480137322217,
      "grad_norm": 1.9956876077772332,
      "learning_rate": 1.4809055200527022e-06,
      "loss": 0.4748,
      "step": 10635
    },
    {
      "epoch": 1.3040706228543404,
      "grad_norm": 1.7997814446922389,
      "learning_rate": 1.4804427238910157e-06,
      "loss": 0.4225,
      "step": 10636
    },
    {
      "epoch": 1.304193231976459,
      "grad_norm": 2.0544067635119463,
      "learning_rate": 1.4799799696349387e-06,
      "loss": 0.4754,
      "step": 10637
    },
    {
      "epoch": 1.3043158410985778,
      "grad_norm": 1.8840973188277608,
      "learning_rate": 1.4795172573034902e-06,
      "loss": 0.4073,
      "step": 10638
    },
    {
      "epoch": 1.3044384502206965,
      "grad_norm": 1.8587617861374301,
      "learning_rate": 1.4790545869156885e-06,
      "loss": 0.3931,
      "step": 10639
    },
    {
      "epoch": 1.3045610593428152,
      "grad_norm": 2.088640782500384,
      "learning_rate": 1.4785919584905517e-06,
      "loss": 0.4845,
      "step": 10640
    },
    {
      "epoch": 1.3046836684649339,
      "grad_norm": 1.8446084098581776,
      "learning_rate": 1.4781293720470939e-06,
      "loss": 0.3714,
      "step": 10641
    },
    {
      "epoch": 1.3048062775870526,
      "grad_norm": 1.8702268967750302,
      "learning_rate": 1.4776668276043272e-06,
      "loss": 0.4239,
      "step": 10642
    },
    {
      "epoch": 1.3049288867091713,
      "grad_norm": 1.9249813674171925,
      "learning_rate": 1.477204325181264e-06,
      "loss": 0.424,
      "step": 10643
    },
    {
      "epoch": 1.3050514958312898,
      "grad_norm": 1.8126341981555296,
      "learning_rate": 1.4767418647969134e-06,
      "loss": 0.437,
      "step": 10644
    },
    {
      "epoch": 1.3051741049534085,
      "grad_norm": 1.9761912824779395,
      "learning_rate": 1.476279446470284e-06,
      "loss": 0.409,
      "step": 10645
    },
    {
      "epoch": 1.3052967140755272,
      "grad_norm": 1.9137134077001778,
      "learning_rate": 1.4758170702203823e-06,
      "loss": 0.4562,
      "step": 10646
    },
    {
      "epoch": 1.3054193231976459,
      "grad_norm": 1.8010221118781229,
      "learning_rate": 1.4753547360662117e-06,
      "loss": 0.4038,
      "step": 10647
    },
    {
      "epoch": 1.3055419323197646,
      "grad_norm": 1.9823033099149334,
      "learning_rate": 1.4748924440267753e-06,
      "loss": 0.4385,
      "step": 10648
    },
    {
      "epoch": 1.3056645414418833,
      "grad_norm": 1.8831042140685899,
      "learning_rate": 1.4744301941210743e-06,
      "loss": 0.4236,
      "step": 10649
    },
    {
      "epoch": 1.305787150564002,
      "grad_norm": 1.8775412148807893,
      "learning_rate": 1.4739679863681086e-06,
      "loss": 0.4441,
      "step": 10650
    },
    {
      "epoch": 1.3059097596861207,
      "grad_norm": 1.9112876754552135,
      "learning_rate": 1.4735058207868747e-06,
      "loss": 0.3899,
      "step": 10651
    },
    {
      "epoch": 1.3060323688082394,
      "grad_norm": 1.8897753610203343,
      "learning_rate": 1.473043697396368e-06,
      "loss": 0.488,
      "step": 10652
    },
    {
      "epoch": 1.306154977930358,
      "grad_norm": 2.148680919412513,
      "learning_rate": 1.472581616215584e-06,
      "loss": 0.4052,
      "step": 10653
    },
    {
      "epoch": 1.3062775870524768,
      "grad_norm": 2.1647351328262756,
      "learning_rate": 1.472119577263515e-06,
      "loss": 0.4358,
      "step": 10654
    },
    {
      "epoch": 1.3064001961745955,
      "grad_norm": 1.8870253916584792,
      "learning_rate": 1.4716575805591507e-06,
      "loss": 0.3934,
      "step": 10655
    },
    {
      "epoch": 1.306522805296714,
      "grad_norm": 2.1765889902982387,
      "learning_rate": 1.4711956261214804e-06,
      "loss": 0.4782,
      "step": 10656
    },
    {
      "epoch": 1.3066454144188326,
      "grad_norm": 1.8372756986811136,
      "learning_rate": 1.4707337139694906e-06,
      "loss": 0.4248,
      "step": 10657
    },
    {
      "epoch": 1.3067680235409513,
      "grad_norm": 1.8948194427296363,
      "learning_rate": 1.4702718441221687e-06,
      "loss": 0.4515,
      "step": 10658
    },
    {
      "epoch": 1.30689063266307,
      "grad_norm": 1.8081184010739884,
      "learning_rate": 1.4698100165984957e-06,
      "loss": 0.445,
      "step": 10659
    },
    {
      "epoch": 1.3070132417851887,
      "grad_norm": 1.832313994276987,
      "learning_rate": 1.4693482314174553e-06,
      "loss": 0.4184,
      "step": 10660
    },
    {
      "epoch": 1.3071358509073074,
      "grad_norm": 1.9533113893854228,
      "learning_rate": 1.4688864885980272e-06,
      "loss": 0.4458,
      "step": 10661
    },
    {
      "epoch": 1.3072584600294261,
      "grad_norm": 1.9741575018824637,
      "learning_rate": 1.4684247881591904e-06,
      "loss": 0.4095,
      "step": 10662
    },
    {
      "epoch": 1.3073810691515448,
      "grad_norm": 1.8875630004994142,
      "learning_rate": 1.4679631301199204e-06,
      "loss": 0.4249,
      "step": 10663
    },
    {
      "epoch": 1.3075036782736635,
      "grad_norm": 2.09398778357582,
      "learning_rate": 1.4675015144991929e-06,
      "loss": 0.4644,
      "step": 10664
    },
    {
      "epoch": 1.3076262873957822,
      "grad_norm": 2.1846029989944844,
      "learning_rate": 1.4670399413159808e-06,
      "loss": 0.4434,
      "step": 10665
    },
    {
      "epoch": 1.307748896517901,
      "grad_norm": 1.8749239792571564,
      "learning_rate": 1.4665784105892566e-06,
      "loss": 0.4187,
      "step": 10666
    },
    {
      "epoch": 1.3078715056400196,
      "grad_norm": 1.9415940707544734,
      "learning_rate": 1.466116922337989e-06,
      "loss": 0.3821,
      "step": 10667
    },
    {
      "epoch": 1.3079941147621383,
      "grad_norm": 1.9181139098148923,
      "learning_rate": 1.465655476581146e-06,
      "loss": 0.4513,
      "step": 10668
    },
    {
      "epoch": 1.308116723884257,
      "grad_norm": 2.025183898322893,
      "learning_rate": 1.465194073337694e-06,
      "loss": 0.4662,
      "step": 10669
    },
    {
      "epoch": 1.3082393330063757,
      "grad_norm": 2.023316805813226,
      "learning_rate": 1.464732712626597e-06,
      "loss": 0.4165,
      "step": 10670
    },
    {
      "epoch": 1.3083619421284944,
      "grad_norm": 1.8954898441824544,
      "learning_rate": 1.4642713944668203e-06,
      "loss": 0.4231,
      "step": 10671
    },
    {
      "epoch": 1.3084845512506131,
      "grad_norm": 1.8086740410051318,
      "learning_rate": 1.4638101188773207e-06,
      "loss": 0.4433,
      "step": 10672
    },
    {
      "epoch": 1.3086071603727318,
      "grad_norm": 2.0731980373253394,
      "learning_rate": 1.4633488858770605e-06,
      "loss": 0.445,
      "step": 10673
    },
    {
      "epoch": 1.3087297694948505,
      "grad_norm": 2.048792444824615,
      "learning_rate": 1.4628876954849964e-06,
      "loss": 0.4162,
      "step": 10674
    },
    {
      "epoch": 1.3088523786169692,
      "grad_norm": 2.097392655495172,
      "learning_rate": 1.4624265477200843e-06,
      "loss": 0.4272,
      "step": 10675
    },
    {
      "epoch": 1.3089749877390877,
      "grad_norm": 2.0466146914422105,
      "learning_rate": 1.4619654426012781e-06,
      "loss": 0.4584,
      "step": 10676
    },
    {
      "epoch": 1.3090975968612064,
      "grad_norm": 2.059399552914958,
      "learning_rate": 1.4615043801475287e-06,
      "loss": 0.4186,
      "step": 10677
    },
    {
      "epoch": 1.3092202059833251,
      "grad_norm": 1.9021007548728592,
      "learning_rate": 1.4610433603777888e-06,
      "loss": 0.3636,
      "step": 10678
    },
    {
      "epoch": 1.3093428151054438,
      "grad_norm": 1.9419083957643937,
      "learning_rate": 1.4605823833110063e-06,
      "loss": 0.4249,
      "step": 10679
    },
    {
      "epoch": 1.3094654242275625,
      "grad_norm": 1.8002132365889614,
      "learning_rate": 1.4601214489661276e-06,
      "loss": 0.4283,
      "step": 10680
    },
    {
      "epoch": 1.3095880333496812,
      "grad_norm": 1.7866960967436287,
      "learning_rate": 1.4596605573620985e-06,
      "loss": 0.4434,
      "step": 10681
    },
    {
      "epoch": 1.3097106424718,
      "grad_norm": 1.81809067606283,
      "learning_rate": 1.459199708517862e-06,
      "loss": 0.4352,
      "step": 10682
    },
    {
      "epoch": 1.3098332515939186,
      "grad_norm": 2.130730580207847,
      "learning_rate": 1.4587389024523607e-06,
      "loss": 0.4287,
      "step": 10683
    },
    {
      "epoch": 1.3099558607160373,
      "grad_norm": 1.9926310287240405,
      "learning_rate": 1.4582781391845328e-06,
      "loss": 0.4454,
      "step": 10684
    },
    {
      "epoch": 1.310078469838156,
      "grad_norm": 1.8782829899140412,
      "learning_rate": 1.4578174187333185e-06,
      "loss": 0.4052,
      "step": 10685
    },
    {
      "epoch": 1.3102010789602747,
      "grad_norm": 1.9120063209203408,
      "learning_rate": 1.4573567411176526e-06,
      "loss": 0.4165,
      "step": 10686
    },
    {
      "epoch": 1.3103236880823934,
      "grad_norm": 1.7384241771917948,
      "learning_rate": 1.4568961063564713e-06,
      "loss": 0.4087,
      "step": 10687
    },
    {
      "epoch": 1.310446297204512,
      "grad_norm": 2.0026718470366256,
      "learning_rate": 1.456435514468706e-06,
      "loss": 0.4693,
      "step": 10688
    },
    {
      "epoch": 1.3105689063266306,
      "grad_norm": 1.8642302706739673,
      "learning_rate": 1.455974965473289e-06,
      "loss": 0.4097,
      "step": 10689
    },
    {
      "epoch": 1.3106915154487493,
      "grad_norm": 1.8094320089686882,
      "learning_rate": 1.4555144593891487e-06,
      "loss": 0.4454,
      "step": 10690
    },
    {
      "epoch": 1.310814124570868,
      "grad_norm": 1.952956074395931,
      "learning_rate": 1.4550539962352139e-06,
      "loss": 0.4646,
      "step": 10691
    },
    {
      "epoch": 1.3109367336929867,
      "grad_norm": 1.9156579170433228,
      "learning_rate": 1.4545935760304104e-06,
      "loss": 0.412,
      "step": 10692
    },
    {
      "epoch": 1.3110593428151054,
      "grad_norm": 1.8182205648601422,
      "learning_rate": 1.45413319879366e-06,
      "loss": 0.4503,
      "step": 10693
    },
    {
      "epoch": 1.311181951937224,
      "grad_norm": 1.8219596042808062,
      "learning_rate": 1.4536728645438868e-06,
      "loss": 0.4656,
      "step": 10694
    },
    {
      "epoch": 1.3113045610593428,
      "grad_norm": 2.0343492572168977,
      "learning_rate": 1.4532125733000124e-06,
      "loss": 0.3955,
      "step": 10695
    },
    {
      "epoch": 1.3114271701814615,
      "grad_norm": 2.1110264892542387,
      "learning_rate": 1.4527523250809544e-06,
      "loss": 0.4398,
      "step": 10696
    },
    {
      "epoch": 1.3115497793035802,
      "grad_norm": 2.0603998803197805,
      "learning_rate": 1.452292119905629e-06,
      "loss": 0.4565,
      "step": 10697
    },
    {
      "epoch": 1.3116723884256989,
      "grad_norm": 2.111962157191251,
      "learning_rate": 1.4518319577929529e-06,
      "loss": 0.3955,
      "step": 10698
    },
    {
      "epoch": 1.3117949975478176,
      "grad_norm": 1.8730640199140103,
      "learning_rate": 1.4513718387618383e-06,
      "loss": 0.4138,
      "step": 10699
    },
    {
      "epoch": 1.3119176066699363,
      "grad_norm": 1.8706114655076376,
      "learning_rate": 1.4509117628311986e-06,
      "loss": 0.3884,
      "step": 10700
    },
    {
      "epoch": 1.312040215792055,
      "grad_norm": 1.9298307428045105,
      "learning_rate": 1.4504517300199416e-06,
      "loss": 0.4396,
      "step": 10701
    },
    {
      "epoch": 1.3121628249141737,
      "grad_norm": 2.0540387384923875,
      "learning_rate": 1.4499917403469777e-06,
      "loss": 0.4057,
      "step": 10702
    },
    {
      "epoch": 1.3122854340362924,
      "grad_norm": 2.0826659852484615,
      "learning_rate": 1.4495317938312109e-06,
      "loss": 0.4936,
      "step": 10703
    },
    {
      "epoch": 1.312408043158411,
      "grad_norm": 1.8435199864597949,
      "learning_rate": 1.4490718904915482e-06,
      "loss": 0.3899,
      "step": 10704
    },
    {
      "epoch": 1.3125306522805298,
      "grad_norm": 1.8973622836723691,
      "learning_rate": 1.4486120303468915e-06,
      "loss": 0.4487,
      "step": 10705
    },
    {
      "epoch": 1.3126532614026485,
      "grad_norm": 1.8252810733019473,
      "learning_rate": 1.4481522134161404e-06,
      "loss": 0.4107,
      "step": 10706
    },
    {
      "epoch": 1.312775870524767,
      "grad_norm": 2.1234720110040715,
      "learning_rate": 1.4476924397181958e-06,
      "loss": 0.4481,
      "step": 10707
    },
    {
      "epoch": 1.3128984796468857,
      "grad_norm": 1.9923804671585612,
      "learning_rate": 1.4472327092719557e-06,
      "loss": 0.4342,
      "step": 10708
    },
    {
      "epoch": 1.3130210887690044,
      "grad_norm": 2.016317499917443,
      "learning_rate": 1.4467730220963149e-06,
      "loss": 0.424,
      "step": 10709
    },
    {
      "epoch": 1.313143697891123,
      "grad_norm": 2.024100276485018,
      "learning_rate": 1.4463133782101665e-06,
      "loss": 0.4327,
      "step": 10710
    },
    {
      "epoch": 1.3132663070132418,
      "grad_norm": 1.939045979989504,
      "learning_rate": 1.4458537776324038e-06,
      "loss": 0.4318,
      "step": 10711
    },
    {
      "epoch": 1.3133889161353605,
      "grad_norm": 2.237844980554918,
      "learning_rate": 1.445394220381919e-06,
      "loss": 0.4533,
      "step": 10712
    },
    {
      "epoch": 1.3135115252574792,
      "grad_norm": 2.002103865288168,
      "learning_rate": 1.444934706477597e-06,
      "loss": 0.4146,
      "step": 10713
    },
    {
      "epoch": 1.3136341343795979,
      "grad_norm": 1.978769767568507,
      "learning_rate": 1.4444752359383263e-06,
      "loss": 0.4708,
      "step": 10714
    },
    {
      "epoch": 1.3137567435017166,
      "grad_norm": 2.099533717641542,
      "learning_rate": 1.4440158087829928e-06,
      "loss": 0.4163,
      "step": 10715
    },
    {
      "epoch": 1.3138793526238353,
      "grad_norm": 1.8208647903549458,
      "learning_rate": 1.4435564250304793e-06,
      "loss": 0.4177,
      "step": 10716
    },
    {
      "epoch": 1.314001961745954,
      "grad_norm": 2.17340408825657,
      "learning_rate": 1.443097084699666e-06,
      "loss": 0.47,
      "step": 10717
    },
    {
      "epoch": 1.3141245708680727,
      "grad_norm": 1.8172690634175221,
      "learning_rate": 1.4426377878094343e-06,
      "loss": 0.4122,
      "step": 10718
    },
    {
      "epoch": 1.3142471799901911,
      "grad_norm": 2.0109658428348403,
      "learning_rate": 1.442178534378661e-06,
      "loss": 0.4296,
      "step": 10719
    },
    {
      "epoch": 1.3143697891123098,
      "grad_norm": 2.392001580280038,
      "learning_rate": 1.4417193244262222e-06,
      "loss": 0.418,
      "step": 10720
    },
    {
      "epoch": 1.3144923982344285,
      "grad_norm": 1.9619765006660244,
      "learning_rate": 1.441260157970994e-06,
      "loss": 0.4402,
      "step": 10721
    },
    {
      "epoch": 1.3146150073565472,
      "grad_norm": 1.844746740252348,
      "learning_rate": 1.4408010350318475e-06,
      "loss": 0.4248,
      "step": 10722
    },
    {
      "epoch": 1.314737616478666,
      "grad_norm": 1.9337318785128823,
      "learning_rate": 1.4403419556276523e-06,
      "loss": 0.4569,
      "step": 10723
    },
    {
      "epoch": 1.3148602256007846,
      "grad_norm": 1.8949441714766455,
      "learning_rate": 1.439882919777279e-06,
      "loss": 0.4266,
      "step": 10724
    },
    {
      "epoch": 1.3149828347229033,
      "grad_norm": 2.0369073207480333,
      "learning_rate": 1.4394239274995966e-06,
      "loss": 0.4122,
      "step": 10725
    },
    {
      "epoch": 1.315105443845022,
      "grad_norm": 1.9915706337081103,
      "learning_rate": 1.4389649788134662e-06,
      "loss": 0.4298,
      "step": 10726
    },
    {
      "epoch": 1.3152280529671407,
      "grad_norm": 1.7520351485442056,
      "learning_rate": 1.4385060737377538e-06,
      "loss": 0.4432,
      "step": 10727
    },
    {
      "epoch": 1.3153506620892594,
      "grad_norm": 1.8041463484489688,
      "learning_rate": 1.438047212291322e-06,
      "loss": 0.4585,
      "step": 10728
    },
    {
      "epoch": 1.3154732712113781,
      "grad_norm": 1.9419382012526727,
      "learning_rate": 1.4375883944930298e-06,
      "loss": 0.4533,
      "step": 10729
    },
    {
      "epoch": 1.3155958803334968,
      "grad_norm": 1.9532364560887276,
      "learning_rate": 1.4371296203617342e-06,
      "loss": 0.4424,
      "step": 10730
    },
    {
      "epoch": 1.3157184894556155,
      "grad_norm": 1.936434982394715,
      "learning_rate": 1.4366708899162928e-06,
      "loss": 0.4365,
      "step": 10731
    },
    {
      "epoch": 1.3158410985777342,
      "grad_norm": 2.134568468835814,
      "learning_rate": 1.4362122031755616e-06,
      "loss": 0.4286,
      "step": 10732
    },
    {
      "epoch": 1.315963707699853,
      "grad_norm": 1.8476085157844098,
      "learning_rate": 1.4357535601583922e-06,
      "loss": 0.3708,
      "step": 10733
    },
    {
      "epoch": 1.3160863168219716,
      "grad_norm": 1.9163750207521972,
      "learning_rate": 1.4352949608836345e-06,
      "loss": 0.3947,
      "step": 10734
    },
    {
      "epoch": 1.3162089259440903,
      "grad_norm": 2.032251229632666,
      "learning_rate": 1.4348364053701397e-06,
      "loss": 0.4762,
      "step": 10735
    },
    {
      "epoch": 1.316331535066209,
      "grad_norm": 1.8499391430991952,
      "learning_rate": 1.4343778936367534e-06,
      "loss": 0.41,
      "step": 10736
    },
    {
      "epoch": 1.3164541441883277,
      "grad_norm": 1.8534698283982611,
      "learning_rate": 1.433919425702323e-06,
      "loss": 0.4242,
      "step": 10737
    },
    {
      "epoch": 1.3165767533104464,
      "grad_norm": 1.7262789050865623,
      "learning_rate": 1.433461001585692e-06,
      "loss": 0.4544,
      "step": 10738
    },
    {
      "epoch": 1.316699362432565,
      "grad_norm": 1.8453716714414676,
      "learning_rate": 1.4330026213057007e-06,
      "loss": 0.425,
      "step": 10739
    },
    {
      "epoch": 1.3168219715546836,
      "grad_norm": 2.0095233824304612,
      "learning_rate": 1.4325442848811908e-06,
      "loss": 0.4096,
      "step": 10740
    },
    {
      "epoch": 1.3169445806768023,
      "grad_norm": 1.857679502663763,
      "learning_rate": 1.4320859923310014e-06,
      "loss": 0.4002,
      "step": 10741
    },
    {
      "epoch": 1.317067189798921,
      "grad_norm": 2.1591833333441617,
      "learning_rate": 1.4316277436739684e-06,
      "loss": 0.4002,
      "step": 10742
    },
    {
      "epoch": 1.3171897989210397,
      "grad_norm": 1.9581754995503093,
      "learning_rate": 1.4311695389289254e-06,
      "loss": 0.4305,
      "step": 10743
    },
    {
      "epoch": 1.3173124080431584,
      "grad_norm": 2.1077782282030637,
      "learning_rate": 1.4307113781147066e-06,
      "loss": 0.4417,
      "step": 10744
    },
    {
      "epoch": 1.3174350171652771,
      "grad_norm": 1.7111974424188159,
      "learning_rate": 1.4302532612501444e-06,
      "loss": 0.3876,
      "step": 10745
    },
    {
      "epoch": 1.3175576262873958,
      "grad_norm": 1.910657348512978,
      "learning_rate": 1.4297951883540667e-06,
      "loss": 0.4378,
      "step": 10746
    },
    {
      "epoch": 1.3176802354095145,
      "grad_norm": 2.05399235030994,
      "learning_rate": 1.4293371594453007e-06,
      "loss": 0.4284,
      "step": 10747
    },
    {
      "epoch": 1.3178028445316332,
      "grad_norm": 1.9593613849520242,
      "learning_rate": 1.428879174542674e-06,
      "loss": 0.4739,
      "step": 10748
    },
    {
      "epoch": 1.317925453653752,
      "grad_norm": 1.9282475903943495,
      "learning_rate": 1.4284212336650081e-06,
      "loss": 0.4595,
      "step": 10749
    },
    {
      "epoch": 1.3180480627758704,
      "grad_norm": 1.9121317985348054,
      "learning_rate": 1.4279633368311277e-06,
      "loss": 0.439,
      "step": 10750
    },
    {
      "epoch": 1.318170671897989,
      "grad_norm": 1.8071069940670792,
      "learning_rate": 1.4275054840598512e-06,
      "loss": 0.4129,
      "step": 10751
    },
    {
      "epoch": 1.3182932810201078,
      "grad_norm": 1.7120956910760112,
      "learning_rate": 1.427047675369999e-06,
      "loss": 0.3763,
      "step": 10752
    },
    {
      "epoch": 1.3184158901422265,
      "grad_norm": 1.9271999424918422,
      "learning_rate": 1.4265899107803855e-06,
      "loss": 0.4387,
      "step": 10753
    },
    {
      "epoch": 1.3185384992643452,
      "grad_norm": 1.8810909116202008,
      "learning_rate": 1.4261321903098284e-06,
      "loss": 0.4371,
      "step": 10754
    },
    {
      "epoch": 1.3186611083864639,
      "grad_norm": 1.8304516360988932,
      "learning_rate": 1.425674513977139e-06,
      "loss": 0.3848,
      "step": 10755
    },
    {
      "epoch": 1.3187837175085826,
      "grad_norm": 1.7454413578712165,
      "learning_rate": 1.4252168818011287e-06,
      "loss": 0.3817,
      "step": 10756
    },
    {
      "epoch": 1.3189063266307013,
      "grad_norm": 2.064938529313384,
      "learning_rate": 1.4247592938006066e-06,
      "loss": 0.3853,
      "step": 10757
    },
    {
      "epoch": 1.31902893575282,
      "grad_norm": 2.0099191221214063,
      "learning_rate": 1.4243017499943834e-06,
      "loss": 0.4317,
      "step": 10758
    },
    {
      "epoch": 1.3191515448749387,
      "grad_norm": 1.7614208299785692,
      "learning_rate": 1.4238442504012607e-06,
      "loss": 0.4313,
      "step": 10759
    },
    {
      "epoch": 1.3192741539970574,
      "grad_norm": 1.916295596638816,
      "learning_rate": 1.4233867950400444e-06,
      "loss": 0.4335,
      "step": 10760
    },
    {
      "epoch": 1.319396763119176,
      "grad_norm": 1.8824272137506026,
      "learning_rate": 1.4229293839295377e-06,
      "loss": 0.4085,
      "step": 10761
    },
    {
      "epoch": 1.3195193722412948,
      "grad_norm": 1.790196115837573,
      "learning_rate": 1.4224720170885403e-06,
      "loss": 0.4064,
      "step": 10762
    },
    {
      "epoch": 1.3196419813634135,
      "grad_norm": 1.940633521916587,
      "learning_rate": 1.4220146945358498e-06,
      "loss": 0.4288,
      "step": 10763
    },
    {
      "epoch": 1.3197645904855322,
      "grad_norm": 2.0215566273442174,
      "learning_rate": 1.4215574162902633e-06,
      "loss": 0.4639,
      "step": 10764
    },
    {
      "epoch": 1.3198871996076509,
      "grad_norm": 2.068646284259938,
      "learning_rate": 1.4211001823705778e-06,
      "loss": 0.4474,
      "step": 10765
    },
    {
      "epoch": 1.3200098087297696,
      "grad_norm": 1.929375564093246,
      "learning_rate": 1.4206429927955834e-06,
      "loss": 0.4653,
      "step": 10766
    },
    {
      "epoch": 1.3201324178518883,
      "grad_norm": 1.9183313803501378,
      "learning_rate": 1.420185847584074e-06,
      "loss": 0.3743,
      "step": 10767
    },
    {
      "epoch": 1.320255026974007,
      "grad_norm": 2.029274155324717,
      "learning_rate": 1.4197287467548379e-06,
      "loss": 0.4905,
      "step": 10768
    },
    {
      "epoch": 1.3203776360961257,
      "grad_norm": 1.8460873350385438,
      "learning_rate": 1.4192716903266618e-06,
      "loss": 0.4396,
      "step": 10769
    },
    {
      "epoch": 1.3205002452182442,
      "grad_norm": 1.9696303484404625,
      "learning_rate": 1.4188146783183323e-06,
      "loss": 0.4625,
      "step": 10770
    },
    {
      "epoch": 1.3206228543403629,
      "grad_norm": 1.9798686468400875,
      "learning_rate": 1.4183577107486348e-06,
      "loss": 0.4333,
      "step": 10771
    },
    {
      "epoch": 1.3207454634624816,
      "grad_norm": 1.8799836602785265,
      "learning_rate": 1.4179007876363504e-06,
      "loss": 0.4165,
      "step": 10772
    },
    {
      "epoch": 1.3208680725846003,
      "grad_norm": 1.9538760208009274,
      "learning_rate": 1.417443909000258e-06,
      "loss": 0.437,
      "step": 10773
    },
    {
      "epoch": 1.320990681706719,
      "grad_norm": 2.052601375931178,
      "learning_rate": 1.4169870748591388e-06,
      "loss": 0.4431,
      "step": 10774
    },
    {
      "epoch": 1.3211132908288377,
      "grad_norm": 2.045463895044091,
      "learning_rate": 1.416530285231768e-06,
      "loss": 0.4443,
      "step": 10775
    },
    {
      "epoch": 1.3212358999509564,
      "grad_norm": 1.977562632993728,
      "learning_rate": 1.4160735401369197e-06,
      "loss": 0.4194,
      "step": 10776
    },
    {
      "epoch": 1.321358509073075,
      "grad_norm": 1.965810618855252,
      "learning_rate": 1.4156168395933678e-06,
      "loss": 0.4098,
      "step": 10777
    },
    {
      "epoch": 1.3214811181951938,
      "grad_norm": 2.168026058876588,
      "learning_rate": 1.4151601836198844e-06,
      "loss": 0.4353,
      "step": 10778
    },
    {
      "epoch": 1.3216037273173125,
      "grad_norm": 1.8617748715642013,
      "learning_rate": 1.4147035722352382e-06,
      "loss": 0.4436,
      "step": 10779
    },
    {
      "epoch": 1.3217263364394312,
      "grad_norm": 1.9273061462706544,
      "learning_rate": 1.4142470054581954e-06,
      "loss": 0.4283,
      "step": 10780
    },
    {
      "epoch": 1.3218489455615499,
      "grad_norm": 1.98121226804472,
      "learning_rate": 1.4137904833075238e-06,
      "loss": 0.4516,
      "step": 10781
    },
    {
      "epoch": 1.3219715546836683,
      "grad_norm": 1.760032390109154,
      "learning_rate": 1.4133340058019857e-06,
      "loss": 0.4275,
      "step": 10782
    },
    {
      "epoch": 1.322094163805787,
      "grad_norm": 1.8438995758983823,
      "learning_rate": 1.4128775729603444e-06,
      "loss": 0.3876,
      "step": 10783
    },
    {
      "epoch": 1.3222167729279057,
      "grad_norm": 1.9258757888811187,
      "learning_rate": 1.4124211848013586e-06,
      "loss": 0.4026,
      "step": 10784
    },
    {
      "epoch": 1.3223393820500244,
      "grad_norm": 1.9764352940580643,
      "learning_rate": 1.4119648413437883e-06,
      "loss": 0.3985,
      "step": 10785
    },
    {
      "epoch": 1.3224619911721431,
      "grad_norm": 2.05501231924688,
      "learning_rate": 1.4115085426063886e-06,
      "loss": 0.4849,
      "step": 10786
    },
    {
      "epoch": 1.3225846002942618,
      "grad_norm": 1.9867725964510965,
      "learning_rate": 1.4110522886079154e-06,
      "loss": 0.4525,
      "step": 10787
    },
    {
      "epoch": 1.3227072094163805,
      "grad_norm": 1.9501692843128384,
      "learning_rate": 1.4105960793671214e-06,
      "loss": 0.4424,
      "step": 10788
    },
    {
      "epoch": 1.3228298185384992,
      "grad_norm": 1.9599339510848397,
      "learning_rate": 1.4101399149027557e-06,
      "loss": 0.4222,
      "step": 10789
    },
    {
      "epoch": 1.322952427660618,
      "grad_norm": 1.9992753814680748,
      "learning_rate": 1.4096837952335693e-06,
      "loss": 0.4547,
      "step": 10790
    },
    {
      "epoch": 1.3230750367827366,
      "grad_norm": 1.8310037035177509,
      "learning_rate": 1.40922772037831e-06,
      "loss": 0.4168,
      "step": 10791
    },
    {
      "epoch": 1.3231976459048553,
      "grad_norm": 2.117062778374776,
      "learning_rate": 1.4087716903557225e-06,
      "loss": 0.4889,
      "step": 10792
    },
    {
      "epoch": 1.323320255026974,
      "grad_norm": 1.895527897388112,
      "learning_rate": 1.4083157051845495e-06,
      "loss": 0.4446,
      "step": 10793
    },
    {
      "epoch": 1.3234428641490927,
      "grad_norm": 1.8568433907602413,
      "learning_rate": 1.4078597648835346e-06,
      "loss": 0.4227,
      "step": 10794
    },
    {
      "epoch": 1.3235654732712114,
      "grad_norm": 2.0609914577876305,
      "learning_rate": 1.407403869471416e-06,
      "loss": 0.4406,
      "step": 10795
    },
    {
      "epoch": 1.3236880823933301,
      "grad_norm": 2.0217934210393738,
      "learning_rate": 1.4069480189669332e-06,
      "loss": 0.4606,
      "step": 10796
    },
    {
      "epoch": 1.3238106915154488,
      "grad_norm": 1.9621563204350891,
      "learning_rate": 1.4064922133888214e-06,
      "loss": 0.4341,
      "step": 10797
    },
    {
      "epoch": 1.3239333006375675,
      "grad_norm": 1.9340050275508232,
      "learning_rate": 1.4060364527558162e-06,
      "loss": 0.4713,
      "step": 10798
    },
    {
      "epoch": 1.3240559097596862,
      "grad_norm": 1.8698384534329806,
      "learning_rate": 1.4055807370866488e-06,
      "loss": 0.4005,
      "step": 10799
    },
    {
      "epoch": 1.324178518881805,
      "grad_norm": 1.8895795876754349,
      "learning_rate": 1.4051250664000515e-06,
      "loss": 0.4271,
      "step": 10800
    },
    {
      "epoch": 1.3243011280039234,
      "grad_norm": 1.7638302223480937,
      "learning_rate": 1.404669440714752e-06,
      "loss": 0.4159,
      "step": 10801
    },
    {
      "epoch": 1.324423737126042,
      "grad_norm": 1.8803077079879496,
      "learning_rate": 1.404213860049477e-06,
      "loss": 0.4502,
      "step": 10802
    },
    {
      "epoch": 1.3245463462481608,
      "grad_norm": 1.9411329713773053,
      "learning_rate": 1.4037583244229522e-06,
      "loss": 0.4409,
      "step": 10803
    },
    {
      "epoch": 1.3246689553702795,
      "grad_norm": 2.0048952950540806,
      "learning_rate": 1.403302833853902e-06,
      "loss": 0.4333,
      "step": 10804
    },
    {
      "epoch": 1.3247915644923982,
      "grad_norm": 1.951596096854189,
      "learning_rate": 1.4028473883610468e-06,
      "loss": 0.41,
      "step": 10805
    },
    {
      "epoch": 1.324914173614517,
      "grad_norm": 1.9038319209585228,
      "learning_rate": 1.4023919879631054e-06,
      "loss": 0.4067,
      "step": 10806
    },
    {
      "epoch": 1.3250367827366356,
      "grad_norm": 1.8505612912126395,
      "learning_rate": 1.4019366326787965e-06,
      "loss": 0.4344,
      "step": 10807
    },
    {
      "epoch": 1.3251593918587543,
      "grad_norm": 1.7991468558618038,
      "learning_rate": 1.4014813225268378e-06,
      "loss": 0.4111,
      "step": 10808
    },
    {
      "epoch": 1.325282000980873,
      "grad_norm": 1.8577806933419878,
      "learning_rate": 1.4010260575259397e-06,
      "loss": 0.3613,
      "step": 10809
    },
    {
      "epoch": 1.3254046101029917,
      "grad_norm": 2.1086247972189462,
      "learning_rate": 1.400570837694816e-06,
      "loss": 0.3785,
      "step": 10810
    },
    {
      "epoch": 1.3255272192251104,
      "grad_norm": 1.8380637387530867,
      "learning_rate": 1.4001156630521784e-06,
      "loss": 0.4246,
      "step": 10811
    },
    {
      "epoch": 1.325649828347229,
      "grad_norm": 1.7831614960705742,
      "learning_rate": 1.3996605336167332e-06,
      "loss": 0.4808,
      "step": 10812
    },
    {
      "epoch": 1.3257724374693476,
      "grad_norm": 1.9621572123297488,
      "learning_rate": 1.3992054494071891e-06,
      "loss": 0.4108,
      "step": 10813
    },
    {
      "epoch": 1.3258950465914663,
      "grad_norm": 1.9445993917052318,
      "learning_rate": 1.3987504104422498e-06,
      "loss": 0.4227,
      "step": 10814
    },
    {
      "epoch": 1.326017655713585,
      "grad_norm": 1.9812914918208824,
      "learning_rate": 1.3982954167406171e-06,
      "loss": 0.4093,
      "step": 10815
    },
    {
      "epoch": 1.3261402648357037,
      "grad_norm": 2.025289085211325,
      "learning_rate": 1.3978404683209933e-06,
      "loss": 0.4281,
      "step": 10816
    },
    {
      "epoch": 1.3262628739578224,
      "grad_norm": 1.8930013429763615,
      "learning_rate": 1.3973855652020788e-06,
      "loss": 0.4281,
      "step": 10817
    },
    {
      "epoch": 1.326385483079941,
      "grad_norm": 1.9876703079243134,
      "learning_rate": 1.3969307074025692e-06,
      "loss": 0.425,
      "step": 10818
    },
    {
      "epoch": 1.3265080922020598,
      "grad_norm": 2.0043335863590905,
      "learning_rate": 1.3964758949411594e-06,
      "loss": 0.4772,
      "step": 10819
    },
    {
      "epoch": 1.3266307013241785,
      "grad_norm": 1.8497105936177483,
      "learning_rate": 1.396021127836544e-06,
      "loss": 0.3974,
      "step": 10820
    },
    {
      "epoch": 1.3267533104462972,
      "grad_norm": 1.9162851481248548,
      "learning_rate": 1.3955664061074166e-06,
      "loss": 0.4087,
      "step": 10821
    },
    {
      "epoch": 1.3268759195684159,
      "grad_norm": 1.9504265761402368,
      "learning_rate": 1.3951117297724634e-06,
      "loss": 0.4322,
      "step": 10822
    },
    {
      "epoch": 1.3269985286905346,
      "grad_norm": 1.7955847484032685,
      "learning_rate": 1.394657098850374e-06,
      "loss": 0.4544,
      "step": 10823
    },
    {
      "epoch": 1.3271211378126533,
      "grad_norm": 1.9624579120196577,
      "learning_rate": 1.3942025133598357e-06,
      "loss": 0.463,
      "step": 10824
    },
    {
      "epoch": 1.327243746934772,
      "grad_norm": 2.0800266964001635,
      "learning_rate": 1.393747973319532e-06,
      "loss": 0.4772,
      "step": 10825
    },
    {
      "epoch": 1.3273663560568907,
      "grad_norm": 2.0469124326064576,
      "learning_rate": 1.3932934787481437e-06,
      "loss": 0.4796,
      "step": 10826
    },
    {
      "epoch": 1.3274889651790094,
      "grad_norm": 1.807658412021446,
      "learning_rate": 1.392839029664353e-06,
      "loss": 0.443,
      "step": 10827
    },
    {
      "epoch": 1.327611574301128,
      "grad_norm": 2.108194948024072,
      "learning_rate": 1.392384626086839e-06,
      "loss": 0.4487,
      "step": 10828
    },
    {
      "epoch": 1.3277341834232468,
      "grad_norm": 2.0080852262404645,
      "learning_rate": 1.3919302680342778e-06,
      "loss": 0.4124,
      "step": 10829
    },
    {
      "epoch": 1.3278567925453655,
      "grad_norm": 1.9161833077775345,
      "learning_rate": 1.3914759555253432e-06,
      "loss": 0.4407,
      "step": 10830
    },
    {
      "epoch": 1.3279794016674842,
      "grad_norm": 2.0240552878197415,
      "learning_rate": 1.3910216885787103e-06,
      "loss": 0.4211,
      "step": 10831
    },
    {
      "epoch": 1.3281020107896029,
      "grad_norm": 1.940278805567213,
      "learning_rate": 1.3905674672130484e-06,
      "loss": 0.3941,
      "step": 10832
    },
    {
      "epoch": 1.3282246199117214,
      "grad_norm": 2.0625621690622298,
      "learning_rate": 1.390113291447029e-06,
      "loss": 0.438,
      "step": 10833
    },
    {
      "epoch": 1.32834722903384,
      "grad_norm": 1.861266783217368,
      "learning_rate": 1.3896591612993177e-06,
      "loss": 0.4152,
      "step": 10834
    },
    {
      "epoch": 1.3284698381559588,
      "grad_norm": 1.8925562526932842,
      "learning_rate": 1.3892050767885798e-06,
      "loss": 0.4048,
      "step": 10835
    },
    {
      "epoch": 1.3285924472780775,
      "grad_norm": 2.1249951727641165,
      "learning_rate": 1.3887510379334795e-06,
      "loss": 0.3987,
      "step": 10836
    },
    {
      "epoch": 1.3287150564001962,
      "grad_norm": 1.7827093040280302,
      "learning_rate": 1.3882970447526802e-06,
      "loss": 0.405,
      "step": 10837
    },
    {
      "epoch": 1.3288376655223149,
      "grad_norm": 1.841131728971879,
      "learning_rate": 1.3878430972648406e-06,
      "loss": 0.4272,
      "step": 10838
    },
    {
      "epoch": 1.3289602746444336,
      "grad_norm": 1.768288265703344,
      "learning_rate": 1.387389195488617e-06,
      "loss": 0.4221,
      "step": 10839
    },
    {
      "epoch": 1.3290828837665523,
      "grad_norm": 2.029592158805746,
      "learning_rate": 1.3869353394426677e-06,
      "loss": 0.4282,
      "step": 10840
    },
    {
      "epoch": 1.329205492888671,
      "grad_norm": 1.9121041024220289,
      "learning_rate": 1.3864815291456473e-06,
      "loss": 0.4726,
      "step": 10841
    },
    {
      "epoch": 1.3293281020107897,
      "grad_norm": 1.9377243735295913,
      "learning_rate": 1.3860277646162076e-06,
      "loss": 0.4203,
      "step": 10842
    },
    {
      "epoch": 1.3294507111329084,
      "grad_norm": 1.7387076501174341,
      "learning_rate": 1.3855740458729978e-06,
      "loss": 0.4319,
      "step": 10843
    },
    {
      "epoch": 1.3295733202550268,
      "grad_norm": 1.900565946359088,
      "learning_rate": 1.3851203729346685e-06,
      "loss": 0.4207,
      "step": 10844
    },
    {
      "epoch": 1.3296959293771455,
      "grad_norm": 1.8662765592355897,
      "learning_rate": 1.3846667458198648e-06,
      "loss": 0.42,
      "step": 10845
    },
    {
      "epoch": 1.3298185384992642,
      "grad_norm": 1.9846112413299761,
      "learning_rate": 1.3842131645472334e-06,
      "loss": 0.4166,
      "step": 10846
    },
    {
      "epoch": 1.329941147621383,
      "grad_norm": 1.9641844566060964,
      "learning_rate": 1.383759629135415e-06,
      "loss": 0.3899,
      "step": 10847
    },
    {
      "epoch": 1.3300637567435016,
      "grad_norm": 2.1520375176550575,
      "learning_rate": 1.3833061396030535e-06,
      "loss": 0.4162,
      "step": 10848
    },
    {
      "epoch": 1.3301863658656203,
      "grad_norm": 1.9360917995953355,
      "learning_rate": 1.382852695968785e-06,
      "loss": 0.4239,
      "step": 10849
    },
    {
      "epoch": 1.330308974987739,
      "grad_norm": 1.9910627124501985,
      "learning_rate": 1.3823992982512495e-06,
      "loss": 0.3941,
      "step": 10850
    },
    {
      "epoch": 1.3304315841098577,
      "grad_norm": 1.8745847154071156,
      "learning_rate": 1.3819459464690818e-06,
      "loss": 0.4232,
      "step": 10851
    },
    {
      "epoch": 1.3305541932319764,
      "grad_norm": 1.9082585761971014,
      "learning_rate": 1.381492640640914e-06,
      "loss": 0.4186,
      "step": 10852
    },
    {
      "epoch": 1.3306768023540951,
      "grad_norm": 1.8940403785313833,
      "learning_rate": 1.3810393807853786e-06,
      "loss": 0.4152,
      "step": 10853
    },
    {
      "epoch": 1.3307994114762138,
      "grad_norm": 1.997721850595597,
      "learning_rate": 1.3805861669211078e-06,
      "loss": 0.4265,
      "step": 10854
    },
    {
      "epoch": 1.3309220205983325,
      "grad_norm": 2.112575325349255,
      "learning_rate": 1.3801329990667251e-06,
      "loss": 0.4617,
      "step": 10855
    },
    {
      "epoch": 1.3310446297204512,
      "grad_norm": 2.1837167390759418,
      "learning_rate": 1.379679877240859e-06,
      "loss": 0.421,
      "step": 10856
    },
    {
      "epoch": 1.33116723884257,
      "grad_norm": 2.1060379008878036,
      "learning_rate": 1.379226801462134e-06,
      "loss": 0.4005,
      "step": 10857
    },
    {
      "epoch": 1.3312898479646886,
      "grad_norm": 2.0606597625422114,
      "learning_rate": 1.3787737717491717e-06,
      "loss": 0.4431,
      "step": 10858
    },
    {
      "epoch": 1.3314124570868073,
      "grad_norm": 1.8956315508099404,
      "learning_rate": 1.3783207881205917e-06,
      "loss": 0.4261,
      "step": 10859
    },
    {
      "epoch": 1.331535066208926,
      "grad_norm": 1.8252857881588251,
      "learning_rate": 1.3778678505950127e-06,
      "loss": 0.402,
      "step": 10860
    },
    {
      "epoch": 1.3316576753310447,
      "grad_norm": 1.9242284878837606,
      "learning_rate": 1.377414959191053e-06,
      "loss": 0.4209,
      "step": 10861
    },
    {
      "epoch": 1.3317802844531634,
      "grad_norm": 2.014429760216457,
      "learning_rate": 1.3769621139273243e-06,
      "loss": 0.4288,
      "step": 10862
    },
    {
      "epoch": 1.3319028935752821,
      "grad_norm": 1.991189330335743,
      "learning_rate": 1.3765093148224423e-06,
      "loss": 0.4172,
      "step": 10863
    },
    {
      "epoch": 1.3320255026974006,
      "grad_norm": 1.9474472976270345,
      "learning_rate": 1.3760565618950164e-06,
      "loss": 0.4459,
      "step": 10864
    },
    {
      "epoch": 1.3321481118195193,
      "grad_norm": 1.9669689759448286,
      "learning_rate": 1.3756038551636547e-06,
      "loss": 0.4256,
      "step": 10865
    },
    {
      "epoch": 1.332270720941638,
      "grad_norm": 2.057282514376171,
      "learning_rate": 1.3751511946469648e-06,
      "loss": 0.3946,
      "step": 10866
    },
    {
      "epoch": 1.3323933300637567,
      "grad_norm": 2.229083145499855,
      "learning_rate": 1.3746985803635534e-06,
      "loss": 0.4083,
      "step": 10867
    },
    {
      "epoch": 1.3325159391858754,
      "grad_norm": 1.960872917081508,
      "learning_rate": 1.3742460123320225e-06,
      "loss": 0.4444,
      "step": 10868
    },
    {
      "epoch": 1.332638548307994,
      "grad_norm": 1.8939611518450874,
      "learning_rate": 1.3737934905709726e-06,
      "loss": 0.4149,
      "step": 10869
    },
    {
      "epoch": 1.3327611574301128,
      "grad_norm": 1.9686809749046237,
      "learning_rate": 1.3733410150990049e-06,
      "loss": 0.4237,
      "step": 10870
    },
    {
      "epoch": 1.3328837665522315,
      "grad_norm": 1.9094895714299205,
      "learning_rate": 1.3728885859347161e-06,
      "loss": 0.4382,
      "step": 10871
    },
    {
      "epoch": 1.3330063756743502,
      "grad_norm": 2.0581216540139846,
      "learning_rate": 1.3724362030967008e-06,
      "loss": 0.4327,
      "step": 10872
    },
    {
      "epoch": 1.333128984796469,
      "grad_norm": 2.0227060947145814,
      "learning_rate": 1.3719838666035536e-06,
      "loss": 0.4515,
      "step": 10873
    },
    {
      "epoch": 1.3332515939185876,
      "grad_norm": 2.094439463389834,
      "learning_rate": 1.3715315764738678e-06,
      "loss": 0.4075,
      "step": 10874
    },
    {
      "epoch": 1.3333742030407063,
      "grad_norm": 1.974833691775156,
      "learning_rate": 1.3710793327262317e-06,
      "loss": 0.4254,
      "step": 10875
    },
    {
      "epoch": 1.3334968121628248,
      "grad_norm": 2.0649028249003902,
      "learning_rate": 1.3706271353792327e-06,
      "loss": 0.4144,
      "step": 10876
    },
    {
      "epoch": 1.3336194212849435,
      "grad_norm": 2.1152393531993416,
      "learning_rate": 1.370174984451459e-06,
      "loss": 0.4076,
      "step": 10877
    },
    {
      "epoch": 1.3337420304070622,
      "grad_norm": 1.873021151045132,
      "learning_rate": 1.3697228799614926e-06,
      "loss": 0.4426,
      "step": 10878
    },
    {
      "epoch": 1.3338646395291809,
      "grad_norm": 2.1072581890626143,
      "learning_rate": 1.3692708219279177e-06,
      "loss": 0.4137,
      "step": 10879
    },
    {
      "epoch": 1.3339872486512996,
      "grad_norm": 2.0988801097332597,
      "learning_rate": 1.368818810369313e-06,
      "loss": 0.4491,
      "step": 10880
    },
    {
      "epoch": 1.3341098577734183,
      "grad_norm": 1.9585386776961073,
      "learning_rate": 1.3683668453042587e-06,
      "loss": 0.4212,
      "step": 10881
    },
    {
      "epoch": 1.334232466895537,
      "grad_norm": 1.9514657940177245,
      "learning_rate": 1.3679149267513292e-06,
      "loss": 0.4102,
      "step": 10882
    },
    {
      "epoch": 1.3343550760176557,
      "grad_norm": 1.8885186829445457,
      "learning_rate": 1.3674630547291019e-06,
      "loss": 0.4394,
      "step": 10883
    },
    {
      "epoch": 1.3344776851397744,
      "grad_norm": 2.0155442724717294,
      "learning_rate": 1.3670112292561478e-06,
      "loss": 0.4529,
      "step": 10884
    },
    {
      "epoch": 1.334600294261893,
      "grad_norm": 1.8743801835502607,
      "learning_rate": 1.3665594503510368e-06,
      "loss": 0.4114,
      "step": 10885
    },
    {
      "epoch": 1.3347229033840118,
      "grad_norm": 2.0923156638335563,
      "learning_rate": 1.366107718032339e-06,
      "loss": 0.44,
      "step": 10886
    },
    {
      "epoch": 1.3348455125061305,
      "grad_norm": 2.0215975112438396,
      "learning_rate": 1.3656560323186227e-06,
      "loss": 0.417,
      "step": 10887
    },
    {
      "epoch": 1.3349681216282492,
      "grad_norm": 2.0917221594201547,
      "learning_rate": 1.3652043932284514e-06,
      "loss": 0.3902,
      "step": 10888
    },
    {
      "epoch": 1.3350907307503679,
      "grad_norm": 1.9165950278972115,
      "learning_rate": 1.364752800780388e-06,
      "loss": 0.4556,
      "step": 10889
    },
    {
      "epoch": 1.3352133398724866,
      "grad_norm": 2.127158965532642,
      "learning_rate": 1.3643012549929946e-06,
      "loss": 0.4528,
      "step": 10890
    },
    {
      "epoch": 1.3353359489946053,
      "grad_norm": 1.942823369768634,
      "learning_rate": 1.3638497558848296e-06,
      "loss": 0.4573,
      "step": 10891
    },
    {
      "epoch": 1.335458558116724,
      "grad_norm": 1.8987403280352275,
      "learning_rate": 1.363398303474452e-06,
      "loss": 0.4315,
      "step": 10892
    },
    {
      "epoch": 1.3355811672388427,
      "grad_norm": 2.0954639977334386,
      "learning_rate": 1.3629468977804154e-06,
      "loss": 0.4178,
      "step": 10893
    },
    {
      "epoch": 1.3357037763609614,
      "grad_norm": 2.008279346896758,
      "learning_rate": 1.362495538821275e-06,
      "loss": 0.4452,
      "step": 10894
    },
    {
      "epoch": 1.3358263854830799,
      "grad_norm": 1.8617526167445628,
      "learning_rate": 1.362044226615581e-06,
      "loss": 0.419,
      "step": 10895
    },
    {
      "epoch": 1.3359489946051986,
      "grad_norm": 1.906067652812006,
      "learning_rate": 1.3615929611818849e-06,
      "loss": 0.3955,
      "step": 10896
    },
    {
      "epoch": 1.3360716037273173,
      "grad_norm": 2.0839259819062086,
      "learning_rate": 1.3611417425387335e-06,
      "loss": 0.4498,
      "step": 10897
    },
    {
      "epoch": 1.336194212849436,
      "grad_norm": 1.930666888025227,
      "learning_rate": 1.3606905707046713e-06,
      "loss": 0.4085,
      "step": 10898
    },
    {
      "epoch": 1.3363168219715547,
      "grad_norm": 2.035460960055637,
      "learning_rate": 1.360239445698244e-06,
      "loss": 0.43,
      "step": 10899
    },
    {
      "epoch": 1.3364394310936734,
      "grad_norm": 1.7932269489748482,
      "learning_rate": 1.3597883675379942e-06,
      "loss": 0.4404,
      "step": 10900
    },
    {
      "epoch": 1.336562040215792,
      "grad_norm": 2.0568665270205684,
      "learning_rate": 1.359337336242461e-06,
      "loss": 0.4334,
      "step": 10901
    },
    {
      "epoch": 1.3366846493379108,
      "grad_norm": 1.7510424891819216,
      "learning_rate": 1.358886351830182e-06,
      "loss": 0.3927,
      "step": 10902
    },
    {
      "epoch": 1.3368072584600295,
      "grad_norm": 2.058433926144466,
      "learning_rate": 1.3584354143196937e-06,
      "loss": 0.4921,
      "step": 10903
    },
    {
      "epoch": 1.3369298675821482,
      "grad_norm": 2.010651355163482,
      "learning_rate": 1.357984523729533e-06,
      "loss": 0.4325,
      "step": 10904
    },
    {
      "epoch": 1.3370524767042669,
      "grad_norm": 1.8088083152951704,
      "learning_rate": 1.3575336800782278e-06,
      "loss": 0.4109,
      "step": 10905
    },
    {
      "epoch": 1.3371750858263856,
      "grad_norm": 1.985068899006535,
      "learning_rate": 1.3570828833843117e-06,
      "loss": 0.4128,
      "step": 10906
    },
    {
      "epoch": 1.337297694948504,
      "grad_norm": 1.735678235100045,
      "learning_rate": 1.356632133666313e-06,
      "loss": 0.4086,
      "step": 10907
    },
    {
      "epoch": 1.3374203040706227,
      "grad_norm": 2.0224599122887756,
      "learning_rate": 1.3561814309427568e-06,
      "loss": 0.4694,
      "step": 10908
    },
    {
      "epoch": 1.3375429131927414,
      "grad_norm": 1.8586798505916853,
      "learning_rate": 1.3557307752321698e-06,
      "loss": 0.3963,
      "step": 10909
    },
    {
      "epoch": 1.3376655223148601,
      "grad_norm": 1.9841381859248255,
      "learning_rate": 1.3552801665530736e-06,
      "loss": 0.4406,
      "step": 10910
    },
    {
      "epoch": 1.3377881314369788,
      "grad_norm": 1.985801792647366,
      "learning_rate": 1.3548296049239884e-06,
      "loss": 0.4488,
      "step": 10911
    },
    {
      "epoch": 1.3379107405590975,
      "grad_norm": 1.8446148092669714,
      "learning_rate": 1.3543790903634335e-06,
      "loss": 0.4209,
      "step": 10912
    },
    {
      "epoch": 1.3380333496812162,
      "grad_norm": 2.0101215096968206,
      "learning_rate": 1.3539286228899273e-06,
      "loss": 0.4571,
      "step": 10913
    },
    {
      "epoch": 1.338155958803335,
      "grad_norm": 1.8821023483994694,
      "learning_rate": 1.3534782025219836e-06,
      "loss": 0.4216,
      "step": 10914
    },
    {
      "epoch": 1.3382785679254536,
      "grad_norm": 1.9715631085759018,
      "learning_rate": 1.3530278292781146e-06,
      "loss": 0.4545,
      "step": 10915
    },
    {
      "epoch": 1.3384011770475723,
      "grad_norm": 1.8449265489134945,
      "learning_rate": 1.3525775031768323e-06,
      "loss": 0.428,
      "step": 10916
    },
    {
      "epoch": 1.338523786169691,
      "grad_norm": 1.8610728889337214,
      "learning_rate": 1.3521272242366479e-06,
      "loss": 0.4178,
      "step": 10917
    },
    {
      "epoch": 1.3386463952918097,
      "grad_norm": 1.912794019358751,
      "learning_rate": 1.3516769924760648e-06,
      "loss": 0.4456,
      "step": 10918
    },
    {
      "epoch": 1.3387690044139284,
      "grad_norm": 1.8220554117900103,
      "learning_rate": 1.3512268079135905e-06,
      "loss": 0.4091,
      "step": 10919
    },
    {
      "epoch": 1.3388916135360471,
      "grad_norm": 2.0140818040305555,
      "learning_rate": 1.3507766705677288e-06,
      "loss": 0.4053,
      "step": 10920
    },
    {
      "epoch": 1.3390142226581658,
      "grad_norm": 1.9518390694239411,
      "learning_rate": 1.3503265804569807e-06,
      "loss": 0.4481,
      "step": 10921
    },
    {
      "epoch": 1.3391368317802845,
      "grad_norm": 1.8946995614568214,
      "learning_rate": 1.3498765375998447e-06,
      "loss": 0.4533,
      "step": 10922
    },
    {
      "epoch": 1.3392594409024032,
      "grad_norm": 2.080664553116613,
      "learning_rate": 1.3494265420148183e-06,
      "loss": 0.4433,
      "step": 10923
    },
    {
      "epoch": 1.339382050024522,
      "grad_norm": 1.8612778147009978,
      "learning_rate": 1.3489765937203996e-06,
      "loss": 0.4168,
      "step": 10924
    },
    {
      "epoch": 1.3395046591466406,
      "grad_norm": 2.051929166618159,
      "learning_rate": 1.3485266927350803e-06,
      "loss": 0.4391,
      "step": 10925
    },
    {
      "epoch": 1.3396272682687593,
      "grad_norm": 2.1192532569272218,
      "learning_rate": 1.348076839077352e-06,
      "loss": 0.426,
      "step": 10926
    },
    {
      "epoch": 1.3397498773908778,
      "grad_norm": 1.9492786427594007,
      "learning_rate": 1.3476270327657053e-06,
      "loss": 0.4188,
      "step": 10927
    },
    {
      "epoch": 1.3398724865129965,
      "grad_norm": 1.928334812849865,
      "learning_rate": 1.3471772738186273e-06,
      "loss": 0.4072,
      "step": 10928
    },
    {
      "epoch": 1.3399950956351152,
      "grad_norm": 1.9975784927033386,
      "learning_rate": 1.346727562254605e-06,
      "loss": 0.4443,
      "step": 10929
    },
    {
      "epoch": 1.340117704757234,
      "grad_norm": 1.8257506236371484,
      "learning_rate": 1.3462778980921214e-06,
      "loss": 0.4477,
      "step": 10930
    },
    {
      "epoch": 1.3402403138793526,
      "grad_norm": 2.0420798688294797,
      "learning_rate": 1.3458282813496576e-06,
      "loss": 0.4642,
      "step": 10931
    },
    {
      "epoch": 1.3403629230014713,
      "grad_norm": 1.8777607657391358,
      "learning_rate": 1.3453787120456952e-06,
      "loss": 0.4314,
      "step": 10932
    },
    {
      "epoch": 1.34048553212359,
      "grad_norm": 1.9607178480104932,
      "learning_rate": 1.3449291901987127e-06,
      "loss": 0.3817,
      "step": 10933
    },
    {
      "epoch": 1.3406081412457087,
      "grad_norm": 1.834051171082689,
      "learning_rate": 1.3444797158271849e-06,
      "loss": 0.4218,
      "step": 10934
    },
    {
      "epoch": 1.3407307503678274,
      "grad_norm": 2.0719771146924577,
      "learning_rate": 1.344030288949586e-06,
      "loss": 0.4741,
      "step": 10935
    },
    {
      "epoch": 1.340853359489946,
      "grad_norm": 1.9980529490131407,
      "learning_rate": 1.3435809095843886e-06,
      "loss": 0.396,
      "step": 10936
    },
    {
      "epoch": 1.3409759686120648,
      "grad_norm": 2.0243863807610123,
      "learning_rate": 1.343131577750064e-06,
      "loss": 0.4717,
      "step": 10937
    },
    {
      "epoch": 1.3410985777341835,
      "grad_norm": 1.9214497201528606,
      "learning_rate": 1.3426822934650796e-06,
      "loss": 0.4329,
      "step": 10938
    },
    {
      "epoch": 1.341221186856302,
      "grad_norm": 2.0678176245762176,
      "learning_rate": 1.3422330567479008e-06,
      "loss": 0.4767,
      "step": 10939
    },
    {
      "epoch": 1.3413437959784207,
      "grad_norm": 1.865279710549465,
      "learning_rate": 1.3417838676169943e-06,
      "loss": 0.4169,
      "step": 10940
    },
    {
      "epoch": 1.3414664051005394,
      "grad_norm": 1.9403615100320277,
      "learning_rate": 1.3413347260908199e-06,
      "loss": 0.4543,
      "step": 10941
    },
    {
      "epoch": 1.341589014222658,
      "grad_norm": 1.9883100547393537,
      "learning_rate": 1.3408856321878407e-06,
      "loss": 0.4252,
      "step": 10942
    },
    {
      "epoch": 1.3417116233447768,
      "grad_norm": 1.9448433196778183,
      "learning_rate": 1.3404365859265132e-06,
      "loss": 0.4698,
      "step": 10943
    },
    {
      "epoch": 1.3418342324668955,
      "grad_norm": 2.1501784362257776,
      "learning_rate": 1.3399875873252956e-06,
      "loss": 0.4118,
      "step": 10944
    },
    {
      "epoch": 1.3419568415890142,
      "grad_norm": 1.851090622394317,
      "learning_rate": 1.3395386364026412e-06,
      "loss": 0.4333,
      "step": 10945
    },
    {
      "epoch": 1.3420794507111329,
      "grad_norm": 2.003209297322733,
      "learning_rate": 1.339089733177004e-06,
      "loss": 0.4013,
      "step": 10946
    },
    {
      "epoch": 1.3422020598332516,
      "grad_norm": 2.1082672658836463,
      "learning_rate": 1.3386408776668338e-06,
      "loss": 0.4375,
      "step": 10947
    },
    {
      "epoch": 1.3423246689553703,
      "grad_norm": 1.8816192446180156,
      "learning_rate": 1.3381920698905788e-06,
      "loss": 0.3924,
      "step": 10948
    },
    {
      "epoch": 1.342447278077489,
      "grad_norm": 1.9461439818807849,
      "learning_rate": 1.3377433098666864e-06,
      "loss": 0.4426,
      "step": 10949
    },
    {
      "epoch": 1.3425698871996077,
      "grad_norm": 2.1873298434622637,
      "learning_rate": 1.3372945976136037e-06,
      "loss": 0.4484,
      "step": 10950
    },
    {
      "epoch": 1.3426924963217264,
      "grad_norm": 1.9171642295789721,
      "learning_rate": 1.3368459331497695e-06,
      "loss": 0.4182,
      "step": 10951
    },
    {
      "epoch": 1.342815105443845,
      "grad_norm": 1.942947281942877,
      "learning_rate": 1.3363973164936267e-06,
      "loss": 0.4052,
      "step": 10952
    },
    {
      "epoch": 1.3429377145659638,
      "grad_norm": 1.8989803475689633,
      "learning_rate": 1.335948747663615e-06,
      "loss": 0.4556,
      "step": 10953
    },
    {
      "epoch": 1.3430603236880825,
      "grad_norm": 2.011674559766333,
      "learning_rate": 1.3355002266781707e-06,
      "loss": 0.44,
      "step": 10954
    },
    {
      "epoch": 1.3431829328102012,
      "grad_norm": 1.9811771259061264,
      "learning_rate": 1.3350517535557279e-06,
      "loss": 0.3901,
      "step": 10955
    },
    {
      "epoch": 1.3433055419323199,
      "grad_norm": 1.9352506026823202,
      "learning_rate": 1.3346033283147196e-06,
      "loss": 0.4441,
      "step": 10956
    },
    {
      "epoch": 1.3434281510544386,
      "grad_norm": 1.9196637709784738,
      "learning_rate": 1.3341549509735796e-06,
      "loss": 0.4296,
      "step": 10957
    },
    {
      "epoch": 1.343550760176557,
      "grad_norm": 1.8090776917525955,
      "learning_rate": 1.3337066215507336e-06,
      "loss": 0.4171,
      "step": 10958
    },
    {
      "epoch": 1.3436733692986758,
      "grad_norm": 1.9087552360869058,
      "learning_rate": 1.3332583400646113e-06,
      "loss": 0.4249,
      "step": 10959
    },
    {
      "epoch": 1.3437959784207945,
      "grad_norm": 1.8362925672493762,
      "learning_rate": 1.3328101065336369e-06,
      "loss": 0.4131,
      "step": 10960
    },
    {
      "epoch": 1.3439185875429132,
      "grad_norm": 1.983019527818816,
      "learning_rate": 1.3323619209762322e-06,
      "loss": 0.4419,
      "step": 10961
    },
    {
      "epoch": 1.3440411966650319,
      "grad_norm": 1.9917974713001105,
      "learning_rate": 1.3319137834108197e-06,
      "loss": 0.446,
      "step": 10962
    },
    {
      "epoch": 1.3441638057871506,
      "grad_norm": 1.8226147938524193,
      "learning_rate": 1.33146569385582e-06,
      "loss": 0.3862,
      "step": 10963
    },
    {
      "epoch": 1.3442864149092693,
      "grad_norm": 1.8370634732356859,
      "learning_rate": 1.3310176523296486e-06,
      "loss": 0.3991,
      "step": 10964
    },
    {
      "epoch": 1.344409024031388,
      "grad_norm": 2.2424893202965066,
      "learning_rate": 1.3305696588507206e-06,
      "loss": 0.4301,
      "step": 10965
    },
    {
      "epoch": 1.3445316331535067,
      "grad_norm": 1.9015665315089332,
      "learning_rate": 1.3301217134374505e-06,
      "loss": 0.3934,
      "step": 10966
    },
    {
      "epoch": 1.3446542422756254,
      "grad_norm": 1.9661845684211519,
      "learning_rate": 1.3296738161082492e-06,
      "loss": 0.4479,
      "step": 10967
    },
    {
      "epoch": 1.344776851397744,
      "grad_norm": 1.9019425837930968,
      "learning_rate": 1.3292259668815253e-06,
      "loss": 0.428,
      "step": 10968
    },
    {
      "epoch": 1.3448994605198628,
      "grad_norm": 1.897468598705865,
      "learning_rate": 1.3287781657756865e-06,
      "loss": 0.4339,
      "step": 10969
    },
    {
      "epoch": 1.3450220696419812,
      "grad_norm": 2.035113868822205,
      "learning_rate": 1.3283304128091395e-06,
      "loss": 0.4372,
      "step": 10970
    },
    {
      "epoch": 1.3451446787641,
      "grad_norm": 2.0896710538117547,
      "learning_rate": 1.3278827080002871e-06,
      "loss": 0.4533,
      "step": 10971
    },
    {
      "epoch": 1.3452672878862186,
      "grad_norm": 1.9163336671072235,
      "learning_rate": 1.3274350513675295e-06,
      "loss": 0.4001,
      "step": 10972
    },
    {
      "epoch": 1.3453898970083373,
      "grad_norm": 1.9502586460575368,
      "learning_rate": 1.326987442929268e-06,
      "loss": 0.4079,
      "step": 10973
    },
    {
      "epoch": 1.345512506130456,
      "grad_norm": 1.9269284441335717,
      "learning_rate": 1.3265398827038983e-06,
      "loss": 0.4353,
      "step": 10974
    },
    {
      "epoch": 1.3456351152525747,
      "grad_norm": 1.9706129147251894,
      "learning_rate": 1.3260923707098178e-06,
      "loss": 0.4856,
      "step": 10975
    },
    {
      "epoch": 1.3457577243746934,
      "grad_norm": 1.8781335614367651,
      "learning_rate": 1.3256449069654182e-06,
      "loss": 0.3852,
      "step": 10976
    },
    {
      "epoch": 1.3458803334968121,
      "grad_norm": 1.9194024262869342,
      "learning_rate": 1.3251974914890929e-06,
      "loss": 0.3999,
      "step": 10977
    },
    {
      "epoch": 1.3460029426189308,
      "grad_norm": 2.1971045603737185,
      "learning_rate": 1.3247501242992293e-06,
      "loss": 0.411,
      "step": 10978
    },
    {
      "epoch": 1.3461255517410495,
      "grad_norm": 1.95285159736614,
      "learning_rate": 1.3243028054142176e-06,
      "loss": 0.4341,
      "step": 10979
    },
    {
      "epoch": 1.3462481608631682,
      "grad_norm": 1.7063067100801605,
      "learning_rate": 1.3238555348524419e-06,
      "loss": 0.4322,
      "step": 10980
    },
    {
      "epoch": 1.346370769985287,
      "grad_norm": 1.9915018891161942,
      "learning_rate": 1.3234083126322847e-06,
      "loss": 0.4122,
      "step": 10981
    },
    {
      "epoch": 1.3464933791074056,
      "grad_norm": 1.9811147966836578,
      "learning_rate": 1.3229611387721291e-06,
      "loss": 0.4042,
      "step": 10982
    },
    {
      "epoch": 1.3466159882295243,
      "grad_norm": 2.0321672917145506,
      "learning_rate": 1.3225140132903552e-06,
      "loss": 0.4328,
      "step": 10983
    },
    {
      "epoch": 1.346738597351643,
      "grad_norm": 1.9065496824080643,
      "learning_rate": 1.3220669362053402e-06,
      "loss": 0.4358,
      "step": 10984
    },
    {
      "epoch": 1.3468612064737617,
      "grad_norm": 2.0099525324962753,
      "learning_rate": 1.3216199075354585e-06,
      "loss": 0.3932,
      "step": 10985
    },
    {
      "epoch": 1.3469838155958804,
      "grad_norm": 1.9669962193510966,
      "learning_rate": 1.3211729272990855e-06,
      "loss": 0.4187,
      "step": 10986
    },
    {
      "epoch": 1.3471064247179991,
      "grad_norm": 2.1453696644991185,
      "learning_rate": 1.3207259955145912e-06,
      "loss": 0.4162,
      "step": 10987
    },
    {
      "epoch": 1.3472290338401178,
      "grad_norm": 2.1659446160100266,
      "learning_rate": 1.3202791122003476e-06,
      "loss": 0.4392,
      "step": 10988
    },
    {
      "epoch": 1.3473516429622365,
      "grad_norm": 1.938046961374517,
      "learning_rate": 1.3198322773747197e-06,
      "loss": 0.424,
      "step": 10989
    },
    {
      "epoch": 1.347474252084355,
      "grad_norm": 1.9439157416839015,
      "learning_rate": 1.3193854910560755e-06,
      "loss": 0.4466,
      "step": 10990
    },
    {
      "epoch": 1.3475968612064737,
      "grad_norm": 1.967860069016155,
      "learning_rate": 1.3189387532627768e-06,
      "loss": 0.4339,
      "step": 10991
    },
    {
      "epoch": 1.3477194703285924,
      "grad_norm": 2.116051761118465,
      "learning_rate": 1.318492064013187e-06,
      "loss": 0.4221,
      "step": 10992
    },
    {
      "epoch": 1.347842079450711,
      "grad_norm": 2.090035035155755,
      "learning_rate": 1.3180454233256656e-06,
      "loss": 0.4364,
      "step": 10993
    },
    {
      "epoch": 1.3479646885728298,
      "grad_norm": 2.0147591208824465,
      "learning_rate": 1.3175988312185684e-06,
      "loss": 0.4684,
      "step": 10994
    },
    {
      "epoch": 1.3480872976949485,
      "grad_norm": 1.9821646906309418,
      "learning_rate": 1.3171522877102527e-06,
      "loss": 0.4156,
      "step": 10995
    },
    {
      "epoch": 1.3482099068170672,
      "grad_norm": 1.9757524709609353,
      "learning_rate": 1.3167057928190729e-06,
      "loss": 0.4336,
      "step": 10996
    },
    {
      "epoch": 1.348332515939186,
      "grad_norm": 2.229760794195089,
      "learning_rate": 1.31625934656338e-06,
      "loss": 0.4364,
      "step": 10997
    },
    {
      "epoch": 1.3484551250613046,
      "grad_norm": 1.9775343967594243,
      "learning_rate": 1.3158129489615223e-06,
      "loss": 0.4501,
      "step": 10998
    },
    {
      "epoch": 1.3485777341834233,
      "grad_norm": 1.8222403096730042,
      "learning_rate": 1.315366600031849e-06,
      "loss": 0.3869,
      "step": 10999
    },
    {
      "epoch": 1.348700343305542,
      "grad_norm": 1.94262701742343,
      "learning_rate": 1.314920299792708e-06,
      "loss": 0.3981,
      "step": 11000
    },
    {
      "epoch": 1.3488229524276605,
      "grad_norm": 1.9797725083862592,
      "learning_rate": 1.3144740482624383e-06,
      "loss": 0.4642,
      "step": 11001
    },
    {
      "epoch": 1.3489455615497792,
      "grad_norm": 1.8830645754429884,
      "learning_rate": 1.314027845459384e-06,
      "loss": 0.4197,
      "step": 11002
    },
    {
      "epoch": 1.3490681706718979,
      "grad_norm": 1.7851359343788655,
      "learning_rate": 1.3135816914018862e-06,
      "loss": 0.4506,
      "step": 11003
    },
    {
      "epoch": 1.3491907797940166,
      "grad_norm": 2.0347305807781635,
      "learning_rate": 1.3131355861082801e-06,
      "loss": 0.4237,
      "step": 11004
    },
    {
      "epoch": 1.3493133889161353,
      "grad_norm": 1.7531239032713721,
      "learning_rate": 1.3126895295969032e-06,
      "loss": 0.4414,
      "step": 11005
    },
    {
      "epoch": 1.349435998038254,
      "grad_norm": 1.8384213963604754,
      "learning_rate": 1.3122435218860891e-06,
      "loss": 0.4603,
      "step": 11006
    },
    {
      "epoch": 1.3495586071603727,
      "grad_norm": 1.8272914739564878,
      "learning_rate": 1.3117975629941678e-06,
      "loss": 0.4118,
      "step": 11007
    },
    {
      "epoch": 1.3496812162824914,
      "grad_norm": 2.1382313101911428,
      "learning_rate": 1.3113516529394705e-06,
      "loss": 0.4507,
      "step": 11008
    },
    {
      "epoch": 1.34980382540461,
      "grad_norm": 1.9376851351004298,
      "learning_rate": 1.3109057917403254e-06,
      "loss": 0.4319,
      "step": 11009
    },
    {
      "epoch": 1.3499264345267288,
      "grad_norm": 1.9157256475259947,
      "learning_rate": 1.3104599794150573e-06,
      "loss": 0.4374,
      "step": 11010
    },
    {
      "epoch": 1.3500490436488475,
      "grad_norm": 1.934297843746467,
      "learning_rate": 1.310014215981989e-06,
      "loss": 0.4592,
      "step": 11011
    },
    {
      "epoch": 1.3501716527709662,
      "grad_norm": 1.8892272710851996,
      "learning_rate": 1.309568501459443e-06,
      "loss": 0.4357,
      "step": 11012
    },
    {
      "epoch": 1.3502942618930849,
      "grad_norm": 1.9709882257088036,
      "learning_rate": 1.3091228358657416e-06,
      "loss": 0.4119,
      "step": 11013
    },
    {
      "epoch": 1.3504168710152036,
      "grad_norm": 2.207960077108373,
      "learning_rate": 1.308677219219197e-06,
      "loss": 0.4164,
      "step": 11014
    },
    {
      "epoch": 1.3505394801373223,
      "grad_norm": 1.9697726258436554,
      "learning_rate": 1.3082316515381289e-06,
      "loss": 0.4192,
      "step": 11015
    },
    {
      "epoch": 1.350662089259441,
      "grad_norm": 1.8333566735951872,
      "learning_rate": 1.3077861328408497e-06,
      "loss": 0.4524,
      "step": 11016
    },
    {
      "epoch": 1.3507846983815597,
      "grad_norm": 2.1312671719471186,
      "learning_rate": 1.307340663145672e-06,
      "loss": 0.4252,
      "step": 11017
    },
    {
      "epoch": 1.3509073075036784,
      "grad_norm": 1.9844938412050108,
      "learning_rate": 1.3068952424709031e-06,
      "loss": 0.4138,
      "step": 11018
    },
    {
      "epoch": 1.351029916625797,
      "grad_norm": 1.8796685471292225,
      "learning_rate": 1.3064498708348517e-06,
      "loss": 0.404,
      "step": 11019
    },
    {
      "epoch": 1.3511525257479158,
      "grad_norm": 1.9652662937903238,
      "learning_rate": 1.3060045482558248e-06,
      "loss": 0.4265,
      "step": 11020
    },
    {
      "epoch": 1.3512751348700343,
      "grad_norm": 1.799125692246554,
      "learning_rate": 1.3055592747521251e-06,
      "loss": 0.4395,
      "step": 11021
    },
    {
      "epoch": 1.351397743992153,
      "grad_norm": 1.9622202156340234,
      "learning_rate": 1.3051140503420524e-06,
      "loss": 0.4455,
      "step": 11022
    },
    {
      "epoch": 1.3515203531142717,
      "grad_norm": 1.837194421487768,
      "learning_rate": 1.3046688750439085e-06,
      "loss": 0.4409,
      "step": 11023
    },
    {
      "epoch": 1.3516429622363904,
      "grad_norm": 1.7666410373080943,
      "learning_rate": 1.3042237488759895e-06,
      "loss": 0.4148,
      "step": 11024
    },
    {
      "epoch": 1.351765571358509,
      "grad_norm": 2.0333398968296907,
      "learning_rate": 1.303778671856592e-06,
      "loss": 0.411,
      "step": 11025
    },
    {
      "epoch": 1.3518881804806278,
      "grad_norm": 2.0932153511112532,
      "learning_rate": 1.303333644004009e-06,
      "loss": 0.4673,
      "step": 11026
    },
    {
      "epoch": 1.3520107896027465,
      "grad_norm": 2.064080530893604,
      "learning_rate": 1.3028886653365308e-06,
      "loss": 0.4427,
      "step": 11027
    },
    {
      "epoch": 1.3521333987248652,
      "grad_norm": 1.951158794835792,
      "learning_rate": 1.3024437358724477e-06,
      "loss": 0.4103,
      "step": 11028
    },
    {
      "epoch": 1.3522560078469839,
      "grad_norm": 1.8905789752654358,
      "learning_rate": 1.3019988556300478e-06,
      "loss": 0.3886,
      "step": 11029
    },
    {
      "epoch": 1.3523786169691026,
      "grad_norm": 1.9224808254075747,
      "learning_rate": 1.3015540246276165e-06,
      "loss": 0.428,
      "step": 11030
    },
    {
      "epoch": 1.3525012260912213,
      "grad_norm": 1.9330020853994505,
      "learning_rate": 1.3011092428834348e-06,
      "loss": 0.4336,
      "step": 11031
    },
    {
      "epoch": 1.35262383521334,
      "grad_norm": 1.8959054661238925,
      "learning_rate": 1.3006645104157861e-06,
      "loss": 0.4395,
      "step": 11032
    },
    {
      "epoch": 1.3527464443354584,
      "grad_norm": 2.0087322117333493,
      "learning_rate": 1.3002198272429503e-06,
      "loss": 0.4572,
      "step": 11033
    },
    {
      "epoch": 1.3528690534575771,
      "grad_norm": 2.023187322848293,
      "learning_rate": 1.2997751933832038e-06,
      "loss": 0.4676,
      "step": 11034
    },
    {
      "epoch": 1.3529916625796958,
      "grad_norm": 1.9233003748870938,
      "learning_rate": 1.2993306088548202e-06,
      "loss": 0.4309,
      "step": 11035
    },
    {
      "epoch": 1.3531142717018145,
      "grad_norm": 2.147563996175926,
      "learning_rate": 1.2988860736760756e-06,
      "loss": 0.4161,
      "step": 11036
    },
    {
      "epoch": 1.3532368808239332,
      "grad_norm": 1.838574873379109,
      "learning_rate": 1.2984415878652383e-06,
      "loss": 0.3997,
      "step": 11037
    },
    {
      "epoch": 1.353359489946052,
      "grad_norm": 1.994748799120817,
      "learning_rate": 1.2979971514405804e-06,
      "loss": 0.4554,
      "step": 11038
    },
    {
      "epoch": 1.3534820990681706,
      "grad_norm": 1.877468719447401,
      "learning_rate": 1.2975527644203664e-06,
      "loss": 0.4447,
      "step": 11039
    },
    {
      "epoch": 1.3536047081902893,
      "grad_norm": 2.2197677026130433,
      "learning_rate": 1.2971084268228634e-06,
      "loss": 0.4058,
      "step": 11040
    },
    {
      "epoch": 1.353727317312408,
      "grad_norm": 1.8606401160687995,
      "learning_rate": 1.2966641386663328e-06,
      "loss": 0.4251,
      "step": 11041
    },
    {
      "epoch": 1.3538499264345267,
      "grad_norm": 1.8903167317187834,
      "learning_rate": 1.2962198999690373e-06,
      "loss": 0.4219,
      "step": 11042
    },
    {
      "epoch": 1.3539725355566454,
      "grad_norm": 2.0313429198588806,
      "learning_rate": 1.2957757107492353e-06,
      "loss": 0.4709,
      "step": 11043
    },
    {
      "epoch": 1.3540951446787641,
      "grad_norm": 2.0162964427028385,
      "learning_rate": 1.2953315710251823e-06,
      "loss": 0.4477,
      "step": 11044
    },
    {
      "epoch": 1.3542177538008828,
      "grad_norm": 2.091002653908817,
      "learning_rate": 1.2948874808151345e-06,
      "loss": 0.4191,
      "step": 11045
    },
    {
      "epoch": 1.3543403629230015,
      "grad_norm": 1.926071108571457,
      "learning_rate": 1.2944434401373465e-06,
      "loss": 0.4502,
      "step": 11046
    },
    {
      "epoch": 1.3544629720451202,
      "grad_norm": 1.909944356572315,
      "learning_rate": 1.2939994490100655e-06,
      "loss": 0.4334,
      "step": 11047
    },
    {
      "epoch": 1.354585581167239,
      "grad_norm": 1.9483613083342242,
      "learning_rate": 1.2935555074515423e-06,
      "loss": 0.4087,
      "step": 11048
    },
    {
      "epoch": 1.3547081902893576,
      "grad_norm": 1.8129960783628802,
      "learning_rate": 1.2931116154800246e-06,
      "loss": 0.3952,
      "step": 11049
    },
    {
      "epoch": 1.3548307994114763,
      "grad_norm": 2.201523558566915,
      "learning_rate": 1.2926677731137561e-06,
      "loss": 0.4263,
      "step": 11050
    },
    {
      "epoch": 1.354953408533595,
      "grad_norm": 2.038597012421775,
      "learning_rate": 1.2922239803709785e-06,
      "loss": 0.4324,
      "step": 11051
    },
    {
      "epoch": 1.3550760176557135,
      "grad_norm": 2.0395069192612585,
      "learning_rate": 1.2917802372699334e-06,
      "loss": 0.4511,
      "step": 11052
    },
    {
      "epoch": 1.3551986267778322,
      "grad_norm": 2.079590847310631,
      "learning_rate": 1.2913365438288608e-06,
      "loss": 0.4357,
      "step": 11053
    },
    {
      "epoch": 1.355321235899951,
      "grad_norm": 1.9289646838491832,
      "learning_rate": 1.2908929000659947e-06,
      "loss": 0.4082,
      "step": 11054
    },
    {
      "epoch": 1.3554438450220696,
      "grad_norm": 1.869487310587727,
      "learning_rate": 1.2904493059995723e-06,
      "loss": 0.4373,
      "step": 11055
    },
    {
      "epoch": 1.3555664541441883,
      "grad_norm": 1.9598692199080872,
      "learning_rate": 1.2900057616478245e-06,
      "loss": 0.4663,
      "step": 11056
    },
    {
      "epoch": 1.355689063266307,
      "grad_norm": 1.9800725357276463,
      "learning_rate": 1.2895622670289812e-06,
      "loss": 0.3981,
      "step": 11057
    },
    {
      "epoch": 1.3558116723884257,
      "grad_norm": 2.0591697154226,
      "learning_rate": 1.2891188221612716e-06,
      "loss": 0.4531,
      "step": 11058
    },
    {
      "epoch": 1.3559342815105444,
      "grad_norm": 2.123618125486444,
      "learning_rate": 1.288675427062923e-06,
      "loss": 0.3869,
      "step": 11059
    },
    {
      "epoch": 1.356056890632663,
      "grad_norm": 1.9217553397641651,
      "learning_rate": 1.2882320817521587e-06,
      "loss": 0.4648,
      "step": 11060
    },
    {
      "epoch": 1.3561794997547818,
      "grad_norm": 1.8280233827025514,
      "learning_rate": 1.2877887862472006e-06,
      "loss": 0.4644,
      "step": 11061
    },
    {
      "epoch": 1.3563021088769005,
      "grad_norm": 1.8054848525214338,
      "learning_rate": 1.2873455405662699e-06,
      "loss": 0.4232,
      "step": 11062
    },
    {
      "epoch": 1.3564247179990192,
      "grad_norm": 1.9547860008410465,
      "learning_rate": 1.2869023447275847e-06,
      "loss": 0.3908,
      "step": 11063
    },
    {
      "epoch": 1.3565473271211377,
      "grad_norm": 1.7626920586148493,
      "learning_rate": 1.2864591987493596e-06,
      "loss": 0.3822,
      "step": 11064
    },
    {
      "epoch": 1.3566699362432564,
      "grad_norm": 2.013790043957605,
      "learning_rate": 1.2860161026498096e-06,
      "loss": 0.432,
      "step": 11065
    },
    {
      "epoch": 1.356792545365375,
      "grad_norm": 1.9514134109319714,
      "learning_rate": 1.2855730564471486e-06,
      "loss": 0.4181,
      "step": 11066
    },
    {
      "epoch": 1.3569151544874938,
      "grad_norm": 1.8291371412548514,
      "learning_rate": 1.2851300601595846e-06,
      "loss": 0.3813,
      "step": 11067
    },
    {
      "epoch": 1.3570377636096125,
      "grad_norm": 1.9230513929167141,
      "learning_rate": 1.2846871138053245e-06,
      "loss": 0.4758,
      "step": 11068
    },
    {
      "epoch": 1.3571603727317312,
      "grad_norm": 1.9585402374795764,
      "learning_rate": 1.284244217402577e-06,
      "loss": 0.3897,
      "step": 11069
    },
    {
      "epoch": 1.3572829818538499,
      "grad_norm": 1.815717738985523,
      "learning_rate": 1.2838013709695435e-06,
      "loss": 0.4526,
      "step": 11070
    },
    {
      "epoch": 1.3574055909759686,
      "grad_norm": 1.7023149085470635,
      "learning_rate": 1.2833585745244277e-06,
      "loss": 0.4268,
      "step": 11071
    },
    {
      "epoch": 1.3575282000980873,
      "grad_norm": 1.9250234392047831,
      "learning_rate": 1.2829158280854274e-06,
      "loss": 0.4024,
      "step": 11072
    },
    {
      "epoch": 1.357650809220206,
      "grad_norm": 1.854032330124736,
      "learning_rate": 1.2824731316707424e-06,
      "loss": 0.4333,
      "step": 11073
    },
    {
      "epoch": 1.3577734183423247,
      "grad_norm": 2.0080876687229763,
      "learning_rate": 1.2820304852985657e-06,
      "loss": 0.4075,
      "step": 11074
    },
    {
      "epoch": 1.3578960274644434,
      "grad_norm": 1.9253429822933696,
      "learning_rate": 1.2815878889870924e-06,
      "loss": 0.4173,
      "step": 11075
    },
    {
      "epoch": 1.358018636586562,
      "grad_norm": 2.030119981621597,
      "learning_rate": 1.2811453427545162e-06,
      "loss": 0.4239,
      "step": 11076
    },
    {
      "epoch": 1.3581412457086808,
      "grad_norm": 2.0258422666825417,
      "learning_rate": 1.2807028466190219e-06,
      "loss": 0.4637,
      "step": 11077
    },
    {
      "epoch": 1.3582638548307995,
      "grad_norm": 1.9721428256240852,
      "learning_rate": 1.2802604005987995e-06,
      "loss": 0.4022,
      "step": 11078
    },
    {
      "epoch": 1.3583864639529182,
      "grad_norm": 1.8238883299329263,
      "learning_rate": 1.2798180047120348e-06,
      "loss": 0.4448,
      "step": 11079
    },
    {
      "epoch": 1.3585090730750369,
      "grad_norm": 1.9774259387094966,
      "learning_rate": 1.2793756589769102e-06,
      "loss": 0.384,
      "step": 11080
    },
    {
      "epoch": 1.3586316821971556,
      "grad_norm": 1.9581317644874077,
      "learning_rate": 1.2789333634116063e-06,
      "loss": 0.4784,
      "step": 11081
    },
    {
      "epoch": 1.3587542913192743,
      "grad_norm": 2.1434674907355324,
      "learning_rate": 1.2784911180343037e-06,
      "loss": 0.4385,
      "step": 11082
    },
    {
      "epoch": 1.358876900441393,
      "grad_norm": 2.1650134119091056,
      "learning_rate": 1.2780489228631777e-06,
      "loss": 0.4476,
      "step": 11083
    },
    {
      "epoch": 1.3589995095635115,
      "grad_norm": 1.9435380356294119,
      "learning_rate": 1.2776067779164053e-06,
      "loss": 0.471,
      "step": 11084
    },
    {
      "epoch": 1.3591221186856302,
      "grad_norm": 1.8752510234300372,
      "learning_rate": 1.2771646832121571e-06,
      "loss": 0.4532,
      "step": 11085
    },
    {
      "epoch": 1.3592447278077489,
      "grad_norm": 1.8165601820356574,
      "learning_rate": 1.2767226387686065e-06,
      "loss": 0.3669,
      "step": 11086
    },
    {
      "epoch": 1.3593673369298676,
      "grad_norm": 1.905946299839483,
      "learning_rate": 1.2762806446039204e-06,
      "loss": 0.452,
      "step": 11087
    },
    {
      "epoch": 1.3594899460519863,
      "grad_norm": 1.9884160995518347,
      "learning_rate": 1.2758387007362666e-06,
      "loss": 0.429,
      "step": 11088
    },
    {
      "epoch": 1.359612555174105,
      "grad_norm": 1.9062311049755287,
      "learning_rate": 1.27539680718381e-06,
      "loss": 0.398,
      "step": 11089
    },
    {
      "epoch": 1.3597351642962237,
      "grad_norm": 1.8867015779512002,
      "learning_rate": 1.2749549639647113e-06,
      "loss": 0.4286,
      "step": 11090
    },
    {
      "epoch": 1.3598577734183424,
      "grad_norm": 1.9102826625380998,
      "learning_rate": 1.2745131710971323e-06,
      "loss": 0.4103,
      "step": 11091
    },
    {
      "epoch": 1.359980382540461,
      "grad_norm": 1.7880072693929154,
      "learning_rate": 1.2740714285992327e-06,
      "loss": 0.3665,
      "step": 11092
    },
    {
      "epoch": 1.3601029916625798,
      "grad_norm": 1.9699190168966403,
      "learning_rate": 1.2736297364891675e-06,
      "loss": 0.423,
      "step": 11093
    },
    {
      "epoch": 1.3602256007846985,
      "grad_norm": 1.9819056232115955,
      "learning_rate": 1.2731880947850904e-06,
      "loss": 0.4485,
      "step": 11094
    },
    {
      "epoch": 1.360348209906817,
      "grad_norm": 2.063320825752317,
      "learning_rate": 1.2727465035051545e-06,
      "loss": 0.4598,
      "step": 11095
    },
    {
      "epoch": 1.3604708190289356,
      "grad_norm": 1.936275608066348,
      "learning_rate": 1.2723049626675122e-06,
      "loss": 0.4257,
      "step": 11096
    },
    {
      "epoch": 1.3605934281510543,
      "grad_norm": 1.9210793886223339,
      "learning_rate": 1.2718634722903073e-06,
      "loss": 0.4246,
      "step": 11097
    },
    {
      "epoch": 1.360716037273173,
      "grad_norm": 1.8440003712340174,
      "learning_rate": 1.2714220323916876e-06,
      "loss": 0.4378,
      "step": 11098
    },
    {
      "epoch": 1.3608386463952917,
      "grad_norm": 1.9093796233098395,
      "learning_rate": 1.2709806429897985e-06,
      "loss": 0.4365,
      "step": 11099
    },
    {
      "epoch": 1.3609612555174104,
      "grad_norm": 1.8675147934688399,
      "learning_rate": 1.27053930410278e-06,
      "loss": 0.4391,
      "step": 11100
    },
    {
      "epoch": 1.3610838646395291,
      "grad_norm": 1.966505195542314,
      "learning_rate": 1.2700980157487735e-06,
      "loss": 0.4129,
      "step": 11101
    },
    {
      "epoch": 1.3612064737616478,
      "grad_norm": 2.1260094931731595,
      "learning_rate": 1.2696567779459163e-06,
      "loss": 0.4561,
      "step": 11102
    },
    {
      "epoch": 1.3613290828837665,
      "grad_norm": 1.9576909540895764,
      "learning_rate": 1.269215590712342e-06,
      "loss": 0.4119,
      "step": 11103
    },
    {
      "epoch": 1.3614516920058852,
      "grad_norm": 1.8399270905193372,
      "learning_rate": 1.2687744540661864e-06,
      "loss": 0.4511,
      "step": 11104
    },
    {
      "epoch": 1.361574301128004,
      "grad_norm": 2.2498247070655557,
      "learning_rate": 1.2683333680255814e-06,
      "loss": 0.473,
      "step": 11105
    },
    {
      "epoch": 1.3616969102501226,
      "grad_norm": 2.0078541439381694,
      "learning_rate": 1.2678923326086551e-06,
      "loss": 0.3869,
      "step": 11106
    },
    {
      "epoch": 1.3618195193722413,
      "grad_norm": 1.7875073178996586,
      "learning_rate": 1.2674513478335346e-06,
      "loss": 0.3991,
      "step": 11107
    },
    {
      "epoch": 1.36194212849436,
      "grad_norm": 1.923756298136517,
      "learning_rate": 1.2670104137183456e-06,
      "loss": 0.3968,
      "step": 11108
    },
    {
      "epoch": 1.3620647376164787,
      "grad_norm": 2.0873985090682123,
      "learning_rate": 1.2665695302812138e-06,
      "loss": 0.4178,
      "step": 11109
    },
    {
      "epoch": 1.3621873467385974,
      "grad_norm": 2.1837196009206905,
      "learning_rate": 1.2661286975402555e-06,
      "loss": 0.4545,
      "step": 11110
    },
    {
      "epoch": 1.3623099558607161,
      "grad_norm": 2.0151876342386505,
      "learning_rate": 1.265687915513592e-06,
      "loss": 0.4086,
      "step": 11111
    },
    {
      "epoch": 1.3624325649828348,
      "grad_norm": 1.9531107631550908,
      "learning_rate": 1.2652471842193415e-06,
      "loss": 0.4356,
      "step": 11112
    },
    {
      "epoch": 1.3625551741049535,
      "grad_norm": 1.9971348126572022,
      "learning_rate": 1.2648065036756178e-06,
      "loss": 0.4184,
      "step": 11113
    },
    {
      "epoch": 1.3626777832270722,
      "grad_norm": 2.003228375628797,
      "learning_rate": 1.2643658739005325e-06,
      "loss": 0.4519,
      "step": 11114
    },
    {
      "epoch": 1.3628003923491907,
      "grad_norm": 1.8593762893341792,
      "learning_rate": 1.2639252949121972e-06,
      "loss": 0.4092,
      "step": 11115
    },
    {
      "epoch": 1.3629230014713094,
      "grad_norm": 2.0220632926337885,
      "learning_rate": 1.2634847667287218e-06,
      "loss": 0.4438,
      "step": 11116
    },
    {
      "epoch": 1.363045610593428,
      "grad_norm": 1.7683734539470046,
      "learning_rate": 1.2630442893682112e-06,
      "loss": 0.4338,
      "step": 11117
    },
    {
      "epoch": 1.3631682197155468,
      "grad_norm": 1.9146768227212827,
      "learning_rate": 1.2626038628487696e-06,
      "loss": 0.4041,
      "step": 11118
    },
    {
      "epoch": 1.3632908288376655,
      "grad_norm": 1.8553975949260384,
      "learning_rate": 1.2621634871885006e-06,
      "loss": 0.435,
      "step": 11119
    },
    {
      "epoch": 1.3634134379597842,
      "grad_norm": 2.0344967057083805,
      "learning_rate": 1.2617231624055027e-06,
      "loss": 0.431,
      "step": 11120
    },
    {
      "epoch": 1.363536047081903,
      "grad_norm": 1.8643572181772357,
      "learning_rate": 1.2612828885178762e-06,
      "loss": 0.4142,
      "step": 11121
    },
    {
      "epoch": 1.3636586562040216,
      "grad_norm": 2.033525481830496,
      "learning_rate": 1.2608426655437148e-06,
      "loss": 0.4764,
      "step": 11122
    },
    {
      "epoch": 1.3637812653261403,
      "grad_norm": 1.9266384367876717,
      "learning_rate": 1.2604024935011145e-06,
      "loss": 0.3994,
      "step": 11123
    },
    {
      "epoch": 1.363903874448259,
      "grad_norm": 2.0590824539251864,
      "learning_rate": 1.2599623724081656e-06,
      "loss": 0.4127,
      "step": 11124
    },
    {
      "epoch": 1.3640264835703777,
      "grad_norm": 2.040174061864635,
      "learning_rate": 1.2595223022829592e-06,
      "loss": 0.4208,
      "step": 11125
    },
    {
      "epoch": 1.3641490926924964,
      "grad_norm": 2.059823400277569,
      "learning_rate": 1.2590822831435822e-06,
      "loss": 0.4124,
      "step": 11126
    },
    {
      "epoch": 1.3642717018146149,
      "grad_norm": 2.0323776789983787,
      "learning_rate": 1.2586423150081194e-06,
      "loss": 0.4094,
      "step": 11127
    },
    {
      "epoch": 1.3643943109367336,
      "grad_norm": 1.6908065683950555,
      "learning_rate": 1.258202397894655e-06,
      "loss": 0.3969,
      "step": 11128
    },
    {
      "epoch": 1.3645169200588523,
      "grad_norm": 1.8397387717445104,
      "learning_rate": 1.2577625318212717e-06,
      "loss": 0.4334,
      "step": 11129
    },
    {
      "epoch": 1.364639529180971,
      "grad_norm": 1.9252569204209575,
      "learning_rate": 1.2573227168060476e-06,
      "loss": 0.4709,
      "step": 11130
    },
    {
      "epoch": 1.3647621383030897,
      "grad_norm": 1.7807301791351513,
      "learning_rate": 1.2568829528670585e-06,
      "loss": 0.3873,
      "step": 11131
    },
    {
      "epoch": 1.3648847474252084,
      "grad_norm": 2.0680084335381714,
      "learning_rate": 1.2564432400223819e-06,
      "loss": 0.4359,
      "step": 11132
    },
    {
      "epoch": 1.365007356547327,
      "grad_norm": 1.9251448975378975,
      "learning_rate": 1.2560035782900889e-06,
      "loss": 0.421,
      "step": 11133
    },
    {
      "epoch": 1.3651299656694458,
      "grad_norm": 2.0783215544753433,
      "learning_rate": 1.255563967688252e-06,
      "loss": 0.4355,
      "step": 11134
    },
    {
      "epoch": 1.3652525747915645,
      "grad_norm": 1.8688262882796152,
      "learning_rate": 1.2551244082349382e-06,
      "loss": 0.4002,
      "step": 11135
    },
    {
      "epoch": 1.3653751839136832,
      "grad_norm": 1.9880561846458267,
      "learning_rate": 1.254684899948216e-06,
      "loss": 0.4384,
      "step": 11136
    },
    {
      "epoch": 1.3654977930358019,
      "grad_norm": 2.009784115132032,
      "learning_rate": 1.254245442846148e-06,
      "loss": 0.4318,
      "step": 11137
    },
    {
      "epoch": 1.3656204021579206,
      "grad_norm": 2.0314517827156755,
      "learning_rate": 1.2538060369467989e-06,
      "loss": 0.4362,
      "step": 11138
    },
    {
      "epoch": 1.3657430112800393,
      "grad_norm": 2.102315253466902,
      "learning_rate": 1.2533666822682278e-06,
      "loss": 0.444,
      "step": 11139
    },
    {
      "epoch": 1.365865620402158,
      "grad_norm": 1.8075847484143923,
      "learning_rate": 1.2529273788284921e-06,
      "loss": 0.4241,
      "step": 11140
    },
    {
      "epoch": 1.3659882295242767,
      "grad_norm": 1.7925990973024595,
      "learning_rate": 1.252488126645649e-06,
      "loss": 0.4391,
      "step": 11141
    },
    {
      "epoch": 1.3661108386463954,
      "grad_norm": 2.0646741290740716,
      "learning_rate": 1.2520489257377534e-06,
      "loss": 0.4457,
      "step": 11142
    },
    {
      "epoch": 1.366233447768514,
      "grad_norm": 1.826689379012147,
      "learning_rate": 1.2516097761228557e-06,
      "loss": 0.3839,
      "step": 11143
    },
    {
      "epoch": 1.3663560568906328,
      "grad_norm": 2.0024913306696233,
      "learning_rate": 1.2511706778190061e-06,
      "loss": 0.4642,
      "step": 11144
    },
    {
      "epoch": 1.3664786660127515,
      "grad_norm": 1.9635048903211052,
      "learning_rate": 1.2507316308442529e-06,
      "loss": 0.4434,
      "step": 11145
    },
    {
      "epoch": 1.36660127513487,
      "grad_norm": 1.8921240031262354,
      "learning_rate": 1.2502926352166416e-06,
      "loss": 0.4011,
      "step": 11146
    },
    {
      "epoch": 1.3667238842569887,
      "grad_norm": 1.955515251214281,
      "learning_rate": 1.2498536909542146e-06,
      "loss": 0.4297,
      "step": 11147
    },
    {
      "epoch": 1.3668464933791074,
      "grad_norm": 1.8613150934432658,
      "learning_rate": 1.249414798075014e-06,
      "loss": 0.4434,
      "step": 11148
    },
    {
      "epoch": 1.366969102501226,
      "grad_norm": 1.945698084394588,
      "learning_rate": 1.2489759565970801e-06,
      "loss": 0.4189,
      "step": 11149
    },
    {
      "epoch": 1.3670917116233448,
      "grad_norm": 1.8727860493998512,
      "learning_rate": 1.2485371665384486e-06,
      "loss": 0.4479,
      "step": 11150
    },
    {
      "epoch": 1.3672143207454635,
      "grad_norm": 1.9342511296017981,
      "learning_rate": 1.2480984279171555e-06,
      "loss": 0.4472,
      "step": 11151
    },
    {
      "epoch": 1.3673369298675822,
      "grad_norm": 1.855113505798946,
      "learning_rate": 1.2476597407512337e-06,
      "loss": 0.4137,
      "step": 11152
    },
    {
      "epoch": 1.3674595389897009,
      "grad_norm": 1.8612169486599088,
      "learning_rate": 1.2472211050587124e-06,
      "loss": 0.3958,
      "step": 11153
    },
    {
      "epoch": 1.3675821481118196,
      "grad_norm": 1.9774985700829564,
      "learning_rate": 1.2467825208576215e-06,
      "loss": 0.4235,
      "step": 11154
    },
    {
      "epoch": 1.3677047572339383,
      "grad_norm": 1.9112977886137383,
      "learning_rate": 1.246343988165989e-06,
      "loss": 0.4175,
      "step": 11155
    },
    {
      "epoch": 1.367827366356057,
      "grad_norm": 1.8530923029797048,
      "learning_rate": 1.2459055070018374e-06,
      "loss": 0.4491,
      "step": 11156
    },
    {
      "epoch": 1.3679499754781757,
      "grad_norm": 1.9602355442569255,
      "learning_rate": 1.2454670773831893e-06,
      "loss": 0.4021,
      "step": 11157
    },
    {
      "epoch": 1.3680725846002941,
      "grad_norm": 2.0678692517854604,
      "learning_rate": 1.245028699328066e-06,
      "loss": 0.433,
      "step": 11158
    },
    {
      "epoch": 1.3681951937224128,
      "grad_norm": 2.0725636438672552,
      "learning_rate": 1.244590372854485e-06,
      "loss": 0.475,
      "step": 11159
    },
    {
      "epoch": 1.3683178028445315,
      "grad_norm": 2.0337253134259603,
      "learning_rate": 1.244152097980461e-06,
      "loss": 0.423,
      "step": 11160
    },
    {
      "epoch": 1.3684404119666502,
      "grad_norm": 2.030061692767978,
      "learning_rate": 1.2437138747240094e-06,
      "loss": 0.4795,
      "step": 11161
    },
    {
      "epoch": 1.368563021088769,
      "grad_norm": 1.960066992367198,
      "learning_rate": 1.2432757031031426e-06,
      "loss": 0.3896,
      "step": 11162
    },
    {
      "epoch": 1.3686856302108876,
      "grad_norm": 1.9623214151299186,
      "learning_rate": 1.2428375831358688e-06,
      "loss": 0.4079,
      "step": 11163
    },
    {
      "epoch": 1.3688082393330063,
      "grad_norm": 1.8952754271866485,
      "learning_rate": 1.2423995148401954e-06,
      "loss": 0.4264,
      "step": 11164
    },
    {
      "epoch": 1.368930848455125,
      "grad_norm": 1.6873242199674148,
      "learning_rate": 1.2419614982341295e-06,
      "loss": 0.4476,
      "step": 11165
    },
    {
      "epoch": 1.3690534575772437,
      "grad_norm": 1.9006608937130354,
      "learning_rate": 1.2415235333356718e-06,
      "loss": 0.4222,
      "step": 11166
    },
    {
      "epoch": 1.3691760666993624,
      "grad_norm": 2.0770031977821515,
      "learning_rate": 1.2410856201628258e-06,
      "loss": 0.4454,
      "step": 11167
    },
    {
      "epoch": 1.3692986758214811,
      "grad_norm": 2.0141299724454798,
      "learning_rate": 1.2406477587335888e-06,
      "loss": 0.4378,
      "step": 11168
    },
    {
      "epoch": 1.3694212849435998,
      "grad_norm": 2.102666982109699,
      "learning_rate": 1.2402099490659595e-06,
      "loss": 0.4102,
      "step": 11169
    },
    {
      "epoch": 1.3695438940657185,
      "grad_norm": 1.9299828385531006,
      "learning_rate": 1.2397721911779303e-06,
      "loss": 0.4029,
      "step": 11170
    },
    {
      "epoch": 1.3696665031878372,
      "grad_norm": 2.1395704944909695,
      "learning_rate": 1.2393344850874953e-06,
      "loss": 0.4645,
      "step": 11171
    },
    {
      "epoch": 1.369789112309956,
      "grad_norm": 1.916627721996921,
      "learning_rate": 1.2388968308126464e-06,
      "loss": 0.4602,
      "step": 11172
    },
    {
      "epoch": 1.3699117214320746,
      "grad_norm": 1.9222830752618918,
      "learning_rate": 1.2384592283713689e-06,
      "loss": 0.4601,
      "step": 11173
    },
    {
      "epoch": 1.3700343305541933,
      "grad_norm": 1.7370577749974896,
      "learning_rate": 1.2380216777816503e-06,
      "loss": 0.4012,
      "step": 11174
    },
    {
      "epoch": 1.370156939676312,
      "grad_norm": 2.0260603129315453,
      "learning_rate": 1.2375841790614759e-06,
      "loss": 0.471,
      "step": 11175
    },
    {
      "epoch": 1.3702795487984307,
      "grad_norm": 1.7558723394747788,
      "learning_rate": 1.2371467322288267e-06,
      "loss": 0.4142,
      "step": 11176
    },
    {
      "epoch": 1.3704021579205494,
      "grad_norm": 1.7270202487766015,
      "learning_rate": 1.2367093373016814e-06,
      "loss": 0.4064,
      "step": 11177
    },
    {
      "epoch": 1.370524767042668,
      "grad_norm": 2.0328999937956933,
      "learning_rate": 1.2362719942980198e-06,
      "loss": 0.3892,
      "step": 11178
    },
    {
      "epoch": 1.3706473761647866,
      "grad_norm": 1.9763528460667352,
      "learning_rate": 1.2358347032358155e-06,
      "loss": 0.3952,
      "step": 11179
    },
    {
      "epoch": 1.3707699852869053,
      "grad_norm": 1.993553606604473,
      "learning_rate": 1.235397464133044e-06,
      "loss": 0.4345,
      "step": 11180
    },
    {
      "epoch": 1.370892594409024,
      "grad_norm": 1.9673937463245819,
      "learning_rate": 1.2349602770076746e-06,
      "loss": 0.4011,
      "step": 11181
    },
    {
      "epoch": 1.3710152035311427,
      "grad_norm": 2.048265336089534,
      "learning_rate": 1.2345231418776782e-06,
      "loss": 0.4641,
      "step": 11182
    },
    {
      "epoch": 1.3711378126532614,
      "grad_norm": 2.0532909532710177,
      "learning_rate": 1.2340860587610201e-06,
      "loss": 0.4513,
      "step": 11183
    },
    {
      "epoch": 1.37126042177538,
      "grad_norm": 1.9760461614786535,
      "learning_rate": 1.2336490276756672e-06,
      "loss": 0.4445,
      "step": 11184
    },
    {
      "epoch": 1.3713830308974988,
      "grad_norm": 1.7971975530197188,
      "learning_rate": 1.233212048639581e-06,
      "loss": 0.4078,
      "step": 11185
    },
    {
      "epoch": 1.3715056400196175,
      "grad_norm": 2.023157833725587,
      "learning_rate": 1.2327751216707215e-06,
      "loss": 0.4582,
      "step": 11186
    },
    {
      "epoch": 1.3716282491417362,
      "grad_norm": 1.9938981086465717,
      "learning_rate": 1.2323382467870478e-06,
      "loss": 0.4664,
      "step": 11187
    },
    {
      "epoch": 1.371750858263855,
      "grad_norm": 1.985242895174446,
      "learning_rate": 1.2319014240065174e-06,
      "loss": 0.425,
      "step": 11188
    },
    {
      "epoch": 1.3718734673859736,
      "grad_norm": 2.144270190909894,
      "learning_rate": 1.2314646533470834e-06,
      "loss": 0.4465,
      "step": 11189
    },
    {
      "epoch": 1.371996076508092,
      "grad_norm": 1.787986731393133,
      "learning_rate": 1.231027934826697e-06,
      "loss": 0.4262,
      "step": 11190
    },
    {
      "epoch": 1.3721186856302108,
      "grad_norm": 1.8969894115679038,
      "learning_rate": 1.230591268463309e-06,
      "loss": 0.4426,
      "step": 11191
    },
    {
      "epoch": 1.3722412947523295,
      "grad_norm": 1.736443977736195,
      "learning_rate": 1.2301546542748691e-06,
      "loss": 0.4016,
      "step": 11192
    },
    {
      "epoch": 1.3723639038744482,
      "grad_norm": 1.8544075315553656,
      "learning_rate": 1.229718092279319e-06,
      "loss": 0.4204,
      "step": 11193
    },
    {
      "epoch": 1.3724865129965669,
      "grad_norm": 2.060602126538161,
      "learning_rate": 1.2292815824946045e-06,
      "loss": 0.4456,
      "step": 11194
    },
    {
      "epoch": 1.3726091221186856,
      "grad_norm": 1.8658700403109978,
      "learning_rate": 1.228845124938667e-06,
      "loss": 0.4403,
      "step": 11195
    },
    {
      "epoch": 1.3727317312408043,
      "grad_norm": 2.0901875576318463,
      "learning_rate": 1.2284087196294447e-06,
      "loss": 0.4181,
      "step": 11196
    },
    {
      "epoch": 1.372854340362923,
      "grad_norm": 2.034959249952078,
      "learning_rate": 1.227972366584876e-06,
      "loss": 0.4796,
      "step": 11197
    },
    {
      "epoch": 1.3729769494850417,
      "grad_norm": 2.110439832665451,
      "learning_rate": 1.2275360658228942e-06,
      "loss": 0.4403,
      "step": 11198
    },
    {
      "epoch": 1.3730995586071604,
      "grad_norm": 2.0433783621858477,
      "learning_rate": 1.2270998173614336e-06,
      "loss": 0.4445,
      "step": 11199
    },
    {
      "epoch": 1.373222167729279,
      "grad_norm": 1.906738040358573,
      "learning_rate": 1.226663621218423e-06,
      "loss": 0.4354,
      "step": 11200
    },
    {
      "epoch": 1.3733447768513978,
      "grad_norm": 1.9750543832903569,
      "learning_rate": 1.226227477411793e-06,
      "loss": 0.4204,
      "step": 11201
    },
    {
      "epoch": 1.3734673859735165,
      "grad_norm": 1.885778637198433,
      "learning_rate": 1.2257913859594687e-06,
      "loss": 0.4079,
      "step": 11202
    },
    {
      "epoch": 1.3735899950956352,
      "grad_norm": 1.8209509454775281,
      "learning_rate": 1.225355346879373e-06,
      "loss": 0.4006,
      "step": 11203
    },
    {
      "epoch": 1.3737126042177539,
      "grad_norm": 1.9560955153581316,
      "learning_rate": 1.2249193601894294e-06,
      "loss": 0.4102,
      "step": 11204
    },
    {
      "epoch": 1.3738352133398726,
      "grad_norm": 2.0450786152262217,
      "learning_rate": 1.2244834259075592e-06,
      "loss": 0.4181,
      "step": 11205
    },
    {
      "epoch": 1.3739578224619913,
      "grad_norm": 2.0503783344595514,
      "learning_rate": 1.2240475440516766e-06,
      "loss": 0.4265,
      "step": 11206
    },
    {
      "epoch": 1.37408043158411,
      "grad_norm": 1.8007046726918376,
      "learning_rate": 1.2236117146396987e-06,
      "loss": 0.4317,
      "step": 11207
    },
    {
      "epoch": 1.3742030407062287,
      "grad_norm": 1.8773593317805153,
      "learning_rate": 1.22317593768954e-06,
      "loss": 0.4099,
      "step": 11208
    },
    {
      "epoch": 1.3743256498283472,
      "grad_norm": 1.8961310506715652,
      "learning_rate": 1.2227402132191105e-06,
      "loss": 0.4245,
      "step": 11209
    },
    {
      "epoch": 1.3744482589504659,
      "grad_norm": 1.9460557404265817,
      "learning_rate": 1.2223045412463189e-06,
      "loss": 0.4275,
      "step": 11210
    },
    {
      "epoch": 1.3745708680725846,
      "grad_norm": 2.0336816781292195,
      "learning_rate": 1.2218689217890723e-06,
      "loss": 0.4684,
      "step": 11211
    },
    {
      "epoch": 1.3746934771947033,
      "grad_norm": 1.8278130564679873,
      "learning_rate": 1.221433354865277e-06,
      "loss": 0.4186,
      "step": 11212
    },
    {
      "epoch": 1.374816086316822,
      "grad_norm": 2.065475679964007,
      "learning_rate": 1.2209978404928339e-06,
      "loss": 0.4459,
      "step": 11213
    },
    {
      "epoch": 1.3749386954389407,
      "grad_norm": 1.914708938407618,
      "learning_rate": 1.2205623786896431e-06,
      "loss": 0.4675,
      "step": 11214
    },
    {
      "epoch": 1.3750613045610593,
      "grad_norm": 2.0806799635892324,
      "learning_rate": 1.2201269694736046e-06,
      "loss": 0.4197,
      "step": 11215
    },
    {
      "epoch": 1.375183913683178,
      "grad_norm": 2.1174870689652003,
      "learning_rate": 1.2196916128626126e-06,
      "loss": 0.4425,
      "step": 11216
    },
    {
      "epoch": 1.3753065228052967,
      "grad_norm": 2.1415270383297944,
      "learning_rate": 1.2192563088745628e-06,
      "loss": 0.4723,
      "step": 11217
    },
    {
      "epoch": 1.3754291319274154,
      "grad_norm": 2.0032792467683453,
      "learning_rate": 1.218821057527345e-06,
      "loss": 0.4464,
      "step": 11218
    },
    {
      "epoch": 1.3755517410495341,
      "grad_norm": 1.8687479982570032,
      "learning_rate": 1.218385858838851e-06,
      "loss": 0.4227,
      "step": 11219
    },
    {
      "epoch": 1.3756743501716528,
      "grad_norm": 1.989649127791325,
      "learning_rate": 1.2179507128269663e-06,
      "loss": 0.4369,
      "step": 11220
    },
    {
      "epoch": 1.3757969592937713,
      "grad_norm": 1.9847203491574477,
      "learning_rate": 1.2175156195095778e-06,
      "loss": 0.4233,
      "step": 11221
    },
    {
      "epoch": 1.37591956841589,
      "grad_norm": 2.0350327503950516,
      "learning_rate": 1.2170805789045679e-06,
      "loss": 0.4306,
      "step": 11222
    },
    {
      "epoch": 1.3760421775380087,
      "grad_norm": 1.9084033788218686,
      "learning_rate": 1.2166455910298164e-06,
      "loss": 0.4315,
      "step": 11223
    },
    {
      "epoch": 1.3761647866601274,
      "grad_norm": 1.9207776558431373,
      "learning_rate": 1.216210655903203e-06,
      "loss": 0.4105,
      "step": 11224
    },
    {
      "epoch": 1.3762873957822461,
      "grad_norm": 2.0743961582973918,
      "learning_rate": 1.2157757735426055e-06,
      "loss": 0.4357,
      "step": 11225
    },
    {
      "epoch": 1.3764100049043648,
      "grad_norm": 1.926925935256418,
      "learning_rate": 1.2153409439658973e-06,
      "loss": 0.4554,
      "step": 11226
    },
    {
      "epoch": 1.3765326140264835,
      "grad_norm": 2.052391575779459,
      "learning_rate": 1.2149061671909496e-06,
      "loss": 0.4117,
      "step": 11227
    },
    {
      "epoch": 1.3766552231486022,
      "grad_norm": 2.163112698859204,
      "learning_rate": 1.2144714432356344e-06,
      "loss": 0.44,
      "step": 11228
    },
    {
      "epoch": 1.376777832270721,
      "grad_norm": 1.6929971784043873,
      "learning_rate": 1.2140367721178178e-06,
      "loss": 0.3811,
      "step": 11229
    },
    {
      "epoch": 1.3769004413928396,
      "grad_norm": 1.8411375421746885,
      "learning_rate": 1.2136021538553675e-06,
      "loss": 0.4368,
      "step": 11230
    },
    {
      "epoch": 1.3770230505149583,
      "grad_norm": 2.017146113246042,
      "learning_rate": 1.2131675884661447e-06,
      "loss": 0.4323,
      "step": 11231
    },
    {
      "epoch": 1.377145659637077,
      "grad_norm": 2.040865197170647,
      "learning_rate": 1.2127330759680137e-06,
      "loss": 0.4763,
      "step": 11232
    },
    {
      "epoch": 1.3772682687591957,
      "grad_norm": 1.9495772767059891,
      "learning_rate": 1.2122986163788311e-06,
      "loss": 0.4276,
      "step": 11233
    },
    {
      "epoch": 1.3773908778813144,
      "grad_norm": 1.8382524987687447,
      "learning_rate": 1.211864209716456e-06,
      "loss": 0.4446,
      "step": 11234
    },
    {
      "epoch": 1.3775134870034331,
      "grad_norm": 1.9053936034442955,
      "learning_rate": 1.2114298559987422e-06,
      "loss": 0.4156,
      "step": 11235
    },
    {
      "epoch": 1.3776360961255518,
      "grad_norm": 1.9567977790519826,
      "learning_rate": 1.2109955552435419e-06,
      "loss": 0.401,
      "step": 11236
    },
    {
      "epoch": 1.3777587052476705,
      "grad_norm": 2.1245500633967516,
      "learning_rate": 1.210561307468706e-06,
      "loss": 0.4803,
      "step": 11237
    },
    {
      "epoch": 1.3778813143697892,
      "grad_norm": 1.9725687986137375,
      "learning_rate": 1.210127112692084e-06,
      "loss": 0.4308,
      "step": 11238
    },
    {
      "epoch": 1.378003923491908,
      "grad_norm": 2.0008193203208813,
      "learning_rate": 1.2096929709315215e-06,
      "loss": 0.4313,
      "step": 11239
    },
    {
      "epoch": 1.3781265326140264,
      "grad_norm": 1.9307768511623369,
      "learning_rate": 1.209258882204861e-06,
      "loss": 0.4916,
      "step": 11240
    },
    {
      "epoch": 1.378249141736145,
      "grad_norm": 1.9107974291495984,
      "learning_rate": 1.2088248465299465e-06,
      "loss": 0.4478,
      "step": 11241
    },
    {
      "epoch": 1.3783717508582638,
      "grad_norm": 1.8942114869400617,
      "learning_rate": 1.2083908639246169e-06,
      "loss": 0.4531,
      "step": 11242
    },
    {
      "epoch": 1.3784943599803825,
      "grad_norm": 1.9440126107892615,
      "learning_rate": 1.2079569344067083e-06,
      "loss": 0.4148,
      "step": 11243
    },
    {
      "epoch": 1.3786169691025012,
      "grad_norm": 1.9681414689280377,
      "learning_rate": 1.2075230579940569e-06,
      "loss": 0.4292,
      "step": 11244
    },
    {
      "epoch": 1.37873957822462,
      "grad_norm": 1.8090514831277689,
      "learning_rate": 1.2070892347044974e-06,
      "loss": 0.4218,
      "step": 11245
    },
    {
      "epoch": 1.3788621873467386,
      "grad_norm": 1.7976640016837742,
      "learning_rate": 1.2066554645558578e-06,
      "loss": 0.4167,
      "step": 11246
    },
    {
      "epoch": 1.3789847964688573,
      "grad_norm": 1.910767073135734,
      "learning_rate": 1.2062217475659696e-06,
      "loss": 0.44,
      "step": 11247
    },
    {
      "epoch": 1.379107405590976,
      "grad_norm": 1.8934553435597887,
      "learning_rate": 1.2057880837526578e-06,
      "loss": 0.445,
      "step": 11248
    },
    {
      "epoch": 1.3792300147130947,
      "grad_norm": 1.9619102872966416,
      "learning_rate": 1.2053544731337463e-06,
      "loss": 0.4457,
      "step": 11249
    },
    {
      "epoch": 1.3793526238352134,
      "grad_norm": 1.849898735142869,
      "learning_rate": 1.2049209157270578e-06,
      "loss": 0.4342,
      "step": 11250
    },
    {
      "epoch": 1.379475232957332,
      "grad_norm": 2.1603529160470782,
      "learning_rate": 1.2044874115504133e-06,
      "loss": 0.4501,
      "step": 11251
    },
    {
      "epoch": 1.3795978420794506,
      "grad_norm": 2.0045186941033375,
      "learning_rate": 1.2040539606216298e-06,
      "loss": 0.4413,
      "step": 11252
    },
    {
      "epoch": 1.3797204512015693,
      "grad_norm": 1.8408373037300156,
      "learning_rate": 1.2036205629585218e-06,
      "loss": 0.3773,
      "step": 11253
    },
    {
      "epoch": 1.379843060323688,
      "grad_norm": 1.8990018899040675,
      "learning_rate": 1.2031872185789036e-06,
      "loss": 0.4089,
      "step": 11254
    },
    {
      "epoch": 1.3799656694458067,
      "grad_norm": 1.9881646575287866,
      "learning_rate": 1.2027539275005886e-06,
      "loss": 0.492,
      "step": 11255
    },
    {
      "epoch": 1.3800882785679254,
      "grad_norm": 1.9536770962988168,
      "learning_rate": 1.202320689741382e-06,
      "loss": 0.3897,
      "step": 11256
    },
    {
      "epoch": 1.380210887690044,
      "grad_norm": 1.829311440516881,
      "learning_rate": 1.2018875053190921e-06,
      "loss": 0.4518,
      "step": 11257
    },
    {
      "epoch": 1.3803334968121628,
      "grad_norm": 1.9733895678057252,
      "learning_rate": 1.201454374251525e-06,
      "loss": 0.3966,
      "step": 11258
    },
    {
      "epoch": 1.3804561059342815,
      "grad_norm": 2.053288437562838,
      "learning_rate": 1.2010212965564818e-06,
      "loss": 0.415,
      "step": 11259
    },
    {
      "epoch": 1.3805787150564002,
      "grad_norm": 1.8373685828754174,
      "learning_rate": 1.2005882722517623e-06,
      "loss": 0.4111,
      "step": 11260
    },
    {
      "epoch": 1.3807013241785189,
      "grad_norm": 2.001417255250608,
      "learning_rate": 1.2001553013551661e-06,
      "loss": 0.3934,
      "step": 11261
    },
    {
      "epoch": 1.3808239333006376,
      "grad_norm": 2.1479728086797096,
      "learning_rate": 1.1997223838844874e-06,
      "loss": 0.4598,
      "step": 11262
    },
    {
      "epoch": 1.3809465424227563,
      "grad_norm": 1.8864802832734073,
      "learning_rate": 1.1992895198575214e-06,
      "loss": 0.4324,
      "step": 11263
    },
    {
      "epoch": 1.381069151544875,
      "grad_norm": 2.0614932884278567,
      "learning_rate": 1.198856709292058e-06,
      "loss": 0.4038,
      "step": 11264
    },
    {
      "epoch": 1.3811917606669937,
      "grad_norm": 1.8441070047447023,
      "learning_rate": 1.1984239522058882e-06,
      "loss": 0.4218,
      "step": 11265
    },
    {
      "epoch": 1.3813143697891124,
      "grad_norm": 2.273158768414034,
      "learning_rate": 1.1979912486167973e-06,
      "loss": 0.4667,
      "step": 11266
    },
    {
      "epoch": 1.381436978911231,
      "grad_norm": 1.853579045356991,
      "learning_rate": 1.1975585985425713e-06,
      "loss": 0.4291,
      "step": 11267
    },
    {
      "epoch": 1.3815595880333498,
      "grad_norm": 1.8116538668819355,
      "learning_rate": 1.1971260020009944e-06,
      "loss": 0.4198,
      "step": 11268
    },
    {
      "epoch": 1.3816821971554685,
      "grad_norm": 1.8340834341330141,
      "learning_rate": 1.1966934590098436e-06,
      "loss": 0.3899,
      "step": 11269
    },
    {
      "epoch": 1.3818048062775872,
      "grad_norm": 1.9250746302985309,
      "learning_rate": 1.196260969586899e-06,
      "loss": 0.4594,
      "step": 11270
    },
    {
      "epoch": 1.3819274153997059,
      "grad_norm": 1.8600245150497914,
      "learning_rate": 1.1958285337499373e-06,
      "loss": 0.4523,
      "step": 11271
    },
    {
      "epoch": 1.3820500245218243,
      "grad_norm": 1.9506425632357551,
      "learning_rate": 1.195396151516732e-06,
      "loss": 0.4775,
      "step": 11272
    },
    {
      "epoch": 1.382172633643943,
      "grad_norm": 1.9809337870529466,
      "learning_rate": 1.1949638229050533e-06,
      "loss": 0.4179,
      "step": 11273
    },
    {
      "epoch": 1.3822952427660617,
      "grad_norm": 1.8846466844683472,
      "learning_rate": 1.1945315479326722e-06,
      "loss": 0.4678,
      "step": 11274
    },
    {
      "epoch": 1.3824178518881804,
      "grad_norm": 1.7272823791999214,
      "learning_rate": 1.1940993266173562e-06,
      "loss": 0.4376,
      "step": 11275
    },
    {
      "epoch": 1.3825404610102991,
      "grad_norm": 1.8900529352966329,
      "learning_rate": 1.19366715897687e-06,
      "loss": 0.4338,
      "step": 11276
    },
    {
      "epoch": 1.3826630701324178,
      "grad_norm": 1.8700457006711895,
      "learning_rate": 1.1932350450289753e-06,
      "loss": 0.449,
      "step": 11277
    },
    {
      "epoch": 1.3827856792545365,
      "grad_norm": 1.9949712622179663,
      "learning_rate": 1.1928029847914346e-06,
      "loss": 0.4087,
      "step": 11278
    },
    {
      "epoch": 1.3829082883766552,
      "grad_norm": 1.9914825690253624,
      "learning_rate": 1.1923709782820046e-06,
      "loss": 0.3914,
      "step": 11279
    },
    {
      "epoch": 1.383030897498774,
      "grad_norm": 2.074311905154754,
      "learning_rate": 1.1919390255184432e-06,
      "loss": 0.4424,
      "step": 11280
    },
    {
      "epoch": 1.3831535066208926,
      "grad_norm": 1.9255833809741014,
      "learning_rate": 1.1915071265185036e-06,
      "loss": 0.465,
      "step": 11281
    },
    {
      "epoch": 1.3832761157430113,
      "grad_norm": 2.089941415204288,
      "learning_rate": 1.1910752812999366e-06,
      "loss": 0.4764,
      "step": 11282
    },
    {
      "epoch": 1.38339872486513,
      "grad_norm": 2.085344091584988,
      "learning_rate": 1.190643489880493e-06,
      "loss": 0.4517,
      "step": 11283
    },
    {
      "epoch": 1.3835213339872485,
      "grad_norm": 1.9455648391326983,
      "learning_rate": 1.1902117522779207e-06,
      "loss": 0.4017,
      "step": 11284
    },
    {
      "epoch": 1.3836439431093672,
      "grad_norm": 2.0574437096312788,
      "learning_rate": 1.1897800685099644e-06,
      "loss": 0.4258,
      "step": 11285
    },
    {
      "epoch": 1.383766552231486,
      "grad_norm": 1.894634148091439,
      "learning_rate": 1.1893484385943657e-06,
      "loss": 0.4222,
      "step": 11286
    },
    {
      "epoch": 1.3838891613536046,
      "grad_norm": 1.911848714767016,
      "learning_rate": 1.1889168625488667e-06,
      "loss": 0.44,
      "step": 11287
    },
    {
      "epoch": 1.3840117704757233,
      "grad_norm": 1.8002276727812683,
      "learning_rate": 1.1884853403912076e-06,
      "loss": 0.4451,
      "step": 11288
    },
    {
      "epoch": 1.384134379597842,
      "grad_norm": 1.6430904394413168,
      "learning_rate": 1.1880538721391208e-06,
      "loss": 0.4509,
      "step": 11289
    },
    {
      "epoch": 1.3842569887199607,
      "grad_norm": 1.8738312592669106,
      "learning_rate": 1.1876224578103426e-06,
      "loss": 0.4213,
      "step": 11290
    },
    {
      "epoch": 1.3843795978420794,
      "grad_norm": 1.9962182321658746,
      "learning_rate": 1.1871910974226055e-06,
      "loss": 0.4163,
      "step": 11291
    },
    {
      "epoch": 1.3845022069641981,
      "grad_norm": 1.8571871227390349,
      "learning_rate": 1.1867597909936379e-06,
      "loss": 0.4425,
      "step": 11292
    },
    {
      "epoch": 1.3846248160863168,
      "grad_norm": 2.0999768808940646,
      "learning_rate": 1.1863285385411684e-06,
      "loss": 0.4032,
      "step": 11293
    },
    {
      "epoch": 1.3847474252084355,
      "grad_norm": 2.1045488465260087,
      "learning_rate": 1.1858973400829208e-06,
      "loss": 0.4321,
      "step": 11294
    },
    {
      "epoch": 1.3848700343305542,
      "grad_norm": 2.200077648049212,
      "learning_rate": 1.18546619563662e-06,
      "loss": 0.4553,
      "step": 11295
    },
    {
      "epoch": 1.384992643452673,
      "grad_norm": 2.319541967036666,
      "learning_rate": 1.1850351052199847e-06,
      "loss": 0.4109,
      "step": 11296
    },
    {
      "epoch": 1.3851152525747916,
      "grad_norm": 2.0379247287226203,
      "learning_rate": 1.1846040688507353e-06,
      "loss": 0.4478,
      "step": 11297
    },
    {
      "epoch": 1.3852378616969103,
      "grad_norm": 1.9752901048738534,
      "learning_rate": 1.1841730865465878e-06,
      "loss": 0.4606,
      "step": 11298
    },
    {
      "epoch": 1.385360470819029,
      "grad_norm": 2.016111869626984,
      "learning_rate": 1.1837421583252548e-06,
      "loss": 0.393,
      "step": 11299
    },
    {
      "epoch": 1.3854830799411477,
      "grad_norm": 1.9614189878672321,
      "learning_rate": 1.1833112842044494e-06,
      "loss": 0.4491,
      "step": 11300
    },
    {
      "epoch": 1.3856056890632664,
      "grad_norm": 1.904637886707869,
      "learning_rate": 1.1828804642018831e-06,
      "loss": 0.3987,
      "step": 11301
    },
    {
      "epoch": 1.3857282981853851,
      "grad_norm": 2.0550969480679213,
      "learning_rate": 1.1824496983352596e-06,
      "loss": 0.4376,
      "step": 11302
    },
    {
      "epoch": 1.3858509073075036,
      "grad_norm": 1.961503857437161,
      "learning_rate": 1.1820189866222858e-06,
      "loss": 0.4133,
      "step": 11303
    },
    {
      "epoch": 1.3859735164296223,
      "grad_norm": 2.187442419050922,
      "learning_rate": 1.1815883290806663e-06,
      "loss": 0.4233,
      "step": 11304
    },
    {
      "epoch": 1.386096125551741,
      "grad_norm": 2.0320818093966286,
      "learning_rate": 1.1811577257281003e-06,
      "loss": 0.43,
      "step": 11305
    },
    {
      "epoch": 1.3862187346738597,
      "grad_norm": 1.981656880826239,
      "learning_rate": 1.180727176582286e-06,
      "loss": 0.4055,
      "step": 11306
    },
    {
      "epoch": 1.3863413437959784,
      "grad_norm": 1.948674293849287,
      "learning_rate": 1.1802966816609198e-06,
      "loss": 0.4389,
      "step": 11307
    },
    {
      "epoch": 1.386463952918097,
      "grad_norm": 1.807026331056607,
      "learning_rate": 1.1798662409816975e-06,
      "loss": 0.4251,
      "step": 11308
    },
    {
      "epoch": 1.3865865620402158,
      "grad_norm": 1.8888429844221242,
      "learning_rate": 1.1794358545623097e-06,
      "loss": 0.4176,
      "step": 11309
    },
    {
      "epoch": 1.3867091711623345,
      "grad_norm": 1.9498173078779713,
      "learning_rate": 1.1790055224204455e-06,
      "loss": 0.4231,
      "step": 11310
    },
    {
      "epoch": 1.3868317802844532,
      "grad_norm": 1.9494610179567424,
      "learning_rate": 1.1785752445737935e-06,
      "loss": 0.383,
      "step": 11311
    },
    {
      "epoch": 1.386954389406572,
      "grad_norm": 1.7806231759366453,
      "learning_rate": 1.1781450210400377e-06,
      "loss": 0.3706,
      "step": 11312
    },
    {
      "epoch": 1.3870769985286906,
      "grad_norm": 1.954993911842875,
      "learning_rate": 1.1777148518368628e-06,
      "loss": 0.4176,
      "step": 11313
    },
    {
      "epoch": 1.3871996076508093,
      "grad_norm": 1.9904872998984484,
      "learning_rate": 1.1772847369819471e-06,
      "loss": 0.4621,
      "step": 11314
    },
    {
      "epoch": 1.3873222167729278,
      "grad_norm": 1.9287439002855706,
      "learning_rate": 1.1768546764929717e-06,
      "loss": 0.4397,
      "step": 11315
    },
    {
      "epoch": 1.3874448258950465,
      "grad_norm": 1.7122743457965626,
      "learning_rate": 1.1764246703876103e-06,
      "loss": 0.4118,
      "step": 11316
    },
    {
      "epoch": 1.3875674350171652,
      "grad_norm": 1.9873211825756336,
      "learning_rate": 1.175994718683539e-06,
      "loss": 0.4377,
      "step": 11317
    },
    {
      "epoch": 1.3876900441392839,
      "grad_norm": 1.9860604719614132,
      "learning_rate": 1.175564821398429e-06,
      "loss": 0.419,
      "step": 11318
    },
    {
      "epoch": 1.3878126532614026,
      "grad_norm": 2.081876180811606,
      "learning_rate": 1.1751349785499487e-06,
      "loss": 0.4642,
      "step": 11319
    },
    {
      "epoch": 1.3879352623835213,
      "grad_norm": 2.0516311964806104,
      "learning_rate": 1.174705190155766e-06,
      "loss": 0.4251,
      "step": 11320
    },
    {
      "epoch": 1.38805787150564,
      "grad_norm": 1.9399797988512562,
      "learning_rate": 1.1742754562335474e-06,
      "loss": 0.4024,
      "step": 11321
    },
    {
      "epoch": 1.3881804806277587,
      "grad_norm": 2.063619117748452,
      "learning_rate": 1.1738457768009543e-06,
      "loss": 0.5224,
      "step": 11322
    },
    {
      "epoch": 1.3883030897498774,
      "grad_norm": 1.975733480800522,
      "learning_rate": 1.1734161518756469e-06,
      "loss": 0.4214,
      "step": 11323
    },
    {
      "epoch": 1.388425698871996,
      "grad_norm": 1.9556790104122797,
      "learning_rate": 1.1729865814752848e-06,
      "loss": 0.4086,
      "step": 11324
    },
    {
      "epoch": 1.3885483079941148,
      "grad_norm": 1.9643790457384007,
      "learning_rate": 1.1725570656175228e-06,
      "loss": 0.4425,
      "step": 11325
    },
    {
      "epoch": 1.3886709171162335,
      "grad_norm": 1.9814016611796978,
      "learning_rate": 1.1721276043200164e-06,
      "loss": 0.4279,
      "step": 11326
    },
    {
      "epoch": 1.3887935262383522,
      "grad_norm": 1.8665706686276358,
      "learning_rate": 1.1716981976004151e-06,
      "loss": 0.415,
      "step": 11327
    },
    {
      "epoch": 1.3889161353604709,
      "grad_norm": 2.0487759002406807,
      "learning_rate": 1.1712688454763708e-06,
      "loss": 0.4423,
      "step": 11328
    },
    {
      "epoch": 1.3890387444825896,
      "grad_norm": 1.906189642756941,
      "learning_rate": 1.170839547965528e-06,
      "loss": 0.4332,
      "step": 11329
    },
    {
      "epoch": 1.3891613536047083,
      "grad_norm": 1.941458357300738,
      "learning_rate": 1.1704103050855339e-06,
      "loss": 0.4591,
      "step": 11330
    },
    {
      "epoch": 1.389283962726827,
      "grad_norm": 1.9614981361693309,
      "learning_rate": 1.16998111685403e-06,
      "loss": 0.4661,
      "step": 11331
    },
    {
      "epoch": 1.3894065718489457,
      "grad_norm": 2.0699669950451147,
      "learning_rate": 1.1695519832886559e-06,
      "loss": 0.4331,
      "step": 11332
    },
    {
      "epoch": 1.3895291809710644,
      "grad_norm": 1.8728086404445645,
      "learning_rate": 1.169122904407051e-06,
      "loss": 0.4484,
      "step": 11333
    },
    {
      "epoch": 1.389651790093183,
      "grad_norm": 1.9728035275693765,
      "learning_rate": 1.1686938802268517e-06,
      "loss": 0.3976,
      "step": 11334
    },
    {
      "epoch": 1.3897743992153015,
      "grad_norm": 2.08448382408572,
      "learning_rate": 1.1682649107656903e-06,
      "loss": 0.4681,
      "step": 11335
    },
    {
      "epoch": 1.3898970083374202,
      "grad_norm": 2.2282400012302648,
      "learning_rate": 1.1678359960411984e-06,
      "loss": 0.4825,
      "step": 11336
    },
    {
      "epoch": 1.390019617459539,
      "grad_norm": 2.1199017173155124,
      "learning_rate": 1.167407136071006e-06,
      "loss": 0.4266,
      "step": 11337
    },
    {
      "epoch": 1.3901422265816576,
      "grad_norm": 2.1025435043761593,
      "learning_rate": 1.1669783308727387e-06,
      "loss": 0.4624,
      "step": 11338
    },
    {
      "epoch": 1.3902648357037763,
      "grad_norm": 1.9727479668657821,
      "learning_rate": 1.166549580464023e-06,
      "loss": 0.4367,
      "step": 11339
    },
    {
      "epoch": 1.390387444825895,
      "grad_norm": 1.9263888819314772,
      "learning_rate": 1.1661208848624791e-06,
      "loss": 0.4512,
      "step": 11340
    },
    {
      "epoch": 1.3905100539480137,
      "grad_norm": 1.929010748956011,
      "learning_rate": 1.1656922440857294e-06,
      "loss": 0.3792,
      "step": 11341
    },
    {
      "epoch": 1.3906326630701324,
      "grad_norm": 2.0747379097623244,
      "learning_rate": 1.1652636581513895e-06,
      "loss": 0.3976,
      "step": 11342
    },
    {
      "epoch": 1.3907552721922511,
      "grad_norm": 2.092508391929354,
      "learning_rate": 1.1648351270770771e-06,
      "loss": 0.4283,
      "step": 11343
    },
    {
      "epoch": 1.3908778813143698,
      "grad_norm": 1.9938676732069378,
      "learning_rate": 1.164406650880405e-06,
      "loss": 0.4413,
      "step": 11344
    },
    {
      "epoch": 1.3910004904364885,
      "grad_norm": 1.6498920495800142,
      "learning_rate": 1.1639782295789831e-06,
      "loss": 0.4003,
      "step": 11345
    },
    {
      "epoch": 1.391123099558607,
      "grad_norm": 2.2306683951615507,
      "learning_rate": 1.163549863190421e-06,
      "loss": 0.3999,
      "step": 11346
    },
    {
      "epoch": 1.3912457086807257,
      "grad_norm": 1.9683697893672496,
      "learning_rate": 1.1631215517323265e-06,
      "loss": 0.4319,
      "step": 11347
    },
    {
      "epoch": 1.3913683178028444,
      "grad_norm": 1.8614629330459729,
      "learning_rate": 1.162693295222303e-06,
      "loss": 0.4316,
      "step": 11348
    },
    {
      "epoch": 1.3914909269249631,
      "grad_norm": 1.8678369065916605,
      "learning_rate": 1.1622650936779518e-06,
      "loss": 0.4223,
      "step": 11349
    },
    {
      "epoch": 1.3916135360470818,
      "grad_norm": 1.8086364956684917,
      "learning_rate": 1.1618369471168733e-06,
      "loss": 0.4424,
      "step": 11350
    },
    {
      "epoch": 1.3917361451692005,
      "grad_norm": 1.9219374433928125,
      "learning_rate": 1.1614088555566671e-06,
      "loss": 0.409,
      "step": 11351
    },
    {
      "epoch": 1.3918587542913192,
      "grad_norm": 2.0279917145556685,
      "learning_rate": 1.1609808190149251e-06,
      "loss": 0.3899,
      "step": 11352
    },
    {
      "epoch": 1.391981363413438,
      "grad_norm": 2.1277322378566033,
      "learning_rate": 1.160552837509242e-06,
      "loss": 0.4373,
      "step": 11353
    },
    {
      "epoch": 1.3921039725355566,
      "grad_norm": 1.9358419007064822,
      "learning_rate": 1.1601249110572094e-06,
      "loss": 0.4499,
      "step": 11354
    },
    {
      "epoch": 1.3922265816576753,
      "grad_norm": 1.9203121444046185,
      "learning_rate": 1.1596970396764151e-06,
      "loss": 0.4452,
      "step": 11355
    },
    {
      "epoch": 1.392349190779794,
      "grad_norm": 2.096276202847806,
      "learning_rate": 1.159269223384444e-06,
      "loss": 0.4346,
      "step": 11356
    },
    {
      "epoch": 1.3924717999019127,
      "grad_norm": 1.802072144097329,
      "learning_rate": 1.1588414621988828e-06,
      "loss": 0.4288,
      "step": 11357
    },
    {
      "epoch": 1.3925944090240314,
      "grad_norm": 1.8571919528290106,
      "learning_rate": 1.1584137561373107e-06,
      "loss": 0.4402,
      "step": 11358
    },
    {
      "epoch": 1.3927170181461501,
      "grad_norm": 1.8316608085978812,
      "learning_rate": 1.1579861052173093e-06,
      "loss": 0.4337,
      "step": 11359
    },
    {
      "epoch": 1.3928396272682688,
      "grad_norm": 1.9532235455331148,
      "learning_rate": 1.157558509456454e-06,
      "loss": 0.4436,
      "step": 11360
    },
    {
      "epoch": 1.3929622363903875,
      "grad_norm": 1.8543747685027403,
      "learning_rate": 1.1571309688723211e-06,
      "loss": 0.4253,
      "step": 11361
    },
    {
      "epoch": 1.3930848455125062,
      "grad_norm": 2.0555649748259,
      "learning_rate": 1.1567034834824822e-06,
      "loss": 0.4134,
      "step": 11362
    },
    {
      "epoch": 1.393207454634625,
      "grad_norm": 2.058961151739402,
      "learning_rate": 1.156276053304508e-06,
      "loss": 0.4437,
      "step": 11363
    },
    {
      "epoch": 1.3933300637567436,
      "grad_norm": 1.7814535743527091,
      "learning_rate": 1.155848678355969e-06,
      "loss": 0.4046,
      "step": 11364
    },
    {
      "epoch": 1.3934526728788623,
      "grad_norm": 1.8779981997631732,
      "learning_rate": 1.155421358654427e-06,
      "loss": 0.4345,
      "step": 11365
    },
    {
      "epoch": 1.3935752820009808,
      "grad_norm": 1.9956153035013613,
      "learning_rate": 1.1549940942174477e-06,
      "loss": 0.4466,
      "step": 11366
    },
    {
      "epoch": 1.3936978911230995,
      "grad_norm": 2.0669521364509227,
      "learning_rate": 1.1545668850625927e-06,
      "loss": 0.4476,
      "step": 11367
    },
    {
      "epoch": 1.3938205002452182,
      "grad_norm": 1.7440518875592106,
      "learning_rate": 1.154139731207421e-06,
      "loss": 0.3908,
      "step": 11368
    },
    {
      "epoch": 1.393943109367337,
      "grad_norm": 1.991301429756167,
      "learning_rate": 1.153712632669488e-06,
      "loss": 0.437,
      "step": 11369
    },
    {
      "epoch": 1.3940657184894556,
      "grad_norm": 1.8703021099670063,
      "learning_rate": 1.1532855894663492e-06,
      "loss": 0.4149,
      "step": 11370
    },
    {
      "epoch": 1.3941883276115743,
      "grad_norm": 1.9157503439991614,
      "learning_rate": 1.152858601615558e-06,
      "loss": 0.4466,
      "step": 11371
    },
    {
      "epoch": 1.394310936733693,
      "grad_norm": 1.8220522932971412,
      "learning_rate": 1.152431669134663e-06,
      "loss": 0.4788,
      "step": 11372
    },
    {
      "epoch": 1.3944335458558117,
      "grad_norm": 1.9189483853773335,
      "learning_rate": 1.152004792041211e-06,
      "loss": 0.4117,
      "step": 11373
    },
    {
      "epoch": 1.3945561549779304,
      "grad_norm": 2.0148649223580213,
      "learning_rate": 1.1515779703527497e-06,
      "loss": 0.4949,
      "step": 11374
    },
    {
      "epoch": 1.394678764100049,
      "grad_norm": 2.0014854772184614,
      "learning_rate": 1.1511512040868197e-06,
      "loss": 0.4501,
      "step": 11375
    },
    {
      "epoch": 1.3948013732221678,
      "grad_norm": 2.0361800455235186,
      "learning_rate": 1.1507244932609644e-06,
      "loss": 0.4082,
      "step": 11376
    },
    {
      "epoch": 1.3949239823442865,
      "grad_norm": 1.8039277129620879,
      "learning_rate": 1.150297837892721e-06,
      "loss": 0.4383,
      "step": 11377
    },
    {
      "epoch": 1.395046591466405,
      "grad_norm": 1.9674451419335774,
      "learning_rate": 1.1498712379996252e-06,
      "loss": 0.4104,
      "step": 11378
    },
    {
      "epoch": 1.3951692005885237,
      "grad_norm": 1.8372195121013357,
      "learning_rate": 1.1494446935992114e-06,
      "loss": 0.427,
      "step": 11379
    },
    {
      "epoch": 1.3952918097106424,
      "grad_norm": 1.930782677941942,
      "learning_rate": 1.1490182047090128e-06,
      "loss": 0.4434,
      "step": 11380
    },
    {
      "epoch": 1.395414418832761,
      "grad_norm": 2.007169657391564,
      "learning_rate": 1.1485917713465578e-06,
      "loss": 0.4209,
      "step": 11381
    },
    {
      "epoch": 1.3955370279548798,
      "grad_norm": 1.794513739350861,
      "learning_rate": 1.1481653935293723e-06,
      "loss": 0.4222,
      "step": 11382
    },
    {
      "epoch": 1.3956596370769985,
      "grad_norm": 1.8889409509269213,
      "learning_rate": 1.1477390712749829e-06,
      "loss": 0.4427,
      "step": 11383
    },
    {
      "epoch": 1.3957822461991172,
      "grad_norm": 1.9386326655866313,
      "learning_rate": 1.147312804600913e-06,
      "loss": 0.4229,
      "step": 11384
    },
    {
      "epoch": 1.3959048553212359,
      "grad_norm": 1.8278329403251592,
      "learning_rate": 1.1468865935246799e-06,
      "loss": 0.4315,
      "step": 11385
    },
    {
      "epoch": 1.3960274644433546,
      "grad_norm": 1.7575605021337644,
      "learning_rate": 1.146460438063803e-06,
      "loss": 0.4318,
      "step": 11386
    },
    {
      "epoch": 1.3961500735654733,
      "grad_norm": 2.031598341060519,
      "learning_rate": 1.1460343382357997e-06,
      "loss": 0.4025,
      "step": 11387
    },
    {
      "epoch": 1.396272682687592,
      "grad_norm": 2.0004360507292227,
      "learning_rate": 1.145608294058181e-06,
      "loss": 0.4163,
      "step": 11388
    },
    {
      "epoch": 1.3963952918097107,
      "grad_norm": 1.9981924779805116,
      "learning_rate": 1.1451823055484601e-06,
      "loss": 0.3922,
      "step": 11389
    },
    {
      "epoch": 1.3965179009318294,
      "grad_norm": 2.102516129022136,
      "learning_rate": 1.1447563727241439e-06,
      "loss": 0.3901,
      "step": 11390
    },
    {
      "epoch": 1.396640510053948,
      "grad_norm": 1.8222140742549842,
      "learning_rate": 1.1443304956027412e-06,
      "loss": 0.3966,
      "step": 11391
    },
    {
      "epoch": 1.3967631191760668,
      "grad_norm": 1.992646895362162,
      "learning_rate": 1.1439046742017543e-06,
      "loss": 0.432,
      "step": 11392
    },
    {
      "epoch": 1.3968857282981855,
      "grad_norm": 1.9758440323407667,
      "learning_rate": 1.1434789085386866e-06,
      "loss": 0.3795,
      "step": 11393
    },
    {
      "epoch": 1.3970083374203042,
      "grad_norm": 1.9919262260764794,
      "learning_rate": 1.1430531986310376e-06,
      "loss": 0.5021,
      "step": 11394
    },
    {
      "epoch": 1.3971309465424229,
      "grad_norm": 1.9991256826587909,
      "learning_rate": 1.1426275444963033e-06,
      "loss": 0.4072,
      "step": 11395
    },
    {
      "epoch": 1.3972535556645416,
      "grad_norm": 1.9414418114953005,
      "learning_rate": 1.14220194615198e-06,
      "loss": 0.4071,
      "step": 11396
    },
    {
      "epoch": 1.39737616478666,
      "grad_norm": 1.8773828132210546,
      "learning_rate": 1.1417764036155626e-06,
      "loss": 0.4691,
      "step": 11397
    },
    {
      "epoch": 1.3974987739087787,
      "grad_norm": 1.9243228566165986,
      "learning_rate": 1.1413509169045376e-06,
      "loss": 0.439,
      "step": 11398
    },
    {
      "epoch": 1.3976213830308974,
      "grad_norm": 1.891896594849574,
      "learning_rate": 1.1409254860363952e-06,
      "loss": 0.3755,
      "step": 11399
    },
    {
      "epoch": 1.3977439921530161,
      "grad_norm": 1.9378461862982685,
      "learning_rate": 1.1405001110286223e-06,
      "loss": 0.4039,
      "step": 11400
    },
    {
      "epoch": 1.3978666012751348,
      "grad_norm": 1.9348548785908368,
      "learning_rate": 1.1400747918987016e-06,
      "loss": 0.4483,
      "step": 11401
    },
    {
      "epoch": 1.3979892103972535,
      "grad_norm": 2.057982189813888,
      "learning_rate": 1.1396495286641136e-06,
      "loss": 0.4275,
      "step": 11402
    },
    {
      "epoch": 1.3981118195193722,
      "grad_norm": 1.6868776252379887,
      "learning_rate": 1.1392243213423382e-06,
      "loss": 0.4316,
      "step": 11403
    },
    {
      "epoch": 1.398234428641491,
      "grad_norm": 1.8191186519747011,
      "learning_rate": 1.1387991699508532e-06,
      "loss": 0.4288,
      "step": 11404
    },
    {
      "epoch": 1.3983570377636096,
      "grad_norm": 1.9526200848782247,
      "learning_rate": 1.1383740745071323e-06,
      "loss": 0.3842,
      "step": 11405
    },
    {
      "epoch": 1.3984796468857283,
      "grad_norm": 2.0802497078047355,
      "learning_rate": 1.1379490350286465e-06,
      "loss": 0.4353,
      "step": 11406
    },
    {
      "epoch": 1.398602256007847,
      "grad_norm": 2.1019192149443415,
      "learning_rate": 1.1375240515328676e-06,
      "loss": 0.4264,
      "step": 11407
    },
    {
      "epoch": 1.3987248651299657,
      "grad_norm": 1.9074997331908379,
      "learning_rate": 1.137099124037261e-06,
      "loss": 0.4307,
      "step": 11408
    },
    {
      "epoch": 1.3988474742520842,
      "grad_norm": 2.111080427174468,
      "learning_rate": 1.1366742525592946e-06,
      "loss": 0.4199,
      "step": 11409
    },
    {
      "epoch": 1.398970083374203,
      "grad_norm": 2.0706666821624573,
      "learning_rate": 1.136249437116429e-06,
      "loss": 0.4451,
      "step": 11410
    },
    {
      "epoch": 1.3990926924963216,
      "grad_norm": 1.8567705855141003,
      "learning_rate": 1.1358246777261267e-06,
      "loss": 0.4343,
      "step": 11411
    },
    {
      "epoch": 1.3992153016184403,
      "grad_norm": 2.0350834409327168,
      "learning_rate": 1.1353999744058445e-06,
      "loss": 0.4361,
      "step": 11412
    },
    {
      "epoch": 1.399337910740559,
      "grad_norm": 2.0476831390812578,
      "learning_rate": 1.1349753271730398e-06,
      "loss": 0.401,
      "step": 11413
    },
    {
      "epoch": 1.3994605198626777,
      "grad_norm": 2.011073668716605,
      "learning_rate": 1.134550736045166e-06,
      "loss": 0.3941,
      "step": 11414
    },
    {
      "epoch": 1.3995831289847964,
      "grad_norm": 1.8897207237834754,
      "learning_rate": 1.134126201039673e-06,
      "loss": 0.5226,
      "step": 11415
    },
    {
      "epoch": 1.3997057381069151,
      "grad_norm": 1.8877602898337649,
      "learning_rate": 1.1337017221740116e-06,
      "loss": 0.3932,
      "step": 11416
    },
    {
      "epoch": 1.3998283472290338,
      "grad_norm": 2.076335940402938,
      "learning_rate": 1.133277299465629e-06,
      "loss": 0.4301,
      "step": 11417
    },
    {
      "epoch": 1.3999509563511525,
      "grad_norm": 2.1117473666692974,
      "learning_rate": 1.132852932931969e-06,
      "loss": 0.4468,
      "step": 11418
    },
    {
      "epoch": 1.4000735654732712,
      "grad_norm": 2.0945593770298667,
      "learning_rate": 1.1324286225904728e-06,
      "loss": 0.4683,
      "step": 11419
    },
    {
      "epoch": 1.40019617459539,
      "grad_norm": 1.999139776604603,
      "learning_rate": 1.1320043684585827e-06,
      "loss": 0.4156,
      "step": 11420
    },
    {
      "epoch": 1.4003187837175086,
      "grad_norm": 1.9743157309105313,
      "learning_rate": 1.1315801705537336e-06,
      "loss": 0.3884,
      "step": 11421
    },
    {
      "epoch": 1.4004413928396273,
      "grad_norm": 1.814258938434556,
      "learning_rate": 1.131156028893363e-06,
      "loss": 0.3941,
      "step": 11422
    },
    {
      "epoch": 1.400564001961746,
      "grad_norm": 1.7835348350423348,
      "learning_rate": 1.130731943494902e-06,
      "loss": 0.3921,
      "step": 11423
    },
    {
      "epoch": 1.4006866110838647,
      "grad_norm": 1.9158584644781582,
      "learning_rate": 1.1303079143757831e-06,
      "loss": 0.4483,
      "step": 11424
    },
    {
      "epoch": 1.4008092202059834,
      "grad_norm": 1.9622574969063915,
      "learning_rate": 1.129883941553433e-06,
      "loss": 0.4205,
      "step": 11425
    },
    {
      "epoch": 1.4009318293281021,
      "grad_norm": 2.0078666252485124,
      "learning_rate": 1.1294600250452795e-06,
      "loss": 0.4168,
      "step": 11426
    },
    {
      "epoch": 1.4010544384502208,
      "grad_norm": 1.9931679651787582,
      "learning_rate": 1.1290361648687452e-06,
      "loss": 0.4428,
      "step": 11427
    },
    {
      "epoch": 1.4011770475723395,
      "grad_norm": 2.0980441495459075,
      "learning_rate": 1.1286123610412506e-06,
      "loss": 0.4043,
      "step": 11428
    },
    {
      "epoch": 1.401299656694458,
      "grad_norm": 1.8855239685747676,
      "learning_rate": 1.1281886135802158e-06,
      "loss": 0.409,
      "step": 11429
    },
    {
      "epoch": 1.4014222658165767,
      "grad_norm": 1.9216349678393356,
      "learning_rate": 1.1277649225030588e-06,
      "loss": 0.4471,
      "step": 11430
    },
    {
      "epoch": 1.4015448749386954,
      "grad_norm": 2.085207031805682,
      "learning_rate": 1.1273412878271928e-06,
      "loss": 0.4382,
      "step": 11431
    },
    {
      "epoch": 1.401667484060814,
      "grad_norm": 1.9361511048626263,
      "learning_rate": 1.1269177095700288e-06,
      "loss": 0.392,
      "step": 11432
    },
    {
      "epoch": 1.4017900931829328,
      "grad_norm": 1.8413725552732567,
      "learning_rate": 1.1264941877489788e-06,
      "loss": 0.4536,
      "step": 11433
    },
    {
      "epoch": 1.4019127023050515,
      "grad_norm": 2.000299148628877,
      "learning_rate": 1.1260707223814486e-06,
      "loss": 0.4155,
      "step": 11434
    },
    {
      "epoch": 1.4020353114271702,
      "grad_norm": 1.8934851597358455,
      "learning_rate": 1.1256473134848445e-06,
      "loss": 0.4292,
      "step": 11435
    },
    {
      "epoch": 1.402157920549289,
      "grad_norm": 1.8846633018387402,
      "learning_rate": 1.1252239610765684e-06,
      "loss": 0.4123,
      "step": 11436
    },
    {
      "epoch": 1.4022805296714076,
      "grad_norm": 2.0398819203490297,
      "learning_rate": 1.124800665174022e-06,
      "loss": 0.4284,
      "step": 11437
    },
    {
      "epoch": 1.4024031387935263,
      "grad_norm": 1.8704719356863442,
      "learning_rate": 1.1243774257946022e-06,
      "loss": 0.4061,
      "step": 11438
    },
    {
      "epoch": 1.402525747915645,
      "grad_norm": 1.8749128943242634,
      "learning_rate": 1.1239542429557062e-06,
      "loss": 0.4617,
      "step": 11439
    },
    {
      "epoch": 1.4026483570377635,
      "grad_norm": 2.0420581911794873,
      "learning_rate": 1.123531116674727e-06,
      "loss": 0.4623,
      "step": 11440
    },
    {
      "epoch": 1.4027709661598822,
      "grad_norm": 1.7387099020507457,
      "learning_rate": 1.123108046969055e-06,
      "loss": 0.4139,
      "step": 11441
    },
    {
      "epoch": 1.4028935752820009,
      "grad_norm": 1.8715943209937373,
      "learning_rate": 1.1226850338560796e-06,
      "loss": 0.4258,
      "step": 11442
    },
    {
      "epoch": 1.4030161844041196,
      "grad_norm": 1.9173712183166682,
      "learning_rate": 1.122262077353189e-06,
      "loss": 0.3958,
      "step": 11443
    },
    {
      "epoch": 1.4031387935262383,
      "grad_norm": 1.9840124118625784,
      "learning_rate": 1.1218391774777657e-06,
      "loss": 0.4362,
      "step": 11444
    },
    {
      "epoch": 1.403261402648357,
      "grad_norm": 1.9335531341822036,
      "learning_rate": 1.1214163342471912e-06,
      "loss": 0.429,
      "step": 11445
    },
    {
      "epoch": 1.4033840117704757,
      "grad_norm": 1.9866139126693059,
      "learning_rate": 1.120993547678846e-06,
      "loss": 0.44,
      "step": 11446
    },
    {
      "epoch": 1.4035066208925944,
      "grad_norm": 1.9097387481199246,
      "learning_rate": 1.1205708177901092e-06,
      "loss": 0.4189,
      "step": 11447
    },
    {
      "epoch": 1.403629230014713,
      "grad_norm": 1.6890499768167664,
      "learning_rate": 1.120148144598352e-06,
      "loss": 0.3848,
      "step": 11448
    },
    {
      "epoch": 1.4037518391368318,
      "grad_norm": 2.028871311821301,
      "learning_rate": 1.119725528120949e-06,
      "loss": 0.4499,
      "step": 11449
    },
    {
      "epoch": 1.4038744482589505,
      "grad_norm": 1.9529232842628679,
      "learning_rate": 1.1193029683752712e-06,
      "loss": 0.4557,
      "step": 11450
    },
    {
      "epoch": 1.4039970573810692,
      "grad_norm": 1.824532117594216,
      "learning_rate": 1.1188804653786855e-06,
      "loss": 0.4201,
      "step": 11451
    },
    {
      "epoch": 1.4041196665031879,
      "grad_norm": 1.9751892325072793,
      "learning_rate": 1.1184580191485571e-06,
      "loss": 0.4293,
      "step": 11452
    },
    {
      "epoch": 1.4042422756253066,
      "grad_norm": 1.8010426759506375,
      "learning_rate": 1.118035629702251e-06,
      "loss": 0.4267,
      "step": 11453
    },
    {
      "epoch": 1.4043648847474253,
      "grad_norm": 1.8924673413261204,
      "learning_rate": 1.117613297057126e-06,
      "loss": 0.439,
      "step": 11454
    },
    {
      "epoch": 1.404487493869544,
      "grad_norm": 2.1107005703366593,
      "learning_rate": 1.117191021230543e-06,
      "loss": 0.4316,
      "step": 11455
    },
    {
      "epoch": 1.4046101029916627,
      "grad_norm": 2.012622729409705,
      "learning_rate": 1.1167688022398558e-06,
      "loss": 0.4572,
      "step": 11456
    },
    {
      "epoch": 1.4047327121137814,
      "grad_norm": 1.7972420693658329,
      "learning_rate": 1.1163466401024207e-06,
      "loss": 0.3916,
      "step": 11457
    },
    {
      "epoch": 1.4048553212359,
      "grad_norm": 1.9277867230193506,
      "learning_rate": 1.115924534835587e-06,
      "loss": 0.4136,
      "step": 11458
    },
    {
      "epoch": 1.4049779303580188,
      "grad_norm": 1.9675968911495834,
      "learning_rate": 1.1155024864567056e-06,
      "loss": 0.4564,
      "step": 11459
    },
    {
      "epoch": 1.4051005394801372,
      "grad_norm": 1.8348299597128508,
      "learning_rate": 1.1150804949831252e-06,
      "loss": 0.4312,
      "step": 11460
    },
    {
      "epoch": 1.405223148602256,
      "grad_norm": 1.9832101806651097,
      "learning_rate": 1.1146585604321858e-06,
      "loss": 0.3906,
      "step": 11461
    },
    {
      "epoch": 1.4053457577243746,
      "grad_norm": 2.0830352967065555,
      "learning_rate": 1.1142366828212325e-06,
      "loss": 0.4365,
      "step": 11462
    },
    {
      "epoch": 1.4054683668464933,
      "grad_norm": 1.9319651778971212,
      "learning_rate": 1.1138148621676057e-06,
      "loss": 0.3921,
      "step": 11463
    },
    {
      "epoch": 1.405590975968612,
      "grad_norm": 1.9473987045725867,
      "learning_rate": 1.113393098488642e-06,
      "loss": 0.386,
      "step": 11464
    },
    {
      "epoch": 1.4057135850907307,
      "grad_norm": 1.9545175563379997,
      "learning_rate": 1.112971391801676e-06,
      "loss": 0.4133,
      "step": 11465
    },
    {
      "epoch": 1.4058361942128494,
      "grad_norm": 1.88470129676757,
      "learning_rate": 1.1125497421240412e-06,
      "loss": 0.419,
      "step": 11466
    },
    {
      "epoch": 1.4059588033349681,
      "grad_norm": 2.007954991865909,
      "learning_rate": 1.1121281494730692e-06,
      "loss": 0.4203,
      "step": 11467
    },
    {
      "epoch": 1.4060814124570868,
      "grad_norm": 2.0657697529840267,
      "learning_rate": 1.1117066138660872e-06,
      "loss": 0.4279,
      "step": 11468
    },
    {
      "epoch": 1.4062040215792055,
      "grad_norm": 1.7075260702420398,
      "learning_rate": 1.1112851353204205e-06,
      "loss": 0.4007,
      "step": 11469
    },
    {
      "epoch": 1.4063266307013242,
      "grad_norm": 1.978442785607322,
      "learning_rate": 1.110863713853394e-06,
      "loss": 0.4616,
      "step": 11470
    },
    {
      "epoch": 1.406449239823443,
      "grad_norm": 1.9644580798520956,
      "learning_rate": 1.1104423494823274e-06,
      "loss": 0.4379,
      "step": 11471
    },
    {
      "epoch": 1.4065718489455614,
      "grad_norm": 1.9641326679124522,
      "learning_rate": 1.1100210422245409e-06,
      "loss": 0.4715,
      "step": 11472
    },
    {
      "epoch": 1.4066944580676801,
      "grad_norm": 2.068990002928242,
      "learning_rate": 1.1095997920973506e-06,
      "loss": 0.4514,
      "step": 11473
    },
    {
      "epoch": 1.4068170671897988,
      "grad_norm": 1.9621565089264954,
      "learning_rate": 1.1091785991180693e-06,
      "loss": 0.4671,
      "step": 11474
    },
    {
      "epoch": 1.4069396763119175,
      "grad_norm": 1.9797978978099418,
      "learning_rate": 1.1087574633040097e-06,
      "loss": 0.4329,
      "step": 11475
    },
    {
      "epoch": 1.4070622854340362,
      "grad_norm": 2.0512641680883505,
      "learning_rate": 1.1083363846724824e-06,
      "loss": 0.446,
      "step": 11476
    },
    {
      "epoch": 1.407184894556155,
      "grad_norm": 2.073555142474845,
      "learning_rate": 1.1079153632407934e-06,
      "loss": 0.4431,
      "step": 11477
    },
    {
      "epoch": 1.4073075036782736,
      "grad_norm": 1.9543469451955975,
      "learning_rate": 1.107494399026247e-06,
      "loss": 0.4244,
      "step": 11478
    },
    {
      "epoch": 1.4074301128003923,
      "grad_norm": 2.0893534337559516,
      "learning_rate": 1.1070734920461454e-06,
      "loss": 0.4429,
      "step": 11479
    },
    {
      "epoch": 1.407552721922511,
      "grad_norm": 1.8495302423925983,
      "learning_rate": 1.1066526423177912e-06,
      "loss": 0.41,
      "step": 11480
    },
    {
      "epoch": 1.4076753310446297,
      "grad_norm": 1.9530498462963084,
      "learning_rate": 1.1062318498584784e-06,
      "loss": 0.4508,
      "step": 11481
    },
    {
      "epoch": 1.4077979401667484,
      "grad_norm": 1.9197448994102977,
      "learning_rate": 1.1058111146855041e-06,
      "loss": 0.4523,
      "step": 11482
    },
    {
      "epoch": 1.4079205492888671,
      "grad_norm": 1.9280892312124478,
      "learning_rate": 1.1053904368161623e-06,
      "loss": 0.4513,
      "step": 11483
    },
    {
      "epoch": 1.4080431584109858,
      "grad_norm": 1.889190934491345,
      "learning_rate": 1.1049698162677414e-06,
      "loss": 0.4389,
      "step": 11484
    },
    {
      "epoch": 1.4081657675331045,
      "grad_norm": 2.078191265531657,
      "learning_rate": 1.1045492530575313e-06,
      "loss": 0.4343,
      "step": 11485
    },
    {
      "epoch": 1.4082883766552232,
      "grad_norm": 1.9833398798003532,
      "learning_rate": 1.1041287472028169e-06,
      "loss": 0.4393,
      "step": 11486
    },
    {
      "epoch": 1.408410985777342,
      "grad_norm": 2.1791588807336524,
      "learning_rate": 1.103708298720883e-06,
      "loss": 0.4169,
      "step": 11487
    },
    {
      "epoch": 1.4085335948994606,
      "grad_norm": 1.839898862355051,
      "learning_rate": 1.103287907629009e-06,
      "loss": 0.4134,
      "step": 11488
    },
    {
      "epoch": 1.4086562040215793,
      "grad_norm": 1.801818060909079,
      "learning_rate": 1.102867573944476e-06,
      "loss": 0.4461,
      "step": 11489
    },
    {
      "epoch": 1.408778813143698,
      "grad_norm": 1.865514086627355,
      "learning_rate": 1.1024472976845584e-06,
      "loss": 0.4583,
      "step": 11490
    },
    {
      "epoch": 1.4089014222658165,
      "grad_norm": 1.929515992524988,
      "learning_rate": 1.1020270788665306e-06,
      "loss": 0.4351,
      "step": 11491
    },
    {
      "epoch": 1.4090240313879352,
      "grad_norm": 2.0934176755989897,
      "learning_rate": 1.1016069175076647e-06,
      "loss": 0.4737,
      "step": 11492
    },
    {
      "epoch": 1.409146640510054,
      "grad_norm": 1.7517163353763305,
      "learning_rate": 1.101186813625232e-06,
      "loss": 0.418,
      "step": 11493
    },
    {
      "epoch": 1.4092692496321726,
      "grad_norm": 1.846372826447585,
      "learning_rate": 1.1007667672364958e-06,
      "loss": 0.4175,
      "step": 11494
    },
    {
      "epoch": 1.4093918587542913,
      "grad_norm": 1.9807933291066997,
      "learning_rate": 1.1003467783587224e-06,
      "loss": 0.4732,
      "step": 11495
    },
    {
      "epoch": 1.40951446787641,
      "grad_norm": 2.0883901260226265,
      "learning_rate": 1.0999268470091754e-06,
      "loss": 0.4407,
      "step": 11496
    },
    {
      "epoch": 1.4096370769985287,
      "grad_norm": 1.7398522433366403,
      "learning_rate": 1.0995069732051137e-06,
      "loss": 0.3941,
      "step": 11497
    },
    {
      "epoch": 1.4097596861206474,
      "grad_norm": 1.8743460083002472,
      "learning_rate": 1.0990871569637937e-06,
      "loss": 0.4544,
      "step": 11498
    },
    {
      "epoch": 1.409882295242766,
      "grad_norm": 2.02512800939099,
      "learning_rate": 1.0986673983024717e-06,
      "loss": 0.4486,
      "step": 11499
    },
    {
      "epoch": 1.4100049043648848,
      "grad_norm": 1.940676746736867,
      "learning_rate": 1.0982476972384017e-06,
      "loss": 0.4091,
      "step": 11500
    },
    {
      "epoch": 1.4101275134870035,
      "grad_norm": 1.8834870081957504,
      "learning_rate": 1.0978280537888328e-06,
      "loss": 0.3816,
      "step": 11501
    },
    {
      "epoch": 1.4102501226091222,
      "grad_norm": 2.220104840690965,
      "learning_rate": 1.097408467971012e-06,
      "loss": 0.4595,
      "step": 11502
    },
    {
      "epoch": 1.4103727317312407,
      "grad_norm": 1.9310545020543073,
      "learning_rate": 1.0969889398021877e-06,
      "loss": 0.4308,
      "step": 11503
    },
    {
      "epoch": 1.4104953408533594,
      "grad_norm": 1.8607604380550218,
      "learning_rate": 1.0965694692996003e-06,
      "loss": 0.46,
      "step": 11504
    },
    {
      "epoch": 1.410617949975478,
      "grad_norm": 1.9543088213048538,
      "learning_rate": 1.0961500564804937e-06,
      "loss": 0.4149,
      "step": 11505
    },
    {
      "epoch": 1.4107405590975968,
      "grad_norm": 1.7871317883893736,
      "learning_rate": 1.095730701362104e-06,
      "loss": 0.4246,
      "step": 11506
    },
    {
      "epoch": 1.4108631682197155,
      "grad_norm": 2.0997131390026014,
      "learning_rate": 1.0953114039616694e-06,
      "loss": 0.3894,
      "step": 11507
    },
    {
      "epoch": 1.4109857773418342,
      "grad_norm": 1.9514203710228957,
      "learning_rate": 1.0948921642964222e-06,
      "loss": 0.4034,
      "step": 11508
    },
    {
      "epoch": 1.4111083864639529,
      "grad_norm": 1.6725339898382947,
      "learning_rate": 1.0944729823835953e-06,
      "loss": 0.4188,
      "step": 11509
    },
    {
      "epoch": 1.4112309955860716,
      "grad_norm": 1.9649034737653681,
      "learning_rate": 1.0940538582404173e-06,
      "loss": 0.4038,
      "step": 11510
    },
    {
      "epoch": 1.4113536047081903,
      "grad_norm": 1.791620304050676,
      "learning_rate": 1.0936347918841136e-06,
      "loss": 0.4491,
      "step": 11511
    },
    {
      "epoch": 1.411476213830309,
      "grad_norm": 1.8501186267208236,
      "learning_rate": 1.0932157833319098e-06,
      "loss": 0.3897,
      "step": 11512
    },
    {
      "epoch": 1.4115988229524277,
      "grad_norm": 1.9354027964608007,
      "learning_rate": 1.0927968326010288e-06,
      "loss": 0.3837,
      "step": 11513
    },
    {
      "epoch": 1.4117214320745464,
      "grad_norm": 2.0424214263783615,
      "learning_rate": 1.0923779397086892e-06,
      "loss": 0.4201,
      "step": 11514
    },
    {
      "epoch": 1.411844041196665,
      "grad_norm": 1.9519195626657215,
      "learning_rate": 1.0919591046721075e-06,
      "loss": 0.4394,
      "step": 11515
    },
    {
      "epoch": 1.4119666503187838,
      "grad_norm": 2.1187386727864816,
      "learning_rate": 1.0915403275085e-06,
      "loss": 0.4305,
      "step": 11516
    },
    {
      "epoch": 1.4120892594409025,
      "grad_norm": 2.164967325990176,
      "learning_rate": 1.0911216082350777e-06,
      "loss": 0.4358,
      "step": 11517
    },
    {
      "epoch": 1.4122118685630212,
      "grad_norm": 1.94111870684788,
      "learning_rate": 1.0907029468690525e-06,
      "loss": 0.4231,
      "step": 11518
    },
    {
      "epoch": 1.4123344776851399,
      "grad_norm": 1.8917076008580558,
      "learning_rate": 1.0902843434276303e-06,
      "loss": 0.4401,
      "step": 11519
    },
    {
      "epoch": 1.4124570868072586,
      "grad_norm": 1.922063816009251,
      "learning_rate": 1.089865797928018e-06,
      "loss": 0.4485,
      "step": 11520
    },
    {
      "epoch": 1.4125796959293773,
      "grad_norm": 1.8236281123523026,
      "learning_rate": 1.0894473103874174e-06,
      "loss": 0.4035,
      "step": 11521
    },
    {
      "epoch": 1.412702305051496,
      "grad_norm": 1.7841646685080066,
      "learning_rate": 1.0890288808230297e-06,
      "loss": 0.4497,
      "step": 11522
    },
    {
      "epoch": 1.4128249141736144,
      "grad_norm": 1.9240197195252309,
      "learning_rate": 1.0886105092520534e-06,
      "loss": 0.4219,
      "step": 11523
    },
    {
      "epoch": 1.4129475232957331,
      "grad_norm": 1.9473502066942123,
      "learning_rate": 1.088192195691683e-06,
      "loss": 0.455,
      "step": 11524
    },
    {
      "epoch": 1.4130701324178518,
      "grad_norm": 1.9602363444064153,
      "learning_rate": 1.0877739401591125e-06,
      "loss": 0.4451,
      "step": 11525
    },
    {
      "epoch": 1.4131927415399705,
      "grad_norm": 1.982979607814841,
      "learning_rate": 1.0873557426715344e-06,
      "loss": 0.3923,
      "step": 11526
    },
    {
      "epoch": 1.4133153506620892,
      "grad_norm": 2.0798091511402452,
      "learning_rate": 1.086937603246136e-06,
      "loss": 0.4247,
      "step": 11527
    },
    {
      "epoch": 1.413437959784208,
      "grad_norm": 1.8952818462213272,
      "learning_rate": 1.0865195219001028e-06,
      "loss": 0.4437,
      "step": 11528
    },
    {
      "epoch": 1.4135605689063266,
      "grad_norm": 1.960618166143493,
      "learning_rate": 1.0861014986506204e-06,
      "loss": 0.4357,
      "step": 11529
    },
    {
      "epoch": 1.4136831780284453,
      "grad_norm": 1.8538362463417672,
      "learning_rate": 1.0856835335148689e-06,
      "loss": 0.3936,
      "step": 11530
    },
    {
      "epoch": 1.413805787150564,
      "grad_norm": 1.920415280564769,
      "learning_rate": 1.0852656265100289e-06,
      "loss": 0.4021,
      "step": 11531
    },
    {
      "epoch": 1.4139283962726827,
      "grad_norm": 1.8474059486453003,
      "learning_rate": 1.0848477776532752e-06,
      "loss": 0.4666,
      "step": 11532
    },
    {
      "epoch": 1.4140510053948014,
      "grad_norm": 2.0416935731659582,
      "learning_rate": 1.0844299869617842e-06,
      "loss": 0.3903,
      "step": 11533
    },
    {
      "epoch": 1.4141736145169201,
      "grad_norm": 2.0162274038235712,
      "learning_rate": 1.0840122544527257e-06,
      "loss": 0.4043,
      "step": 11534
    },
    {
      "epoch": 1.4142962236390386,
      "grad_norm": 1.9199092930130934,
      "learning_rate": 1.0835945801432713e-06,
      "loss": 0.3975,
      "step": 11535
    },
    {
      "epoch": 1.4144188327611573,
      "grad_norm": 1.8858276126136473,
      "learning_rate": 1.0831769640505873e-06,
      "loss": 0.4001,
      "step": 11536
    },
    {
      "epoch": 1.414541441883276,
      "grad_norm": 1.9521243323173696,
      "learning_rate": 1.0827594061918375e-06,
      "loss": 0.3958,
      "step": 11537
    },
    {
      "epoch": 1.4146640510053947,
      "grad_norm": 2.1010349369402785,
      "learning_rate": 1.082341906584185e-06,
      "loss": 0.4033,
      "step": 11538
    },
    {
      "epoch": 1.4147866601275134,
      "grad_norm": 1.9672754674008945,
      "learning_rate": 1.0819244652447907e-06,
      "loss": 0.4312,
      "step": 11539
    },
    {
      "epoch": 1.4149092692496321,
      "grad_norm": 2.0133671353249634,
      "learning_rate": 1.0815070821908115e-06,
      "loss": 0.4295,
      "step": 11540
    },
    {
      "epoch": 1.4150318783717508,
      "grad_norm": 1.920414270531583,
      "learning_rate": 1.0810897574394019e-06,
      "loss": 0.4402,
      "step": 11541
    },
    {
      "epoch": 1.4151544874938695,
      "grad_norm": 1.8636661835538262,
      "learning_rate": 1.080672491007715e-06,
      "loss": 0.4243,
      "step": 11542
    },
    {
      "epoch": 1.4152770966159882,
      "grad_norm": 1.9037549921101522,
      "learning_rate": 1.0802552829129032e-06,
      "loss": 0.3939,
      "step": 11543
    },
    {
      "epoch": 1.415399705738107,
      "grad_norm": 1.9613299936912632,
      "learning_rate": 1.079838133172111e-06,
      "loss": 0.4209,
      "step": 11544
    },
    {
      "epoch": 1.4155223148602256,
      "grad_norm": 1.8803835318781073,
      "learning_rate": 1.0794210418024856e-06,
      "loss": 0.4455,
      "step": 11545
    },
    {
      "epoch": 1.4156449239823443,
      "grad_norm": 1.7326093346796332,
      "learning_rate": 1.0790040088211712e-06,
      "loss": 0.4143,
      "step": 11546
    },
    {
      "epoch": 1.415767533104463,
      "grad_norm": 1.9309906137134214,
      "learning_rate": 1.0785870342453078e-06,
      "loss": 0.408,
      "step": 11547
    },
    {
      "epoch": 1.4158901422265817,
      "grad_norm": 1.999561899186539,
      "learning_rate": 1.0781701180920328e-06,
      "loss": 0.4585,
      "step": 11548
    },
    {
      "epoch": 1.4160127513487004,
      "grad_norm": 2.0628575078386837,
      "learning_rate": 1.0777532603784841e-06,
      "loss": 0.4064,
      "step": 11549
    },
    {
      "epoch": 1.4161353604708191,
      "grad_norm": 1.8673079026429327,
      "learning_rate": 1.0773364611217932e-06,
      "loss": 0.4577,
      "step": 11550
    },
    {
      "epoch": 1.4162579695929378,
      "grad_norm": 1.915701943678715,
      "learning_rate": 1.0769197203390935e-06,
      "loss": 0.4469,
      "step": 11551
    },
    {
      "epoch": 1.4163805787150565,
      "grad_norm": 1.963882932626051,
      "learning_rate": 1.0765030380475118e-06,
      "loss": 0.4978,
      "step": 11552
    },
    {
      "epoch": 1.4165031878371752,
      "grad_norm": 1.9564942609589433,
      "learning_rate": 1.0760864142641758e-06,
      "loss": 0.3951,
      "step": 11553
    },
    {
      "epoch": 1.4166257969592937,
      "grad_norm": 1.9414453078363565,
      "learning_rate": 1.0756698490062085e-06,
      "loss": 0.4339,
      "step": 11554
    },
    {
      "epoch": 1.4167484060814124,
      "grad_norm": 1.9691578748635554,
      "learning_rate": 1.0752533422907318e-06,
      "loss": 0.4044,
      "step": 11555
    },
    {
      "epoch": 1.416871015203531,
      "grad_norm": 1.73673311952806,
      "learning_rate": 1.074836894134867e-06,
      "loss": 0.4287,
      "step": 11556
    },
    {
      "epoch": 1.4169936243256498,
      "grad_norm": 1.9966052298622365,
      "learning_rate": 1.0744205045557272e-06,
      "loss": 0.3838,
      "step": 11557
    },
    {
      "epoch": 1.4171162334477685,
      "grad_norm": 1.906133032079147,
      "learning_rate": 1.074004173570428e-06,
      "loss": 0.3909,
      "step": 11558
    },
    {
      "epoch": 1.4172388425698872,
      "grad_norm": 1.9337516162144308,
      "learning_rate": 1.073587901196083e-06,
      "loss": 0.413,
      "step": 11559
    },
    {
      "epoch": 1.417361451692006,
      "grad_norm": 1.9368725786852252,
      "learning_rate": 1.0731716874498006e-06,
      "loss": 0.4252,
      "step": 11560
    },
    {
      "epoch": 1.4174840608141246,
      "grad_norm": 1.813938839745708,
      "learning_rate": 1.0727555323486868e-06,
      "loss": 0.3916,
      "step": 11561
    },
    {
      "epoch": 1.4176066699362433,
      "grad_norm": 1.9535118743475217,
      "learning_rate": 1.0723394359098473e-06,
      "loss": 0.4665,
      "step": 11562
    },
    {
      "epoch": 1.417729279058362,
      "grad_norm": 2.0018968387670495,
      "learning_rate": 1.0719233981503854e-06,
      "loss": 0.4324,
      "step": 11563
    },
    {
      "epoch": 1.4178518881804807,
      "grad_norm": 2.1441664791289368,
      "learning_rate": 1.0715074190874004e-06,
      "loss": 0.444,
      "step": 11564
    },
    {
      "epoch": 1.4179744973025994,
      "grad_norm": 1.7774117148283144,
      "learning_rate": 1.0710914987379881e-06,
      "loss": 0.4295,
      "step": 11565
    },
    {
      "epoch": 1.4180971064247179,
      "grad_norm": 2.0153009166638567,
      "learning_rate": 1.070675637119246e-06,
      "loss": 0.3783,
      "step": 11566
    },
    {
      "epoch": 1.4182197155468366,
      "grad_norm": 1.8006495575751436,
      "learning_rate": 1.0702598342482644e-06,
      "loss": 0.4078,
      "step": 11567
    },
    {
      "epoch": 1.4183423246689553,
      "grad_norm": 1.8546290372609706,
      "learning_rate": 1.0698440901421361e-06,
      "loss": 0.3839,
      "step": 11568
    },
    {
      "epoch": 1.418464933791074,
      "grad_norm": 1.8473069179831698,
      "learning_rate": 1.0694284048179473e-06,
      "loss": 0.4058,
      "step": 11569
    },
    {
      "epoch": 1.4185875429131927,
      "grad_norm": 1.8454119671620226,
      "learning_rate": 1.069012778292783e-06,
      "loss": 0.4282,
      "step": 11570
    },
    {
      "epoch": 1.4187101520353114,
      "grad_norm": 1.9010780328845505,
      "learning_rate": 1.0685972105837267e-06,
      "loss": 0.4378,
      "step": 11571
    },
    {
      "epoch": 1.41883276115743,
      "grad_norm": 1.9858194676224072,
      "learning_rate": 1.0681817017078601e-06,
      "loss": 0.4032,
      "step": 11572
    },
    {
      "epoch": 1.4189553702795488,
      "grad_norm": 1.990586030452926,
      "learning_rate": 1.0677662516822603e-06,
      "loss": 0.42,
      "step": 11573
    },
    {
      "epoch": 1.4190779794016675,
      "grad_norm": 2.036861782066702,
      "learning_rate": 1.0673508605240023e-06,
      "loss": 0.4666,
      "step": 11574
    },
    {
      "epoch": 1.4192005885237862,
      "grad_norm": 1.7909005683824335,
      "learning_rate": 1.0669355282501604e-06,
      "loss": 0.3955,
      "step": 11575
    },
    {
      "epoch": 1.4193231976459049,
      "grad_norm": 1.9990411368913674,
      "learning_rate": 1.066520254877807e-06,
      "loss": 0.4169,
      "step": 11576
    },
    {
      "epoch": 1.4194458067680236,
      "grad_norm": 1.8403422558159084,
      "learning_rate": 1.0661050404240072e-06,
      "loss": 0.3968,
      "step": 11577
    },
    {
      "epoch": 1.4195684158901423,
      "grad_norm": 1.9250744864315918,
      "learning_rate": 1.0656898849058286e-06,
      "loss": 0.429,
      "step": 11578
    },
    {
      "epoch": 1.419691025012261,
      "grad_norm": 2.1383099410118342,
      "learning_rate": 1.0652747883403357e-06,
      "loss": 0.4167,
      "step": 11579
    },
    {
      "epoch": 1.4198136341343797,
      "grad_norm": 1.8288744352122752,
      "learning_rate": 1.0648597507445884e-06,
      "loss": 0.4111,
      "step": 11580
    },
    {
      "epoch": 1.4199362432564984,
      "grad_norm": 1.9542095542973827,
      "learning_rate": 1.064444772135647e-06,
      "loss": 0.4294,
      "step": 11581
    },
    {
      "epoch": 1.420058852378617,
      "grad_norm": 2.041159738359911,
      "learning_rate": 1.064029852530566e-06,
      "loss": 0.4123,
      "step": 11582
    },
    {
      "epoch": 1.4201814615007358,
      "grad_norm": 1.8974129984251091,
      "learning_rate": 1.0636149919464008e-06,
      "loss": 0.4302,
      "step": 11583
    },
    {
      "epoch": 1.4203040706228545,
      "grad_norm": 1.9091757907861289,
      "learning_rate": 1.0632001904002018e-06,
      "loss": 0.4466,
      "step": 11584
    },
    {
      "epoch": 1.4204266797449732,
      "grad_norm": 1.7950497366845,
      "learning_rate": 1.0627854479090193e-06,
      "loss": 0.4796,
      "step": 11585
    },
    {
      "epoch": 1.4205492888670916,
      "grad_norm": 2.151525935638228,
      "learning_rate": 1.0623707644898995e-06,
      "loss": 0.4712,
      "step": 11586
    },
    {
      "epoch": 1.4206718979892103,
      "grad_norm": 1.9655800987897307,
      "learning_rate": 1.0619561401598854e-06,
      "loss": 0.391,
      "step": 11587
    },
    {
      "epoch": 1.420794507111329,
      "grad_norm": 1.7221397640935092,
      "learning_rate": 1.0615415749360196e-06,
      "loss": 0.4709,
      "step": 11588
    },
    {
      "epoch": 1.4209171162334477,
      "grad_norm": 1.8568516106125192,
      "learning_rate": 1.061127068835344e-06,
      "loss": 0.4474,
      "step": 11589
    },
    {
      "epoch": 1.4210397253555664,
      "grad_norm": 1.8800216760249215,
      "learning_rate": 1.0607126218748908e-06,
      "loss": 0.409,
      "step": 11590
    },
    {
      "epoch": 1.4211623344776851,
      "grad_norm": 2.0259946408086082,
      "learning_rate": 1.060298234071697e-06,
      "loss": 0.4059,
      "step": 11591
    },
    {
      "epoch": 1.4212849435998038,
      "grad_norm": 2.0386185902293277,
      "learning_rate": 1.059883905442796e-06,
      "loss": 0.4599,
      "step": 11592
    },
    {
      "epoch": 1.4214075527219225,
      "grad_norm": 1.8790801978377953,
      "learning_rate": 1.0594696360052153e-06,
      "loss": 0.3688,
      "step": 11593
    },
    {
      "epoch": 1.4215301618440412,
      "grad_norm": 1.982371044344513,
      "learning_rate": 1.0590554257759825e-06,
      "loss": 0.4527,
      "step": 11594
    },
    {
      "epoch": 1.42165277096616,
      "grad_norm": 1.9858267459141938,
      "learning_rate": 1.0586412747721223e-06,
      "loss": 0.4236,
      "step": 11595
    },
    {
      "epoch": 1.4217753800882786,
      "grad_norm": 1.9912884103802604,
      "learning_rate": 1.0582271830106586e-06,
      "loss": 0.471,
      "step": 11596
    },
    {
      "epoch": 1.4218979892103971,
      "grad_norm": 2.0081417232739924,
      "learning_rate": 1.0578131505086097e-06,
      "loss": 0.4102,
      "step": 11597
    },
    {
      "epoch": 1.4220205983325158,
      "grad_norm": 2.050231627760902,
      "learning_rate": 1.057399177282993e-06,
      "loss": 0.4533,
      "step": 11598
    },
    {
      "epoch": 1.4221432074546345,
      "grad_norm": 2.0280980451750583,
      "learning_rate": 1.0569852633508248e-06,
      "loss": 0.4306,
      "step": 11599
    },
    {
      "epoch": 1.4222658165767532,
      "grad_norm": 1.7487619212670082,
      "learning_rate": 1.0565714087291162e-06,
      "loss": 0.3878,
      "step": 11600
    },
    {
      "epoch": 1.422388425698872,
      "grad_norm": 2.07034381249371,
      "learning_rate": 1.0561576134348788e-06,
      "loss": 0.4525,
      "step": 11601
    },
    {
      "epoch": 1.4225110348209906,
      "grad_norm": 1.9522608713725829,
      "learning_rate": 1.055743877485119e-06,
      "loss": 0.4464,
      "step": 11602
    },
    {
      "epoch": 1.4226336439431093,
      "grad_norm": 2.054704017217831,
      "learning_rate": 1.0553302008968433e-06,
      "loss": 0.4445,
      "step": 11603
    },
    {
      "epoch": 1.422756253065228,
      "grad_norm": 2.002876744361282,
      "learning_rate": 1.0549165836870532e-06,
      "loss": 0.441,
      "step": 11604
    },
    {
      "epoch": 1.4228788621873467,
      "grad_norm": 1.8678444324246894,
      "learning_rate": 1.0545030258727504e-06,
      "loss": 0.3982,
      "step": 11605
    },
    {
      "epoch": 1.4230014713094654,
      "grad_norm": 1.9175637278682152,
      "learning_rate": 1.0540895274709325e-06,
      "loss": 0.4188,
      "step": 11606
    },
    {
      "epoch": 1.4231240804315841,
      "grad_norm": 1.8905644792664573,
      "learning_rate": 1.0536760884985941e-06,
      "loss": 0.3903,
      "step": 11607
    },
    {
      "epoch": 1.4232466895537028,
      "grad_norm": 2.0352319252419324,
      "learning_rate": 1.0532627089727288e-06,
      "loss": 0.3895,
      "step": 11608
    },
    {
      "epoch": 1.4233692986758215,
      "grad_norm": 1.8540163080324548,
      "learning_rate": 1.0528493889103282e-06,
      "loss": 0.3981,
      "step": 11609
    },
    {
      "epoch": 1.4234919077979402,
      "grad_norm": 1.872356089237634,
      "learning_rate": 1.05243612832838e-06,
      "loss": 0.4091,
      "step": 11610
    },
    {
      "epoch": 1.423614516920059,
      "grad_norm": 1.9104600531059486,
      "learning_rate": 1.0520229272438683e-06,
      "loss": 0.4181,
      "step": 11611
    },
    {
      "epoch": 1.4237371260421776,
      "grad_norm": 2.059438672976268,
      "learning_rate": 1.051609785673779e-06,
      "loss": 0.4187,
      "step": 11612
    },
    {
      "epoch": 1.4238597351642963,
      "grad_norm": 1.8941868608321313,
      "learning_rate": 1.0511967036350907e-06,
      "loss": 0.4384,
      "step": 11613
    },
    {
      "epoch": 1.423982344286415,
      "grad_norm": 1.9430492500673582,
      "learning_rate": 1.0507836811447835e-06,
      "loss": 0.4084,
      "step": 11614
    },
    {
      "epoch": 1.4241049534085337,
      "grad_norm": 2.028171840187771,
      "learning_rate": 1.050370718219832e-06,
      "loss": 0.4501,
      "step": 11615
    },
    {
      "epoch": 1.4242275625306524,
      "grad_norm": 1.8817290917765381,
      "learning_rate": 1.049957814877211e-06,
      "loss": 0.3821,
      "step": 11616
    },
    {
      "epoch": 1.424350171652771,
      "grad_norm": 2.2178097412333786,
      "learning_rate": 1.0495449711338901e-06,
      "loss": 0.4222,
      "step": 11617
    },
    {
      "epoch": 1.4244727807748896,
      "grad_norm": 2.0650650042248637,
      "learning_rate": 1.0491321870068396e-06,
      "loss": 0.4327,
      "step": 11618
    },
    {
      "epoch": 1.4245953898970083,
      "grad_norm": 2.0347963444926673,
      "learning_rate": 1.048719462513025e-06,
      "loss": 0.3832,
      "step": 11619
    },
    {
      "epoch": 1.424717999019127,
      "grad_norm": 1.7993085591339741,
      "learning_rate": 1.0483067976694086e-06,
      "loss": 0.4067,
      "step": 11620
    },
    {
      "epoch": 1.4248406081412457,
      "grad_norm": 2.126512854051321,
      "learning_rate": 1.0478941924929531e-06,
      "loss": 0.4114,
      "step": 11621
    },
    {
      "epoch": 1.4249632172633644,
      "grad_norm": 1.9962354743528108,
      "learning_rate": 1.0474816470006176e-06,
      "loss": 0.3827,
      "step": 11622
    },
    {
      "epoch": 1.425085826385483,
      "grad_norm": 2.0034286553471325,
      "learning_rate": 1.0470691612093581e-06,
      "loss": 0.4625,
      "step": 11623
    },
    {
      "epoch": 1.4252084355076018,
      "grad_norm": 1.8135301456660602,
      "learning_rate": 1.0466567351361272e-06,
      "loss": 0.4436,
      "step": 11624
    },
    {
      "epoch": 1.4253310446297205,
      "grad_norm": 2.178449713866742,
      "learning_rate": 1.0462443687978788e-06,
      "loss": 0.4383,
      "step": 11625
    },
    {
      "epoch": 1.4254536537518392,
      "grad_norm": 2.111692578877262,
      "learning_rate": 1.0458320622115592e-06,
      "loss": 0.4218,
      "step": 11626
    },
    {
      "epoch": 1.425576262873958,
      "grad_norm": 2.0635989666793284,
      "learning_rate": 1.0454198153941172e-06,
      "loss": 0.4545,
      "step": 11627
    },
    {
      "epoch": 1.4256988719960766,
      "grad_norm": 1.8827471437734362,
      "learning_rate": 1.0450076283624953e-06,
      "loss": 0.414,
      "step": 11628
    },
    {
      "epoch": 1.425821481118195,
      "grad_norm": 1.7565184545882817,
      "learning_rate": 1.0445955011336362e-06,
      "loss": 0.4129,
      "step": 11629
    },
    {
      "epoch": 1.4259440902403138,
      "grad_norm": 1.8889258513424427,
      "learning_rate": 1.044183433724478e-06,
      "loss": 0.4086,
      "step": 11630
    },
    {
      "epoch": 1.4260666993624325,
      "grad_norm": 1.940121432493452,
      "learning_rate": 1.0437714261519587e-06,
      "loss": 0.425,
      "step": 11631
    },
    {
      "epoch": 1.4261893084845512,
      "grad_norm": 1.8259827940470181,
      "learning_rate": 1.043359478433012e-06,
      "loss": 0.3883,
      "step": 11632
    },
    {
      "epoch": 1.4263119176066699,
      "grad_norm": 1.9437791544881742,
      "learning_rate": 1.0429475905845684e-06,
      "loss": 0.4263,
      "step": 11633
    },
    {
      "epoch": 1.4264345267287886,
      "grad_norm": 1.902703995916566,
      "learning_rate": 1.0425357626235584e-06,
      "loss": 0.4445,
      "step": 11634
    },
    {
      "epoch": 1.4265571358509073,
      "grad_norm": 2.033125341162292,
      "learning_rate": 1.0421239945669098e-06,
      "loss": 0.4509,
      "step": 11635
    },
    {
      "epoch": 1.426679744973026,
      "grad_norm": 1.7294181316560582,
      "learning_rate": 1.041712286431546e-06,
      "loss": 0.4333,
      "step": 11636
    },
    {
      "epoch": 1.4268023540951447,
      "grad_norm": 1.9753758772594676,
      "learning_rate": 1.0413006382343879e-06,
      "loss": 0.4423,
      "step": 11637
    },
    {
      "epoch": 1.4269249632172634,
      "grad_norm": 1.9227899632900392,
      "learning_rate": 1.0408890499923563e-06,
      "loss": 0.4309,
      "step": 11638
    },
    {
      "epoch": 1.427047572339382,
      "grad_norm": 2.0264622300097983,
      "learning_rate": 1.0404775217223695e-06,
      "loss": 0.4371,
      "step": 11639
    },
    {
      "epoch": 1.4271701814615008,
      "grad_norm": 1.7860006621663866,
      "learning_rate": 1.0400660534413385e-06,
      "loss": 0.4203,
      "step": 11640
    },
    {
      "epoch": 1.4272927905836195,
      "grad_norm": 1.9752866415250805,
      "learning_rate": 1.0396546451661774e-06,
      "loss": 0.4125,
      "step": 11641
    },
    {
      "epoch": 1.4274153997057382,
      "grad_norm": 1.9612908412513568,
      "learning_rate": 1.0392432969137966e-06,
      "loss": 0.3863,
      "step": 11642
    },
    {
      "epoch": 1.4275380088278569,
      "grad_norm": 2.0828848648009437,
      "learning_rate": 1.0388320087011025e-06,
      "loss": 0.4595,
      "step": 11643
    },
    {
      "epoch": 1.4276606179499756,
      "grad_norm": 2.125008712057439,
      "learning_rate": 1.0384207805449987e-06,
      "loss": 0.4167,
      "step": 11644
    },
    {
      "epoch": 1.4277832270720943,
      "grad_norm": 2.0455203349047744,
      "learning_rate": 1.038009612462389e-06,
      "loss": 0.4619,
      "step": 11645
    },
    {
      "epoch": 1.427905836194213,
      "grad_norm": 1.7751399001549062,
      "learning_rate": 1.037598504470172e-06,
      "loss": 0.3886,
      "step": 11646
    },
    {
      "epoch": 1.4280284453163317,
      "grad_norm": 1.9881053866240685,
      "learning_rate": 1.037187456585246e-06,
      "loss": 0.4901,
      "step": 11647
    },
    {
      "epoch": 1.4281510544384501,
      "grad_norm": 1.9631356009320353,
      "learning_rate": 1.0367764688245044e-06,
      "loss": 0.4277,
      "step": 11648
    },
    {
      "epoch": 1.4282736635605688,
      "grad_norm": 1.7458246413418455,
      "learning_rate": 1.0363655412048411e-06,
      "loss": 0.4098,
      "step": 11649
    },
    {
      "epoch": 1.4283962726826875,
      "grad_norm": 2.085092140618358,
      "learning_rate": 1.0359546737431447e-06,
      "loss": 0.4267,
      "step": 11650
    },
    {
      "epoch": 1.4285188818048062,
      "grad_norm": 2.127686712787889,
      "learning_rate": 1.035543866456303e-06,
      "loss": 0.4077,
      "step": 11651
    },
    {
      "epoch": 1.428641490926925,
      "grad_norm": 1.9565264480685423,
      "learning_rate": 1.0351331193612026e-06,
      "loss": 0.4476,
      "step": 11652
    },
    {
      "epoch": 1.4287641000490436,
      "grad_norm": 1.977598698609426,
      "learning_rate": 1.0347224324747227e-06,
      "loss": 0.4533,
      "step": 11653
    },
    {
      "epoch": 1.4288867091711623,
      "grad_norm": 2.101207669476959,
      "learning_rate": 1.034311805813745e-06,
      "loss": 0.416,
      "step": 11654
    },
    {
      "epoch": 1.429009318293281,
      "grad_norm": 1.7498372808795704,
      "learning_rate": 1.0339012393951475e-06,
      "loss": 0.4069,
      "step": 11655
    },
    {
      "epoch": 1.4291319274153997,
      "grad_norm": 1.9256112846862758,
      "learning_rate": 1.033490733235805e-06,
      "loss": 0.4826,
      "step": 11656
    },
    {
      "epoch": 1.4292545365375184,
      "grad_norm": 2.0933016146902967,
      "learning_rate": 1.0330802873525883e-06,
      "loss": 0.4108,
      "step": 11657
    },
    {
      "epoch": 1.4293771456596371,
      "grad_norm": 1.9731554180416329,
      "learning_rate": 1.0326699017623689e-06,
      "loss": 0.4148,
      "step": 11658
    },
    {
      "epoch": 1.4294997547817558,
      "grad_norm": 1.9515148445789385,
      "learning_rate": 1.032259576482015e-06,
      "loss": 0.4446,
      "step": 11659
    },
    {
      "epoch": 1.4296223639038743,
      "grad_norm": 2.0011546413712447,
      "learning_rate": 1.0318493115283913e-06,
      "loss": 0.4545,
      "step": 11660
    },
    {
      "epoch": 1.429744973025993,
      "grad_norm": 1.9783740832978418,
      "learning_rate": 1.0314391069183588e-06,
      "loss": 0.4156,
      "step": 11661
    },
    {
      "epoch": 1.4298675821481117,
      "grad_norm": 1.8643656590782276,
      "learning_rate": 1.03102896266878e-06,
      "loss": 0.5054,
      "step": 11662
    },
    {
      "epoch": 1.4299901912702304,
      "grad_norm": 1.8675356423481533,
      "learning_rate": 1.0306188787965104e-06,
      "loss": 0.4159,
      "step": 11663
    },
    {
      "epoch": 1.4301128003923491,
      "grad_norm": 1.9499510986253636,
      "learning_rate": 1.0302088553184072e-06,
      "loss": 0.4108,
      "step": 11664
    },
    {
      "epoch": 1.4302354095144678,
      "grad_norm": 1.787086440642433,
      "learning_rate": 1.0297988922513222e-06,
      "loss": 0.414,
      "step": 11665
    },
    {
      "epoch": 1.4303580186365865,
      "grad_norm": 2.045405009419983,
      "learning_rate": 1.0293889896121046e-06,
      "loss": 0.449,
      "step": 11666
    },
    {
      "epoch": 1.4304806277587052,
      "grad_norm": 1.9298415755979095,
      "learning_rate": 1.0289791474176028e-06,
      "loss": 0.4121,
      "step": 11667
    },
    {
      "epoch": 1.430603236880824,
      "grad_norm": 1.959497336976297,
      "learning_rate": 1.0285693656846632e-06,
      "loss": 0.42,
      "step": 11668
    },
    {
      "epoch": 1.4307258460029426,
      "grad_norm": 1.99891388827391,
      "learning_rate": 1.0281596444301279e-06,
      "loss": 0.4505,
      "step": 11669
    },
    {
      "epoch": 1.4308484551250613,
      "grad_norm": 1.8572154961131258,
      "learning_rate": 1.0277499836708358e-06,
      "loss": 0.4192,
      "step": 11670
    },
    {
      "epoch": 1.43097106424718,
      "grad_norm": 1.8927727923634317,
      "learning_rate": 1.027340383423626e-06,
      "loss": 0.3904,
      "step": 11671
    },
    {
      "epoch": 1.4310936733692987,
      "grad_norm": 2.0785347324754873,
      "learning_rate": 1.0269308437053352e-06,
      "loss": 0.4279,
      "step": 11672
    },
    {
      "epoch": 1.4312162824914174,
      "grad_norm": 1.8555731013035413,
      "learning_rate": 1.0265213645327927e-06,
      "loss": 0.4454,
      "step": 11673
    },
    {
      "epoch": 1.4313388916135361,
      "grad_norm": 1.9555796821613345,
      "learning_rate": 1.0261119459228308e-06,
      "loss": 0.4601,
      "step": 11674
    },
    {
      "epoch": 1.4314615007356548,
      "grad_norm": 1.8354387378169799,
      "learning_rate": 1.0257025878922782e-06,
      "loss": 0.4007,
      "step": 11675
    },
    {
      "epoch": 1.4315841098577735,
      "grad_norm": 1.7655706544233105,
      "learning_rate": 1.0252932904579583e-06,
      "loss": 0.4065,
      "step": 11676
    },
    {
      "epoch": 1.4317067189798922,
      "grad_norm": 1.8522568297023603,
      "learning_rate": 1.0248840536366958e-06,
      "loss": 0.4345,
      "step": 11677
    },
    {
      "epoch": 1.431829328102011,
      "grad_norm": 2.0920149655088927,
      "learning_rate": 1.0244748774453095e-06,
      "loss": 0.4652,
      "step": 11678
    },
    {
      "epoch": 1.4319519372241296,
      "grad_norm": 1.8790097174686593,
      "learning_rate": 1.0240657619006186e-06,
      "loss": 0.4106,
      "step": 11679
    },
    {
      "epoch": 1.432074546346248,
      "grad_norm": 2.0790375132224956,
      "learning_rate": 1.0236567070194369e-06,
      "loss": 0.4284,
      "step": 11680
    },
    {
      "epoch": 1.4321971554683668,
      "grad_norm": 2.0023154404290384,
      "learning_rate": 1.0232477128185795e-06,
      "loss": 0.4165,
      "step": 11681
    },
    {
      "epoch": 1.4323197645904855,
      "grad_norm": 2.3079307467385144,
      "learning_rate": 1.022838779314855e-06,
      "loss": 0.4661,
      "step": 11682
    },
    {
      "epoch": 1.4324423737126042,
      "grad_norm": 2.026856964631362,
      "learning_rate": 1.0224299065250713e-06,
      "loss": 0.4076,
      "step": 11683
    },
    {
      "epoch": 1.4325649828347229,
      "grad_norm": 1.9469845912246282,
      "learning_rate": 1.022021094466034e-06,
      "loss": 0.4084,
      "step": 11684
    },
    {
      "epoch": 1.4326875919568416,
      "grad_norm": 1.8363713625652178,
      "learning_rate": 1.021612343154548e-06,
      "loss": 0.4502,
      "step": 11685
    },
    {
      "epoch": 1.4328102010789603,
      "grad_norm": 2.020396308999258,
      "learning_rate": 1.0212036526074105e-06,
      "loss": 0.48,
      "step": 11686
    },
    {
      "epoch": 1.432932810201079,
      "grad_norm": 1.9077241442444828,
      "learning_rate": 1.0207950228414207e-06,
      "loss": 0.4288,
      "step": 11687
    },
    {
      "epoch": 1.4330554193231977,
      "grad_norm": 2.0946925069171716,
      "learning_rate": 1.020386453873375e-06,
      "loss": 0.4083,
      "step": 11688
    },
    {
      "epoch": 1.4331780284453164,
      "grad_norm": 1.921965256297329,
      "learning_rate": 1.0199779457200654e-06,
      "loss": 0.445,
      "step": 11689
    },
    {
      "epoch": 1.433300637567435,
      "grad_norm": 2.25073326802957,
      "learning_rate": 1.0195694983982817e-06,
      "loss": 0.4213,
      "step": 11690
    },
    {
      "epoch": 1.4334232466895536,
      "grad_norm": 1.9587888013458612,
      "learning_rate": 1.0191611119248126e-06,
      "loss": 0.4127,
      "step": 11691
    },
    {
      "epoch": 1.4335458558116723,
      "grad_norm": 2.0679559547730917,
      "learning_rate": 1.0187527863164442e-06,
      "loss": 0.4276,
      "step": 11692
    },
    {
      "epoch": 1.433668464933791,
      "grad_norm": 1.9537614076109684,
      "learning_rate": 1.0183445215899585e-06,
      "loss": 0.3957,
      "step": 11693
    },
    {
      "epoch": 1.4337910740559097,
      "grad_norm": 1.9562800624491359,
      "learning_rate": 1.0179363177621354e-06,
      "loss": 0.4306,
      "step": 11694
    },
    {
      "epoch": 1.4339136831780284,
      "grad_norm": 1.9282909699739712,
      "learning_rate": 1.0175281748497542e-06,
      "loss": 0.4245,
      "step": 11695
    },
    {
      "epoch": 1.434036292300147,
      "grad_norm": 2.0156020665790844,
      "learning_rate": 1.0171200928695884e-06,
      "loss": 0.4543,
      "step": 11696
    },
    {
      "epoch": 1.4341589014222658,
      "grad_norm": 2.227778708449846,
      "learning_rate": 1.0167120718384125e-06,
      "loss": 0.4427,
      "step": 11697
    },
    {
      "epoch": 1.4342815105443845,
      "grad_norm": 1.8191423319727749,
      "learning_rate": 1.0163041117729969e-06,
      "loss": 0.4165,
      "step": 11698
    },
    {
      "epoch": 1.4344041196665032,
      "grad_norm": 1.8735900893728512,
      "learning_rate": 1.015896212690109e-06,
      "loss": 0.4472,
      "step": 11699
    },
    {
      "epoch": 1.4345267287886219,
      "grad_norm": 1.991913754331344,
      "learning_rate": 1.0154883746065133e-06,
      "loss": 0.4184,
      "step": 11700
    },
    {
      "epoch": 1.4346493379107406,
      "grad_norm": 2.01331824209306,
      "learning_rate": 1.0150805975389747e-06,
      "loss": 0.423,
      "step": 11701
    },
    {
      "epoch": 1.4347719470328593,
      "grad_norm": 1.9876480618581218,
      "learning_rate": 1.0146728815042523e-06,
      "loss": 0.401,
      "step": 11702
    },
    {
      "epoch": 1.434894556154978,
      "grad_norm": 2.259343341006857,
      "learning_rate": 1.0142652265191027e-06,
      "loss": 0.4685,
      "step": 11703
    },
    {
      "epoch": 1.4350171652770967,
      "grad_norm": 2.0154354291561245,
      "learning_rate": 1.0138576326002831e-06,
      "loss": 0.4614,
      "step": 11704
    },
    {
      "epoch": 1.4351397743992154,
      "grad_norm": 1.874527464767351,
      "learning_rate": 1.0134500997645464e-06,
      "loss": 0.4277,
      "step": 11705
    },
    {
      "epoch": 1.435262383521334,
      "grad_norm": 2.011117576264659,
      "learning_rate": 1.0130426280286423e-06,
      "loss": 0.4113,
      "step": 11706
    },
    {
      "epoch": 1.4353849926434528,
      "grad_norm": 2.049750168980934,
      "learning_rate": 1.0126352174093179e-06,
      "loss": 0.4588,
      "step": 11707
    },
    {
      "epoch": 1.4355076017655715,
      "grad_norm": 1.921873096302,
      "learning_rate": 1.0122278679233203e-06,
      "loss": 0.4167,
      "step": 11708
    },
    {
      "epoch": 1.4356302108876902,
      "grad_norm": 2.0004877985885363,
      "learning_rate": 1.01182057958739e-06,
      "loss": 0.4491,
      "step": 11709
    },
    {
      "epoch": 1.4357528200098089,
      "grad_norm": 2.0842454341055006,
      "learning_rate": 1.0114133524182696e-06,
      "loss": 0.4224,
      "step": 11710
    },
    {
      "epoch": 1.4358754291319273,
      "grad_norm": 1.771735623893804,
      "learning_rate": 1.0110061864326949e-06,
      "loss": 0.4185,
      "step": 11711
    },
    {
      "epoch": 1.435998038254046,
      "grad_norm": 2.030290965793401,
      "learning_rate": 1.0105990816474026e-06,
      "loss": 0.4474,
      "step": 11712
    },
    {
      "epoch": 1.4361206473761647,
      "grad_norm": 1.9073338286768444,
      "learning_rate": 1.010192038079124e-06,
      "loss": 0.383,
      "step": 11713
    },
    {
      "epoch": 1.4362432564982834,
      "grad_norm": 1.8580419767960776,
      "learning_rate": 1.0097850557445912e-06,
      "loss": 0.4065,
      "step": 11714
    },
    {
      "epoch": 1.4363658656204021,
      "grad_norm": 2.08038234688005,
      "learning_rate": 1.0093781346605308e-06,
      "loss": 0.4082,
      "step": 11715
    },
    {
      "epoch": 1.4364884747425208,
      "grad_norm": 2.0340698868622673,
      "learning_rate": 1.008971274843667e-06,
      "loss": 0.4014,
      "step": 11716
    },
    {
      "epoch": 1.4366110838646395,
      "grad_norm": 1.9943449685374954,
      "learning_rate": 1.0085644763107235e-06,
      "loss": 0.4038,
      "step": 11717
    },
    {
      "epoch": 1.4367336929867582,
      "grad_norm": 1.9004980597192147,
      "learning_rate": 1.0081577390784211e-06,
      "loss": 0.385,
      "step": 11718
    },
    {
      "epoch": 1.436856302108877,
      "grad_norm": 1.9249663972712274,
      "learning_rate": 1.007751063163477e-06,
      "loss": 0.3837,
      "step": 11719
    },
    {
      "epoch": 1.4369789112309956,
      "grad_norm": 2.121673028240771,
      "learning_rate": 1.007344448582605e-06,
      "loss": 0.4581,
      "step": 11720
    },
    {
      "epoch": 1.4371015203531143,
      "grad_norm": 2.023616222265692,
      "learning_rate": 1.0069378953525196e-06,
      "loss": 0.4582,
      "step": 11721
    },
    {
      "epoch": 1.437224129475233,
      "grad_norm": 2.006800349865198,
      "learning_rate": 1.0065314034899293e-06,
      "loss": 0.4602,
      "step": 11722
    },
    {
      "epoch": 1.4373467385973515,
      "grad_norm": 2.0856358700213398,
      "learning_rate": 1.006124973011543e-06,
      "loss": 0.4722,
      "step": 11723
    },
    {
      "epoch": 1.4374693477194702,
      "grad_norm": 2.0272647999726763,
      "learning_rate": 1.005718603934064e-06,
      "loss": 0.4256,
      "step": 11724
    },
    {
      "epoch": 1.437591956841589,
      "grad_norm": 2.1435083349437076,
      "learning_rate": 1.0053122962741971e-06,
      "loss": 0.4181,
      "step": 11725
    },
    {
      "epoch": 1.4377145659637076,
      "grad_norm": 1.7684261689369047,
      "learning_rate": 1.0049060500486396e-06,
      "loss": 0.4688,
      "step": 11726
    },
    {
      "epoch": 1.4378371750858263,
      "grad_norm": 2.173293460178641,
      "learning_rate": 1.0044998652740915e-06,
      "loss": 0.487,
      "step": 11727
    },
    {
      "epoch": 1.437959784207945,
      "grad_norm": 1.9266643196045403,
      "learning_rate": 1.0040937419672463e-06,
      "loss": 0.4804,
      "step": 11728
    },
    {
      "epoch": 1.4380823933300637,
      "grad_norm": 1.9235031314340325,
      "learning_rate": 1.0036876801447958e-06,
      "loss": 0.4066,
      "step": 11729
    },
    {
      "epoch": 1.4382050024521824,
      "grad_norm": 1.9185250076331524,
      "learning_rate": 1.0032816798234307e-06,
      "loss": 0.4416,
      "step": 11730
    },
    {
      "epoch": 1.4383276115743011,
      "grad_norm": 2.037683795146219,
      "learning_rate": 1.0028757410198392e-06,
      "loss": 0.4232,
      "step": 11731
    },
    {
      "epoch": 1.4384502206964198,
      "grad_norm": 2.121504205537566,
      "learning_rate": 1.002469863750705e-06,
      "loss": 0.4167,
      "step": 11732
    },
    {
      "epoch": 1.4385728298185385,
      "grad_norm": 2.0621411349596355,
      "learning_rate": 1.0020640480327098e-06,
      "loss": 0.4687,
      "step": 11733
    },
    {
      "epoch": 1.4386954389406572,
      "grad_norm": 1.9255412037613546,
      "learning_rate": 1.0016582938825343e-06,
      "loss": 0.4284,
      "step": 11734
    },
    {
      "epoch": 1.438818048062776,
      "grad_norm": 1.8728418923274848,
      "learning_rate": 1.001252601316857e-06,
      "loss": 0.4195,
      "step": 11735
    },
    {
      "epoch": 1.4389406571848946,
      "grad_norm": 1.9339245309212802,
      "learning_rate": 1.0008469703523493e-06,
      "loss": 0.3976,
      "step": 11736
    },
    {
      "epoch": 1.4390632663070133,
      "grad_norm": 2.0550039878558533,
      "learning_rate": 1.0004414010056854e-06,
      "loss": 0.3862,
      "step": 11737
    },
    {
      "epoch": 1.439185875429132,
      "grad_norm": 2.052664377801616,
      "learning_rate": 1.0000358932935355e-06,
      "loss": 0.4469,
      "step": 11738
    },
    {
      "epoch": 1.4393084845512507,
      "grad_norm": 1.990997193363763,
      "learning_rate": 9.996304472325658e-07,
      "loss": 0.4494,
      "step": 11739
    },
    {
      "epoch": 1.4394310936733694,
      "grad_norm": 1.8281074106152548,
      "learning_rate": 9.992250628394401e-07,
      "loss": 0.3882,
      "step": 11740
    },
    {
      "epoch": 1.439553702795488,
      "grad_norm": 2.182011933097222,
      "learning_rate": 9.98819740130822e-07,
      "loss": 0.4289,
      "step": 11741
    },
    {
      "epoch": 1.4396763119176066,
      "grad_norm": 1.993452500405226,
      "learning_rate": 9.984144791233694e-07,
      "loss": 0.4364,
      "step": 11742
    },
    {
      "epoch": 1.4397989210397253,
      "grad_norm": 1.866817727125973,
      "learning_rate": 9.98009279833741e-07,
      "loss": 0.4034,
      "step": 11743
    },
    {
      "epoch": 1.439921530161844,
      "grad_norm": 1.9736968655770366,
      "learning_rate": 9.976041422785893e-07,
      "loss": 0.4301,
      "step": 11744
    },
    {
      "epoch": 1.4400441392839627,
      "grad_norm": 2.0651666454790605,
      "learning_rate": 9.971990664745682e-07,
      "loss": 0.411,
      "step": 11745
    },
    {
      "epoch": 1.4401667484060814,
      "grad_norm": 2.0265196253896334,
      "learning_rate": 9.967940524383249e-07,
      "loss": 0.4438,
      "step": 11746
    },
    {
      "epoch": 1.4402893575282,
      "grad_norm": 1.9954457113270132,
      "learning_rate": 9.963891001865073e-07,
      "loss": 0.4126,
      "step": 11747
    },
    {
      "epoch": 1.4404119666503188,
      "grad_norm": 2.155863100313455,
      "learning_rate": 9.959842097357613e-07,
      "loss": 0.4249,
      "step": 11748
    },
    {
      "epoch": 1.4405345757724375,
      "grad_norm": 1.9594836731574132,
      "learning_rate": 9.955793811027253e-07,
      "loss": 0.3953,
      "step": 11749
    },
    {
      "epoch": 1.4406571848945562,
      "grad_norm": 1.9758256552675872,
      "learning_rate": 9.9517461430404e-07,
      "loss": 0.415,
      "step": 11750
    },
    {
      "epoch": 1.4407797940166749,
      "grad_norm": 1.9946279067363317,
      "learning_rate": 9.947699093563432e-07,
      "loss": 0.4379,
      "step": 11751
    },
    {
      "epoch": 1.4409024031387936,
      "grad_norm": 2.1444774948180103,
      "learning_rate": 9.943652662762676e-07,
      "loss": 0.3937,
      "step": 11752
    },
    {
      "epoch": 1.4410250122609123,
      "grad_norm": 2.0259893865071126,
      "learning_rate": 9.939606850804446e-07,
      "loss": 0.4254,
      "step": 11753
    },
    {
      "epoch": 1.4411476213830308,
      "grad_norm": 1.9896673121112347,
      "learning_rate": 9.935561657855034e-07,
      "loss": 0.444,
      "step": 11754
    },
    {
      "epoch": 1.4412702305051495,
      "grad_norm": 1.9198482762284865,
      "learning_rate": 9.931517084080717e-07,
      "loss": 0.4175,
      "step": 11755
    },
    {
      "epoch": 1.4413928396272682,
      "grad_norm": 2.0292620215691697,
      "learning_rate": 9.927473129647724e-07,
      "loss": 0.4154,
      "step": 11756
    },
    {
      "epoch": 1.4415154487493869,
      "grad_norm": 1.8525484547155897,
      "learning_rate": 9.923429794722261e-07,
      "loss": 0.4321,
      "step": 11757
    },
    {
      "epoch": 1.4416380578715056,
      "grad_norm": 1.7584398541329775,
      "learning_rate": 9.919387079470532e-07,
      "loss": 0.413,
      "step": 11758
    },
    {
      "epoch": 1.4417606669936243,
      "grad_norm": 1.887589421049719,
      "learning_rate": 9.915344984058683e-07,
      "loss": 0.4147,
      "step": 11759
    },
    {
      "epoch": 1.441883276115743,
      "grad_norm": 2.1536494740774543,
      "learning_rate": 9.911303508652873e-07,
      "loss": 0.4657,
      "step": 11760
    },
    {
      "epoch": 1.4420058852378617,
      "grad_norm": 1.8304654638168978,
      "learning_rate": 9.907262653419188e-07,
      "loss": 0.4159,
      "step": 11761
    },
    {
      "epoch": 1.4421284943599804,
      "grad_norm": 1.8902304366677436,
      "learning_rate": 9.903222418523739e-07,
      "loss": 0.4233,
      "step": 11762
    },
    {
      "epoch": 1.442251103482099,
      "grad_norm": 1.9876137460195369,
      "learning_rate": 9.899182804132564e-07,
      "loss": 0.4251,
      "step": 11763
    },
    {
      "epoch": 1.4423737126042178,
      "grad_norm": 2.1154036482544103,
      "learning_rate": 9.895143810411723e-07,
      "loss": 0.4402,
      "step": 11764
    },
    {
      "epoch": 1.4424963217263365,
      "grad_norm": 2.080013412033314,
      "learning_rate": 9.891105437527207e-07,
      "loss": 0.4521,
      "step": 11765
    },
    {
      "epoch": 1.4426189308484552,
      "grad_norm": 1.913681713120712,
      "learning_rate": 9.887067685645e-07,
      "loss": 0.3889,
      "step": 11766
    },
    {
      "epoch": 1.4427415399705739,
      "grad_norm": 1.9425809182723992,
      "learning_rate": 9.883030554931067e-07,
      "loss": 0.4312,
      "step": 11767
    },
    {
      "epoch": 1.4428641490926926,
      "grad_norm": 1.889348074005245,
      "learning_rate": 9.878994045551357e-07,
      "loss": 0.3772,
      "step": 11768
    },
    {
      "epoch": 1.4429867582148113,
      "grad_norm": 2.0505145708888826,
      "learning_rate": 9.874958157671743e-07,
      "loss": 0.4191,
      "step": 11769
    },
    {
      "epoch": 1.44310936733693,
      "grad_norm": 1.7775192640994806,
      "learning_rate": 9.870922891458126e-07,
      "loss": 0.4367,
      "step": 11770
    },
    {
      "epoch": 1.4432319764590487,
      "grad_norm": 1.730258402955728,
      "learning_rate": 9.866888247076373e-07,
      "loss": 0.4114,
      "step": 11771
    },
    {
      "epoch": 1.4433545855811674,
      "grad_norm": 1.954633696219693,
      "learning_rate": 9.862854224692294e-07,
      "loss": 0.4381,
      "step": 11772
    },
    {
      "epoch": 1.443477194703286,
      "grad_norm": 1.9881652844094566,
      "learning_rate": 9.858820824471713e-07,
      "loss": 0.4401,
      "step": 11773
    },
    {
      "epoch": 1.4435998038254045,
      "grad_norm": 2.055805493476794,
      "learning_rate": 9.854788046580393e-07,
      "loss": 0.4341,
      "step": 11774
    },
    {
      "epoch": 1.4437224129475232,
      "grad_norm": 1.7502867982000498,
      "learning_rate": 9.850755891184105e-07,
      "loss": 0.3972,
      "step": 11775
    },
    {
      "epoch": 1.443845022069642,
      "grad_norm": 2.0077692118079735,
      "learning_rate": 9.846724358448562e-07,
      "loss": 0.4663,
      "step": 11776
    },
    {
      "epoch": 1.4439676311917606,
      "grad_norm": 1.8974546975603919,
      "learning_rate": 9.842693448539481e-07,
      "loss": 0.3784,
      "step": 11777
    },
    {
      "epoch": 1.4440902403138793,
      "grad_norm": 2.2046602514075895,
      "learning_rate": 9.838663161622536e-07,
      "loss": 0.3895,
      "step": 11778
    },
    {
      "epoch": 1.444212849435998,
      "grad_norm": 2.001638176566886,
      "learning_rate": 9.834633497863366e-07,
      "loss": 0.4498,
      "step": 11779
    },
    {
      "epoch": 1.4443354585581167,
      "grad_norm": 1.985624763369225,
      "learning_rate": 9.830604457427606e-07,
      "loss": 0.426,
      "step": 11780
    },
    {
      "epoch": 1.4444580676802354,
      "grad_norm": 1.8173956774245397,
      "learning_rate": 9.82657604048087e-07,
      "loss": 0.4027,
      "step": 11781
    },
    {
      "epoch": 1.4445806768023541,
      "grad_norm": 2.034319162241697,
      "learning_rate": 9.822548247188717e-07,
      "loss": 0.4443,
      "step": 11782
    },
    {
      "epoch": 1.4447032859244728,
      "grad_norm": 2.0590651606900026,
      "learning_rate": 9.818521077716696e-07,
      "loss": 0.4269,
      "step": 11783
    },
    {
      "epoch": 1.4448258950465915,
      "grad_norm": 2.084016004398218,
      "learning_rate": 9.814494532230343e-07,
      "loss": 0.4205,
      "step": 11784
    },
    {
      "epoch": 1.4449485041687102,
      "grad_norm": 1.9594233868025819,
      "learning_rate": 9.810468610895147e-07,
      "loss": 0.4214,
      "step": 11785
    },
    {
      "epoch": 1.4450711132908287,
      "grad_norm": 2.062661861180223,
      "learning_rate": 9.806443313876573e-07,
      "loss": 0.4509,
      "step": 11786
    },
    {
      "epoch": 1.4451937224129474,
      "grad_norm": 1.8002022585609998,
      "learning_rate": 9.80241864134008e-07,
      "loss": 0.4248,
      "step": 11787
    },
    {
      "epoch": 1.4453163315350661,
      "grad_norm": 1.9370992026517817,
      "learning_rate": 9.798394593451092e-07,
      "loss": 0.4195,
      "step": 11788
    },
    {
      "epoch": 1.4454389406571848,
      "grad_norm": 2.000802378767198,
      "learning_rate": 9.794371170375e-07,
      "loss": 0.4129,
      "step": 11789
    },
    {
      "epoch": 1.4455615497793035,
      "grad_norm": 1.953027439201587,
      "learning_rate": 9.790348372277165e-07,
      "loss": 0.4785,
      "step": 11790
    },
    {
      "epoch": 1.4456841589014222,
      "grad_norm": 1.8502577785182868,
      "learning_rate": 9.786326199322943e-07,
      "loss": 0.4343,
      "step": 11791
    },
    {
      "epoch": 1.445806768023541,
      "grad_norm": 1.9403794247598545,
      "learning_rate": 9.78230465167764e-07,
      "loss": 0.4831,
      "step": 11792
    },
    {
      "epoch": 1.4459293771456596,
      "grad_norm": 1.6984277718582783,
      "learning_rate": 9.77828372950656e-07,
      "loss": 0.4151,
      "step": 11793
    },
    {
      "epoch": 1.4460519862677783,
      "grad_norm": 1.9478048228500284,
      "learning_rate": 9.774263432974974e-07,
      "loss": 0.4405,
      "step": 11794
    },
    {
      "epoch": 1.446174595389897,
      "grad_norm": 1.9837613921471788,
      "learning_rate": 9.770243762248113e-07,
      "loss": 0.4472,
      "step": 11795
    },
    {
      "epoch": 1.4462972045120157,
      "grad_norm": 1.9574607324535578,
      "learning_rate": 9.76622471749119e-07,
      "loss": 0.4593,
      "step": 11796
    },
    {
      "epoch": 1.4464198136341344,
      "grad_norm": 1.9136626847328995,
      "learning_rate": 9.76220629886941e-07,
      "loss": 0.4221,
      "step": 11797
    },
    {
      "epoch": 1.446542422756253,
      "grad_norm": 1.877192340978035,
      "learning_rate": 9.758188506547925e-07,
      "loss": 0.4112,
      "step": 11798
    },
    {
      "epoch": 1.4466650318783718,
      "grad_norm": 1.857296516090368,
      "learning_rate": 9.754171340691868e-07,
      "loss": 0.4067,
      "step": 11799
    },
    {
      "epoch": 1.4467876410004905,
      "grad_norm": 2.058125201525951,
      "learning_rate": 9.75015480146636e-07,
      "loss": 0.4632,
      "step": 11800
    },
    {
      "epoch": 1.4469102501226092,
      "grad_norm": 2.0129512882741056,
      "learning_rate": 9.746138889036494e-07,
      "loss": 0.4744,
      "step": 11801
    },
    {
      "epoch": 1.447032859244728,
      "grad_norm": 2.057087666359747,
      "learning_rate": 9.742123603567327e-07,
      "loss": 0.4243,
      "step": 11802
    },
    {
      "epoch": 1.4471554683668466,
      "grad_norm": 1.9082156874200382,
      "learning_rate": 9.738108945223881e-07,
      "loss": 0.3964,
      "step": 11803
    },
    {
      "epoch": 1.4472780774889653,
      "grad_norm": 2.120878299661053,
      "learning_rate": 9.73409491417119e-07,
      "loss": 0.4137,
      "step": 11804
    },
    {
      "epoch": 1.4474006866110838,
      "grad_norm": 1.7823227484534876,
      "learning_rate": 9.73008151057421e-07,
      "loss": 0.4242,
      "step": 11805
    },
    {
      "epoch": 1.4475232957332025,
      "grad_norm": 2.0126908130511465,
      "learning_rate": 9.726068734597924e-07,
      "loss": 0.4343,
      "step": 11806
    },
    {
      "epoch": 1.4476459048553212,
      "grad_norm": 2.0676352106522904,
      "learning_rate": 9.722056586407245e-07,
      "loss": 0.4238,
      "step": 11807
    },
    {
      "epoch": 1.4477685139774399,
      "grad_norm": 1.8801669861791408,
      "learning_rate": 9.718045066167094e-07,
      "loss": 0.4235,
      "step": 11808
    },
    {
      "epoch": 1.4478911230995586,
      "grad_norm": 2.0732879029020954,
      "learning_rate": 9.71403417404234e-07,
      "loss": 0.3952,
      "step": 11809
    },
    {
      "epoch": 1.4480137322216773,
      "grad_norm": 1.892880544316821,
      "learning_rate": 9.71002391019785e-07,
      "loss": 0.4146,
      "step": 11810
    },
    {
      "epoch": 1.448136341343796,
      "grad_norm": 2.0492241849688897,
      "learning_rate": 9.706014274798444e-07,
      "loss": 0.4568,
      "step": 11811
    },
    {
      "epoch": 1.4482589504659147,
      "grad_norm": 1.9753537053738366,
      "learning_rate": 9.702005268008924e-07,
      "loss": 0.4271,
      "step": 11812
    },
    {
      "epoch": 1.4483815595880334,
      "grad_norm": 1.81681212882056,
      "learning_rate": 9.697996889994065e-07,
      "loss": 0.3966,
      "step": 11813
    },
    {
      "epoch": 1.448504168710152,
      "grad_norm": 1.893331046299974,
      "learning_rate": 9.693989140918635e-07,
      "loss": 0.429,
      "step": 11814
    },
    {
      "epoch": 1.4486267778322708,
      "grad_norm": 2.281743437708844,
      "learning_rate": 9.68998202094735e-07,
      "loss": 0.4638,
      "step": 11815
    },
    {
      "epoch": 1.4487493869543895,
      "grad_norm": 2.0080143830326,
      "learning_rate": 9.685975530244899e-07,
      "loss": 0.4435,
      "step": 11816
    },
    {
      "epoch": 1.448871996076508,
      "grad_norm": 2.0152439371909314,
      "learning_rate": 9.681969668975971e-07,
      "loss": 0.4275,
      "step": 11817
    },
    {
      "epoch": 1.4489946051986267,
      "grad_norm": 1.9567079440101567,
      "learning_rate": 9.677964437305202e-07,
      "loss": 0.4214,
      "step": 11818
    },
    {
      "epoch": 1.4491172143207454,
      "grad_norm": 1.9658780132703098,
      "learning_rate": 9.673959835397226e-07,
      "loss": 0.4521,
      "step": 11819
    },
    {
      "epoch": 1.449239823442864,
      "grad_norm": 1.811143080975903,
      "learning_rate": 9.669955863416627e-07,
      "loss": 0.4341,
      "step": 11820
    },
    {
      "epoch": 1.4493624325649828,
      "grad_norm": 2.0059715493841628,
      "learning_rate": 9.665952521527992e-07,
      "loss": 0.4421,
      "step": 11821
    },
    {
      "epoch": 1.4494850416871015,
      "grad_norm": 1.7301148492153628,
      "learning_rate": 9.661949809895843e-07,
      "loss": 0.4121,
      "step": 11822
    },
    {
      "epoch": 1.4496076508092202,
      "grad_norm": 1.9557555441116763,
      "learning_rate": 9.657947728684722e-07,
      "loss": 0.424,
      "step": 11823
    },
    {
      "epoch": 1.4497302599313389,
      "grad_norm": 2.1558122627336496,
      "learning_rate": 9.65394627805911e-07,
      "loss": 0.431,
      "step": 11824
    },
    {
      "epoch": 1.4498528690534576,
      "grad_norm": 1.7823375954592209,
      "learning_rate": 9.649945458183464e-07,
      "loss": 0.4336,
      "step": 11825
    },
    {
      "epoch": 1.4499754781755763,
      "grad_norm": 2.053045309232243,
      "learning_rate": 9.645945269222235e-07,
      "loss": 0.442,
      "step": 11826
    },
    {
      "epoch": 1.450098087297695,
      "grad_norm": 2.0961501098084483,
      "learning_rate": 9.641945711339844e-07,
      "loss": 0.435,
      "step": 11827
    },
    {
      "epoch": 1.4502206964198137,
      "grad_norm": 1.9462532043799186,
      "learning_rate": 9.637946784700677e-07,
      "loss": 0.4429,
      "step": 11828
    },
    {
      "epoch": 1.4503433055419324,
      "grad_norm": 2.062477711454458,
      "learning_rate": 9.633948489469084e-07,
      "loss": 0.4338,
      "step": 11829
    },
    {
      "epoch": 1.450465914664051,
      "grad_norm": 2.157847895876383,
      "learning_rate": 9.629950825809412e-07,
      "loss": 0.43,
      "step": 11830
    },
    {
      "epoch": 1.4505885237861698,
      "grad_norm": 1.8271833257780732,
      "learning_rate": 9.62595379388599e-07,
      "loss": 0.4241,
      "step": 11831
    },
    {
      "epoch": 1.4507111329082885,
      "grad_norm": 2.0262457525524815,
      "learning_rate": 9.621957393863065e-07,
      "loss": 0.4584,
      "step": 11832
    },
    {
      "epoch": 1.4508337420304072,
      "grad_norm": 2.067518730130664,
      "learning_rate": 9.617961625904915e-07,
      "loss": 0.4386,
      "step": 11833
    },
    {
      "epoch": 1.4509563511525259,
      "grad_norm": 1.9721321269578342,
      "learning_rate": 9.613966490175785e-07,
      "loss": 0.3986,
      "step": 11834
    },
    {
      "epoch": 1.4510789602746446,
      "grad_norm": 2.0074955229149394,
      "learning_rate": 9.60997198683987e-07,
      "loss": 0.4151,
      "step": 11835
    },
    {
      "epoch": 1.4512015693967633,
      "grad_norm": 1.9251160668236391,
      "learning_rate": 9.605978116061346e-07,
      "loss": 0.4079,
      "step": 11836
    },
    {
      "epoch": 1.4513241785188817,
      "grad_norm": 1.9357921188253349,
      "learning_rate": 9.601984878004369e-07,
      "loss": 0.4357,
      "step": 11837
    },
    {
      "epoch": 1.4514467876410004,
      "grad_norm": 1.8291869061509443,
      "learning_rate": 9.597992272833088e-07,
      "loss": 0.4385,
      "step": 11838
    },
    {
      "epoch": 1.4515693967631191,
      "grad_norm": 2.035230949833474,
      "learning_rate": 9.59400030071159e-07,
      "loss": 0.4491,
      "step": 11839
    },
    {
      "epoch": 1.4516920058852378,
      "grad_norm": 1.9443946038221604,
      "learning_rate": 9.590008961803942e-07,
      "loss": 0.4466,
      "step": 11840
    },
    {
      "epoch": 1.4518146150073565,
      "grad_norm": 2.0491811203259553,
      "learning_rate": 9.586018256274218e-07,
      "loss": 0.4774,
      "step": 11841
    },
    {
      "epoch": 1.4519372241294752,
      "grad_norm": 2.0345825425580464,
      "learning_rate": 9.582028184286423e-07,
      "loss": 0.4143,
      "step": 11842
    },
    {
      "epoch": 1.452059833251594,
      "grad_norm": 1.855214382693416,
      "learning_rate": 9.578038746004566e-07,
      "loss": 0.4024,
      "step": 11843
    },
    {
      "epoch": 1.4521824423737126,
      "grad_norm": 2.1358880305311874,
      "learning_rate": 9.574049941592636e-07,
      "loss": 0.4335,
      "step": 11844
    },
    {
      "epoch": 1.4523050514958313,
      "grad_norm": 2.0395024809030535,
      "learning_rate": 9.570061771214545e-07,
      "loss": 0.41,
      "step": 11845
    },
    {
      "epoch": 1.45242766061795,
      "grad_norm": 2.0000785495327515,
      "learning_rate": 9.56607423503423e-07,
      "loss": 0.4602,
      "step": 11846
    },
    {
      "epoch": 1.4525502697400687,
      "grad_norm": 1.790892443109985,
      "learning_rate": 9.5620873332156e-07,
      "loss": 0.4234,
      "step": 11847
    },
    {
      "epoch": 1.4526728788621872,
      "grad_norm": 1.9060594737180951,
      "learning_rate": 9.55810106592251e-07,
      "loss": 0.4757,
      "step": 11848
    },
    {
      "epoch": 1.452795487984306,
      "grad_norm": 1.88209615030382,
      "learning_rate": 9.554115433318794e-07,
      "loss": 0.4468,
      "step": 11849
    },
    {
      "epoch": 1.4529180971064246,
      "grad_norm": 1.8851587619584877,
      "learning_rate": 9.550130435568278e-07,
      "loss": 0.4033,
      "step": 11850
    },
    {
      "epoch": 1.4530407062285433,
      "grad_norm": 2.1377812457972105,
      "learning_rate": 9.546146072834764e-07,
      "loss": 0.3821,
      "step": 11851
    },
    {
      "epoch": 1.453163315350662,
      "grad_norm": 2.033229600674303,
      "learning_rate": 9.542162345282006e-07,
      "loss": 0.4139,
      "step": 11852
    },
    {
      "epoch": 1.4532859244727807,
      "grad_norm": 1.9181660786143746,
      "learning_rate": 9.538179253073728e-07,
      "loss": 0.4295,
      "step": 11853
    },
    {
      "epoch": 1.4534085335948994,
      "grad_norm": 1.912637628635903,
      "learning_rate": 9.534196796373669e-07,
      "loss": 0.4568,
      "step": 11854
    },
    {
      "epoch": 1.453531142717018,
      "grad_norm": 1.9268263712429912,
      "learning_rate": 9.530214975345489e-07,
      "loss": 0.4613,
      "step": 11855
    },
    {
      "epoch": 1.4536537518391368,
      "grad_norm": 1.9320318494466369,
      "learning_rate": 9.526233790152872e-07,
      "loss": 0.4106,
      "step": 11856
    },
    {
      "epoch": 1.4537763609612555,
      "grad_norm": 1.9661116484366374,
      "learning_rate": 9.522253240959433e-07,
      "loss": 0.4221,
      "step": 11857
    },
    {
      "epoch": 1.4538989700833742,
      "grad_norm": 2.237621029672826,
      "learning_rate": 9.518273327928792e-07,
      "loss": 0.4497,
      "step": 11858
    },
    {
      "epoch": 1.454021579205493,
      "grad_norm": 2.121830129351961,
      "learning_rate": 9.514294051224521e-07,
      "loss": 0.4116,
      "step": 11859
    },
    {
      "epoch": 1.4541441883276116,
      "grad_norm": 1.913832389341977,
      "learning_rate": 9.510315411010185e-07,
      "loss": 0.4283,
      "step": 11860
    },
    {
      "epoch": 1.4542667974497303,
      "grad_norm": 1.949373278806474,
      "learning_rate": 9.506337407449309e-07,
      "loss": 0.3975,
      "step": 11861
    },
    {
      "epoch": 1.454389406571849,
      "grad_norm": 1.7620451379011195,
      "learning_rate": 9.502360040705389e-07,
      "loss": 0.4048,
      "step": 11862
    },
    {
      "epoch": 1.4545120156939677,
      "grad_norm": 1.957765201488423,
      "learning_rate": 9.498383310941903e-07,
      "loss": 0.4369,
      "step": 11863
    },
    {
      "epoch": 1.4546346248160864,
      "grad_norm": 2.0542336908491463,
      "learning_rate": 9.494407218322327e-07,
      "loss": 0.4096,
      "step": 11864
    },
    {
      "epoch": 1.454757233938205,
      "grad_norm": 1.9383173150187967,
      "learning_rate": 9.490431763010047e-07,
      "loss": 0.4231,
      "step": 11865
    },
    {
      "epoch": 1.4548798430603238,
      "grad_norm": 2.022333694484177,
      "learning_rate": 9.486456945168476e-07,
      "loss": 0.3996,
      "step": 11866
    },
    {
      "epoch": 1.4550024521824425,
      "grad_norm": 2.1173639874505144,
      "learning_rate": 9.482482764961001e-07,
      "loss": 0.4656,
      "step": 11867
    },
    {
      "epoch": 1.455125061304561,
      "grad_norm": 2.0440559122145263,
      "learning_rate": 9.478509222550947e-07,
      "loss": 0.4232,
      "step": 11868
    },
    {
      "epoch": 1.4552476704266797,
      "grad_norm": 2.011192538820454,
      "learning_rate": 9.474536318101652e-07,
      "loss": 0.4367,
      "step": 11869
    },
    {
      "epoch": 1.4553702795487984,
      "grad_norm": 1.899232995979453,
      "learning_rate": 9.470564051776387e-07,
      "loss": 0.3903,
      "step": 11870
    },
    {
      "epoch": 1.455492888670917,
      "grad_norm": 1.794212755637243,
      "learning_rate": 9.466592423738447e-07,
      "loss": 0.4326,
      "step": 11871
    },
    {
      "epoch": 1.4556154977930358,
      "grad_norm": 1.8482480511030512,
      "learning_rate": 9.462621434151045e-07,
      "loss": 0.4473,
      "step": 11872
    },
    {
      "epoch": 1.4557381069151545,
      "grad_norm": 1.9059393811869352,
      "learning_rate": 9.458651083177419e-07,
      "loss": 0.4084,
      "step": 11873
    },
    {
      "epoch": 1.4558607160372732,
      "grad_norm": 1.8943294750074153,
      "learning_rate": 9.454681370980748e-07,
      "loss": 0.4213,
      "step": 11874
    },
    {
      "epoch": 1.4559833251593919,
      "grad_norm": 1.7827311566069353,
      "learning_rate": 9.450712297724182e-07,
      "loss": 0.4463,
      "step": 11875
    },
    {
      "epoch": 1.4561059342815106,
      "grad_norm": 2.1244227065374783,
      "learning_rate": 9.446743863570873e-07,
      "loss": 0.4194,
      "step": 11876
    },
    {
      "epoch": 1.4562285434036293,
      "grad_norm": 1.7856610210726156,
      "learning_rate": 9.442776068683932e-07,
      "loss": 0.373,
      "step": 11877
    },
    {
      "epoch": 1.456351152525748,
      "grad_norm": 1.8835313357259538,
      "learning_rate": 9.438808913226435e-07,
      "loss": 0.4226,
      "step": 11878
    },
    {
      "epoch": 1.4564737616478667,
      "grad_norm": 1.9869646030357104,
      "learning_rate": 9.434842397361435e-07,
      "loss": 0.4586,
      "step": 11879
    },
    {
      "epoch": 1.4565963707699852,
      "grad_norm": 1.811399079774274,
      "learning_rate": 9.430876521251975e-07,
      "loss": 0.421,
      "step": 11880
    },
    {
      "epoch": 1.4567189798921039,
      "grad_norm": 1.8023332834772818,
      "learning_rate": 9.426911285061053e-07,
      "loss": 0.4323,
      "step": 11881
    },
    {
      "epoch": 1.4568415890142226,
      "grad_norm": 1.8009407860371713,
      "learning_rate": 9.42294668895164e-07,
      "loss": 0.4337,
      "step": 11882
    },
    {
      "epoch": 1.4569641981363413,
      "grad_norm": 1.7501595641667154,
      "learning_rate": 9.418982733086693e-07,
      "loss": 0.4171,
      "step": 11883
    },
    {
      "epoch": 1.45708680725846,
      "grad_norm": 1.963416652254192,
      "learning_rate": 9.415019417629151e-07,
      "loss": 0.4697,
      "step": 11884
    },
    {
      "epoch": 1.4572094163805787,
      "grad_norm": 1.974599964780588,
      "learning_rate": 9.4110567427419e-07,
      "loss": 0.4323,
      "step": 11885
    },
    {
      "epoch": 1.4573320255026974,
      "grad_norm": 1.876647882815058,
      "learning_rate": 9.407094708587805e-07,
      "loss": 0.4001,
      "step": 11886
    },
    {
      "epoch": 1.457454634624816,
      "grad_norm": 2.1141217805511037,
      "learning_rate": 9.403133315329735e-07,
      "loss": 0.4584,
      "step": 11887
    },
    {
      "epoch": 1.4575772437469348,
      "grad_norm": 1.9165175697910062,
      "learning_rate": 9.399172563130488e-07,
      "loss": 0.4344,
      "step": 11888
    },
    {
      "epoch": 1.4576998528690535,
      "grad_norm": 1.8990104152137186,
      "learning_rate": 9.395212452152866e-07,
      "loss": 0.3991,
      "step": 11889
    },
    {
      "epoch": 1.4578224619911722,
      "grad_norm": 1.768942351453932,
      "learning_rate": 9.391252982559648e-07,
      "loss": 0.4385,
      "step": 11890
    },
    {
      "epoch": 1.4579450711132909,
      "grad_norm": 1.9218493939242012,
      "learning_rate": 9.387294154513565e-07,
      "loss": 0.4457,
      "step": 11891
    },
    {
      "epoch": 1.4580676802354096,
      "grad_norm": 1.8995611514846626,
      "learning_rate": 9.383335968177324e-07,
      "loss": 0.4194,
      "step": 11892
    },
    {
      "epoch": 1.4581902893575283,
      "grad_norm": 1.7571234751237959,
      "learning_rate": 9.379378423713622e-07,
      "loss": 0.4316,
      "step": 11893
    },
    {
      "epoch": 1.458312898479647,
      "grad_norm": 1.9315616739194401,
      "learning_rate": 9.375421521285139e-07,
      "loss": 0.4418,
      "step": 11894
    },
    {
      "epoch": 1.4584355076017657,
      "grad_norm": 1.9635529583061462,
      "learning_rate": 9.371465261054474e-07,
      "loss": 0.4387,
      "step": 11895
    },
    {
      "epoch": 1.4585581167238844,
      "grad_norm": 2.0342160862002503,
      "learning_rate": 9.367509643184256e-07,
      "loss": 0.4191,
      "step": 11896
    },
    {
      "epoch": 1.458680725846003,
      "grad_norm": 1.9800802887550113,
      "learning_rate": 9.363554667837075e-07,
      "loss": 0.4135,
      "step": 11897
    },
    {
      "epoch": 1.4588033349681218,
      "grad_norm": 1.7908928265197097,
      "learning_rate": 9.359600335175478e-07,
      "loss": 0.4252,
      "step": 11898
    },
    {
      "epoch": 1.4589259440902402,
      "grad_norm": 1.8619997856955934,
      "learning_rate": 9.355646645361988e-07,
      "loss": 0.4376,
      "step": 11899
    },
    {
      "epoch": 1.459048553212359,
      "grad_norm": 1.9382754189333136,
      "learning_rate": 9.351693598559128e-07,
      "loss": 0.4603,
      "step": 11900
    },
    {
      "epoch": 1.4591711623344776,
      "grad_norm": 1.9604850860051661,
      "learning_rate": 9.347741194929352e-07,
      "loss": 0.4212,
      "step": 11901
    },
    {
      "epoch": 1.4592937714565963,
      "grad_norm": 1.8391187652285008,
      "learning_rate": 9.343789434635136e-07,
      "loss": 0.4391,
      "step": 11902
    },
    {
      "epoch": 1.459416380578715,
      "grad_norm": 2.1720411970937903,
      "learning_rate": 9.339838317838879e-07,
      "loss": 0.4268,
      "step": 11903
    },
    {
      "epoch": 1.4595389897008337,
      "grad_norm": 1.8073706911347749,
      "learning_rate": 9.335887844703001e-07,
      "loss": 0.3988,
      "step": 11904
    },
    {
      "epoch": 1.4596615988229524,
      "grad_norm": 2.7973698074338684,
      "learning_rate": 9.331938015389858e-07,
      "loss": 0.4389,
      "step": 11905
    },
    {
      "epoch": 1.4597842079450711,
      "grad_norm": 1.8517105173526547,
      "learning_rate": 9.327988830061808e-07,
      "loss": 0.4223,
      "step": 11906
    },
    {
      "epoch": 1.4599068170671898,
      "grad_norm": 1.8796507333895607,
      "learning_rate": 9.324040288881159e-07,
      "loss": 0.4301,
      "step": 11907
    },
    {
      "epoch": 1.4600294261893085,
      "grad_norm": 1.9972378744556551,
      "learning_rate": 9.3200923920102e-07,
      "loss": 0.443,
      "step": 11908
    },
    {
      "epoch": 1.4601520353114272,
      "grad_norm": 1.9078651596058758,
      "learning_rate": 9.316145139611202e-07,
      "loss": 0.4361,
      "step": 11909
    },
    {
      "epoch": 1.460274644433546,
      "grad_norm": 1.780151954485021,
      "learning_rate": 9.312198531846414e-07,
      "loss": 0.4325,
      "step": 11910
    },
    {
      "epoch": 1.4603972535556644,
      "grad_norm": 2.179406893709763,
      "learning_rate": 9.30825256887804e-07,
      "loss": 0.4041,
      "step": 11911
    },
    {
      "epoch": 1.460519862677783,
      "grad_norm": 1.8575802170307143,
      "learning_rate": 9.304307250868255e-07,
      "loss": 0.3933,
      "step": 11912
    },
    {
      "epoch": 1.4606424717999018,
      "grad_norm": 2.0194521849548352,
      "learning_rate": 9.30036257797923e-07,
      "loss": 0.433,
      "step": 11913
    },
    {
      "epoch": 1.4607650809220205,
      "grad_norm": 1.888404611438133,
      "learning_rate": 9.296418550373107e-07,
      "loss": 0.4201,
      "step": 11914
    },
    {
      "epoch": 1.4608876900441392,
      "grad_norm": 1.8886538896499816,
      "learning_rate": 9.292475168211982e-07,
      "loss": 0.3968,
      "step": 11915
    },
    {
      "epoch": 1.461010299166258,
      "grad_norm": 1.9152093920839441,
      "learning_rate": 9.288532431657926e-07,
      "loss": 0.4501,
      "step": 11916
    },
    {
      "epoch": 1.4611329082883766,
      "grad_norm": 1.8794390347558227,
      "learning_rate": 9.284590340873012e-07,
      "loss": 0.4237,
      "step": 11917
    },
    {
      "epoch": 1.4612555174104953,
      "grad_norm": 1.8620329657827153,
      "learning_rate": 9.280648896019245e-07,
      "loss": 0.4092,
      "step": 11918
    },
    {
      "epoch": 1.461378126532614,
      "grad_norm": 2.043290638934914,
      "learning_rate": 9.276708097258649e-07,
      "loss": 0.4441,
      "step": 11919
    },
    {
      "epoch": 1.4615007356547327,
      "grad_norm": 1.9964991704159512,
      "learning_rate": 9.272767944753186e-07,
      "loss": 0.3814,
      "step": 11920
    },
    {
      "epoch": 1.4616233447768514,
      "grad_norm": 1.9910767795340931,
      "learning_rate": 9.268828438664795e-07,
      "loss": 0.4193,
      "step": 11921
    },
    {
      "epoch": 1.46174595389897,
      "grad_norm": 2.0817597380641417,
      "learning_rate": 9.264889579155403e-07,
      "loss": 0.416,
      "step": 11922
    },
    {
      "epoch": 1.4618685630210888,
      "grad_norm": 1.9646174652297297,
      "learning_rate": 9.260951366386917e-07,
      "loss": 0.4326,
      "step": 11923
    },
    {
      "epoch": 1.4619911721432075,
      "grad_norm": 1.913945610583129,
      "learning_rate": 9.257013800521192e-07,
      "loss": 0.3931,
      "step": 11924
    },
    {
      "epoch": 1.4621137812653262,
      "grad_norm": 1.9527742026448296,
      "learning_rate": 9.253076881720063e-07,
      "loss": 0.4393,
      "step": 11925
    },
    {
      "epoch": 1.462236390387445,
      "grad_norm": 1.8521274657785154,
      "learning_rate": 9.249140610145352e-07,
      "loss": 0.3805,
      "step": 11926
    },
    {
      "epoch": 1.4623589995095636,
      "grad_norm": 1.882029964868613,
      "learning_rate": 9.245204985958861e-07,
      "loss": 0.4107,
      "step": 11927
    },
    {
      "epoch": 1.4624816086316823,
      "grad_norm": 1.9929513072100127,
      "learning_rate": 9.241270009322323e-07,
      "loss": 0.4101,
      "step": 11928
    },
    {
      "epoch": 1.462604217753801,
      "grad_norm": 1.9690542342348463,
      "learning_rate": 9.237335680397486e-07,
      "loss": 0.4489,
      "step": 11929
    },
    {
      "epoch": 1.4627268268759197,
      "grad_norm": 1.835886900076327,
      "learning_rate": 9.233401999346065e-07,
      "loss": 0.4055,
      "step": 11930
    },
    {
      "epoch": 1.4628494359980382,
      "grad_norm": 2.056121955043545,
      "learning_rate": 9.229468966329736e-07,
      "loss": 0.4307,
      "step": 11931
    },
    {
      "epoch": 1.4629720451201569,
      "grad_norm": 1.8917779621057544,
      "learning_rate": 9.225536581510142e-07,
      "loss": 0.4017,
      "step": 11932
    },
    {
      "epoch": 1.4630946542422756,
      "grad_norm": 1.782438012418205,
      "learning_rate": 9.22160484504892e-07,
      "loss": 0.4296,
      "step": 11933
    },
    {
      "epoch": 1.4632172633643943,
      "grad_norm": 2.0471926405073084,
      "learning_rate": 9.217673757107684e-07,
      "loss": 0.4166,
      "step": 11934
    },
    {
      "epoch": 1.463339872486513,
      "grad_norm": 2.0213699448098494,
      "learning_rate": 9.213743317847995e-07,
      "loss": 0.4281,
      "step": 11935
    },
    {
      "epoch": 1.4634624816086317,
      "grad_norm": 2.15300280036347,
      "learning_rate": 9.209813527431393e-07,
      "loss": 0.4345,
      "step": 11936
    },
    {
      "epoch": 1.4635850907307504,
      "grad_norm": 2.170811143745858,
      "learning_rate": 9.205884386019423e-07,
      "loss": 0.4137,
      "step": 11937
    },
    {
      "epoch": 1.463707699852869,
      "grad_norm": 1.938306297539575,
      "learning_rate": 9.201955893773554e-07,
      "loss": 0.464,
      "step": 11938
    },
    {
      "epoch": 1.4638303089749878,
      "grad_norm": 1.9697184481617056,
      "learning_rate": 9.198028050855265e-07,
      "loss": 0.399,
      "step": 11939
    },
    {
      "epoch": 1.4639529180971065,
      "grad_norm": 1.8834985787582206,
      "learning_rate": 9.194100857426016e-07,
      "loss": 0.4251,
      "step": 11940
    },
    {
      "epoch": 1.4640755272192252,
      "grad_norm": 1.991795883652973,
      "learning_rate": 9.190174313647188e-07,
      "loss": 0.4547,
      "step": 11941
    },
    {
      "epoch": 1.4641981363413437,
      "grad_norm": 1.9061793765604882,
      "learning_rate": 9.186248419680186e-07,
      "loss": 0.4124,
      "step": 11942
    },
    {
      "epoch": 1.4643207454634624,
      "grad_norm": 1.9600526022690696,
      "learning_rate": 9.182323175686375e-07,
      "loss": 0.4203,
      "step": 11943
    },
    {
      "epoch": 1.464443354585581,
      "grad_norm": 2.185114140933698,
      "learning_rate": 9.178398581827086e-07,
      "loss": 0.4191,
      "step": 11944
    },
    {
      "epoch": 1.4645659637076998,
      "grad_norm": 2.0877848282017406,
      "learning_rate": 9.17447463826362e-07,
      "loss": 0.4441,
      "step": 11945
    },
    {
      "epoch": 1.4646885728298185,
      "grad_norm": 1.9059784613416264,
      "learning_rate": 9.170551345157258e-07,
      "loss": 0.4321,
      "step": 11946
    },
    {
      "epoch": 1.4648111819519372,
      "grad_norm": 2.073233304838771,
      "learning_rate": 9.16662870266927e-07,
      "loss": 0.4353,
      "step": 11947
    },
    {
      "epoch": 1.4649337910740559,
      "grad_norm": 1.9398354773083901,
      "learning_rate": 9.162706710960876e-07,
      "loss": 0.4231,
      "step": 11948
    },
    {
      "epoch": 1.4650564001961746,
      "grad_norm": 2.0803652545875315,
      "learning_rate": 9.15878537019326e-07,
      "loss": 0.4365,
      "step": 11949
    },
    {
      "epoch": 1.4651790093182933,
      "grad_norm": 2.1623816037046213,
      "learning_rate": 9.154864680527622e-07,
      "loss": 0.3971,
      "step": 11950
    },
    {
      "epoch": 1.465301618440412,
      "grad_norm": 1.8811856694991687,
      "learning_rate": 9.15094464212509e-07,
      "loss": 0.4262,
      "step": 11951
    },
    {
      "epoch": 1.4654242275625307,
      "grad_norm": 2.0182760356974696,
      "learning_rate": 9.147025255146796e-07,
      "loss": 0.4744,
      "step": 11952
    },
    {
      "epoch": 1.4655468366846494,
      "grad_norm": 1.952535462369541,
      "learning_rate": 9.143106519753824e-07,
      "loss": 0.4003,
      "step": 11953
    },
    {
      "epoch": 1.465669445806768,
      "grad_norm": 1.9363187322618878,
      "learning_rate": 9.139188436107255e-07,
      "loss": 0.447,
      "step": 11954
    },
    {
      "epoch": 1.4657920549288868,
      "grad_norm": 2.0722286987974865,
      "learning_rate": 9.135271004368109e-07,
      "loss": 0.4194,
      "step": 11955
    },
    {
      "epoch": 1.4659146640510055,
      "grad_norm": 1.9123404514029638,
      "learning_rate": 9.131354224697423e-07,
      "loss": 0.3886,
      "step": 11956
    },
    {
      "epoch": 1.4660372731731242,
      "grad_norm": 2.073933951145975,
      "learning_rate": 9.127438097256172e-07,
      "loss": 0.3848,
      "step": 11957
    },
    {
      "epoch": 1.4661598822952429,
      "grad_norm": 1.8681789451948492,
      "learning_rate": 9.123522622205305e-07,
      "loss": 0.443,
      "step": 11958
    },
    {
      "epoch": 1.4662824914173616,
      "grad_norm": 2.0874874395886254,
      "learning_rate": 9.119607799705766e-07,
      "loss": 0.4376,
      "step": 11959
    },
    {
      "epoch": 1.4664051005394803,
      "grad_norm": 1.8804930382604064,
      "learning_rate": 9.115693629918468e-07,
      "loss": 0.3976,
      "step": 11960
    },
    {
      "epoch": 1.466527709661599,
      "grad_norm": 1.9307248562619401,
      "learning_rate": 9.111780113004284e-07,
      "loss": 0.3863,
      "step": 11961
    },
    {
      "epoch": 1.4666503187837174,
      "grad_norm": 1.9369726676877703,
      "learning_rate": 9.107867249124055e-07,
      "loss": 0.4122,
      "step": 11962
    },
    {
      "epoch": 1.4667729279058361,
      "grad_norm": 1.9797158466813252,
      "learning_rate": 9.103955038438628e-07,
      "loss": 0.3669,
      "step": 11963
    },
    {
      "epoch": 1.4668955370279548,
      "grad_norm": 1.9862963346110418,
      "learning_rate": 9.100043481108781e-07,
      "loss": 0.4166,
      "step": 11964
    },
    {
      "epoch": 1.4670181461500735,
      "grad_norm": 1.9170164696178962,
      "learning_rate": 9.096132577295303e-07,
      "loss": 0.3948,
      "step": 11965
    },
    {
      "epoch": 1.4671407552721922,
      "grad_norm": 1.968488294759011,
      "learning_rate": 9.092222327158923e-07,
      "loss": 0.4613,
      "step": 11966
    },
    {
      "epoch": 1.467263364394311,
      "grad_norm": 1.770830571969578,
      "learning_rate": 9.088312730860377e-07,
      "loss": 0.3968,
      "step": 11967
    },
    {
      "epoch": 1.4673859735164296,
      "grad_norm": 1.9676817461718445,
      "learning_rate": 9.084403788560339e-07,
      "loss": 0.4296,
      "step": 11968
    },
    {
      "epoch": 1.4675085826385483,
      "grad_norm": 2.0242663473601294,
      "learning_rate": 9.08049550041949e-07,
      "loss": 0.4013,
      "step": 11969
    },
    {
      "epoch": 1.467631191760667,
      "grad_norm": 2.0402732428883352,
      "learning_rate": 9.07658786659846e-07,
      "loss": 0.3933,
      "step": 11970
    },
    {
      "epoch": 1.4677538008827857,
      "grad_norm": 1.9342260078995444,
      "learning_rate": 9.072680887257848e-07,
      "loss": 0.4318,
      "step": 11971
    },
    {
      "epoch": 1.4678764100049044,
      "grad_norm": 1.922671902777451,
      "learning_rate": 9.068774562558252e-07,
      "loss": 0.4114,
      "step": 11972
    },
    {
      "epoch": 1.4679990191270231,
      "grad_norm": 1.8144231017849257,
      "learning_rate": 9.064868892660231e-07,
      "loss": 0.4395,
      "step": 11973
    },
    {
      "epoch": 1.4681216282491416,
      "grad_norm": 1.97834259154794,
      "learning_rate": 9.060963877724313e-07,
      "loss": 0.4145,
      "step": 11974
    },
    {
      "epoch": 1.4682442373712603,
      "grad_norm": 1.961812000144489,
      "learning_rate": 9.057059517910984e-07,
      "loss": 0.3933,
      "step": 11975
    },
    {
      "epoch": 1.468366846493379,
      "grad_norm": 1.950443093591998,
      "learning_rate": 9.053155813380746e-07,
      "loss": 0.4325,
      "step": 11976
    },
    {
      "epoch": 1.4684894556154977,
      "grad_norm": 1.7794108010809344,
      "learning_rate": 9.049252764294034e-07,
      "loss": 0.4388,
      "step": 11977
    },
    {
      "epoch": 1.4686120647376164,
      "grad_norm": 2.2379042724482376,
      "learning_rate": 9.045350370811265e-07,
      "loss": 0.4896,
      "step": 11978
    },
    {
      "epoch": 1.468734673859735,
      "grad_norm": 1.9923412805949217,
      "learning_rate": 9.041448633092839e-07,
      "loss": 0.4145,
      "step": 11979
    },
    {
      "epoch": 1.4688572829818538,
      "grad_norm": 2.2114854465488167,
      "learning_rate": 9.037547551299136e-07,
      "loss": 0.408,
      "step": 11980
    },
    {
      "epoch": 1.4689798921039725,
      "grad_norm": 1.9002582616116135,
      "learning_rate": 9.033647125590488e-07,
      "loss": 0.4502,
      "step": 11981
    },
    {
      "epoch": 1.4691025012260912,
      "grad_norm": 1.9791538553477523,
      "learning_rate": 9.029747356127203e-07,
      "loss": 0.3995,
      "step": 11982
    },
    {
      "epoch": 1.46922511034821,
      "grad_norm": 1.9248899785219178,
      "learning_rate": 9.025848243069582e-07,
      "loss": 0.3955,
      "step": 11983
    },
    {
      "epoch": 1.4693477194703286,
      "grad_norm": 2.106622002088192,
      "learning_rate": 9.021949786577868e-07,
      "loss": 0.468,
      "step": 11984
    },
    {
      "epoch": 1.4694703285924473,
      "grad_norm": 1.9773147152748547,
      "learning_rate": 9.018051986812307e-07,
      "loss": 0.4091,
      "step": 11985
    },
    {
      "epoch": 1.469592937714566,
      "grad_norm": 2.0189199501929185,
      "learning_rate": 9.01415484393311e-07,
      "loss": 0.4313,
      "step": 11986
    },
    {
      "epoch": 1.4697155468366847,
      "grad_norm": 1.9370252510182282,
      "learning_rate": 9.010258358100449e-07,
      "loss": 0.4365,
      "step": 11987
    },
    {
      "epoch": 1.4698381559588034,
      "grad_norm": 1.923430549605894,
      "learning_rate": 9.006362529474469e-07,
      "loss": 0.4506,
      "step": 11988
    },
    {
      "epoch": 1.469960765080922,
      "grad_norm": 1.9852224015164823,
      "learning_rate": 9.002467358215306e-07,
      "loss": 0.4257,
      "step": 11989
    },
    {
      "epoch": 1.4700833742030408,
      "grad_norm": 1.8495040473398683,
      "learning_rate": 8.998572844483069e-07,
      "loss": 0.4186,
      "step": 11990
    },
    {
      "epoch": 1.4702059833251595,
      "grad_norm": 2.0539167865976866,
      "learning_rate": 8.994678988437802e-07,
      "loss": 0.4252,
      "step": 11991
    },
    {
      "epoch": 1.4703285924472782,
      "grad_norm": 1.6969231073950992,
      "learning_rate": 8.990785790239564e-07,
      "loss": 0.4188,
      "step": 11992
    },
    {
      "epoch": 1.4704512015693967,
      "grad_norm": 1.8019601508967185,
      "learning_rate": 8.986893250048381e-07,
      "loss": 0.4419,
      "step": 11993
    },
    {
      "epoch": 1.4705738106915154,
      "grad_norm": 1.9919784151122188,
      "learning_rate": 8.983001368024233e-07,
      "loss": 0.3937,
      "step": 11994
    },
    {
      "epoch": 1.470696419813634,
      "grad_norm": 2.011779749850101,
      "learning_rate": 8.979110144327075e-07,
      "loss": 0.4288,
      "step": 11995
    },
    {
      "epoch": 1.4708190289357528,
      "grad_norm": 1.7015107614410967,
      "learning_rate": 8.975219579116865e-07,
      "loss": 0.4007,
      "step": 11996
    },
    {
      "epoch": 1.4709416380578715,
      "grad_norm": 1.976078958119139,
      "learning_rate": 8.971329672553489e-07,
      "loss": 0.4178,
      "step": 11997
    },
    {
      "epoch": 1.4710642471799902,
      "grad_norm": 1.9221787477940444,
      "learning_rate": 8.967440424796847e-07,
      "loss": 0.4609,
      "step": 11998
    },
    {
      "epoch": 1.4711868563021089,
      "grad_norm": 1.7874305357486346,
      "learning_rate": 8.963551836006781e-07,
      "loss": 0.4589,
      "step": 11999
    },
    {
      "epoch": 1.4713094654242276,
      "grad_norm": 1.9257155037063134,
      "learning_rate": 8.959663906343133e-07,
      "loss": 0.4217,
      "step": 12000
    },
    {
      "epoch": 1.4714320745463463,
      "grad_norm": 1.9025627627943895,
      "learning_rate": 8.955776635965685e-07,
      "loss": 0.4041,
      "step": 12001
    },
    {
      "epoch": 1.471554683668465,
      "grad_norm": 1.9198398867609587,
      "learning_rate": 8.951890025034232e-07,
      "loss": 0.4919,
      "step": 12002
    },
    {
      "epoch": 1.4716772927905837,
      "grad_norm": 1.9767437670171601,
      "learning_rate": 8.94800407370851e-07,
      "loss": 0.4081,
      "step": 12003
    },
    {
      "epoch": 1.4717999019127024,
      "grad_norm": 1.6805774541578737,
      "learning_rate": 8.944118782148229e-07,
      "loss": 0.4092,
      "step": 12004
    },
    {
      "epoch": 1.4719225110348209,
      "grad_norm": 1.9300424529417295,
      "learning_rate": 8.940234150513091e-07,
      "loss": 0.4111,
      "step": 12005
    },
    {
      "epoch": 1.4720451201569396,
      "grad_norm": 2.1888018779257234,
      "learning_rate": 8.936350178962769e-07,
      "loss": 0.4232,
      "step": 12006
    },
    {
      "epoch": 1.4721677292790583,
      "grad_norm": 2.1413646368591692,
      "learning_rate": 8.932466867656894e-07,
      "loss": 0.4198,
      "step": 12007
    },
    {
      "epoch": 1.472290338401177,
      "grad_norm": 2.0126028821702087,
      "learning_rate": 8.928584216755068e-07,
      "loss": 0.3993,
      "step": 12008
    },
    {
      "epoch": 1.4724129475232957,
      "grad_norm": 1.9819683818138212,
      "learning_rate": 8.924702226416881e-07,
      "loss": 0.4454,
      "step": 12009
    },
    {
      "epoch": 1.4725355566454144,
      "grad_norm": 2.038511813482603,
      "learning_rate": 8.920820896801902e-07,
      "loss": 0.4291,
      "step": 12010
    },
    {
      "epoch": 1.472658165767533,
      "grad_norm": 1.9807723990005077,
      "learning_rate": 8.916940228069648e-07,
      "loss": 0.4114,
      "step": 12011
    },
    {
      "epoch": 1.4727807748896518,
      "grad_norm": 2.2105583246695204,
      "learning_rate": 8.913060220379619e-07,
      "loss": 0.4835,
      "step": 12012
    },
    {
      "epoch": 1.4729033840117705,
      "grad_norm": 1.9460224589192407,
      "learning_rate": 8.909180873891299e-07,
      "loss": 0.4146,
      "step": 12013
    },
    {
      "epoch": 1.4730259931338892,
      "grad_norm": 1.9668571455215422,
      "learning_rate": 8.905302188764123e-07,
      "loss": 0.3798,
      "step": 12014
    },
    {
      "epoch": 1.4731486022560079,
      "grad_norm": 1.9755084948193806,
      "learning_rate": 8.901424165157533e-07,
      "loss": 0.4128,
      "step": 12015
    },
    {
      "epoch": 1.4732712113781266,
      "grad_norm": 2.0724529702918706,
      "learning_rate": 8.897546803230905e-07,
      "loss": 0.4603,
      "step": 12016
    },
    {
      "epoch": 1.4733938205002453,
      "grad_norm": 1.8799838345892328,
      "learning_rate": 8.893670103143606e-07,
      "loss": 0.4527,
      "step": 12017
    },
    {
      "epoch": 1.473516429622364,
      "grad_norm": 1.9567673564052361,
      "learning_rate": 8.889794065054976e-07,
      "loss": 0.4162,
      "step": 12018
    },
    {
      "epoch": 1.4736390387444827,
      "grad_norm": 1.838782846786666,
      "learning_rate": 8.885918689124343e-07,
      "loss": 0.4412,
      "step": 12019
    },
    {
      "epoch": 1.4737616478666014,
      "grad_norm": 1.9380369303065195,
      "learning_rate": 8.882043975510978e-07,
      "loss": 0.4527,
      "step": 12020
    },
    {
      "epoch": 1.47388425698872,
      "grad_norm": 2.1125418922395203,
      "learning_rate": 8.878169924374133e-07,
      "loss": 0.4413,
      "step": 12021
    },
    {
      "epoch": 1.4740068661108388,
      "grad_norm": 1.8418233791209546,
      "learning_rate": 8.874296535873044e-07,
      "loss": 0.3993,
      "step": 12022
    },
    {
      "epoch": 1.4741294752329575,
      "grad_norm": 1.7562454628783208,
      "learning_rate": 8.870423810166934e-07,
      "loss": 0.3794,
      "step": 12023
    },
    {
      "epoch": 1.4742520843550762,
      "grad_norm": 2.2679571540965098,
      "learning_rate": 8.866551747414942e-07,
      "loss": 0.4454,
      "step": 12024
    },
    {
      "epoch": 1.4743746934771946,
      "grad_norm": 2.1075225488456626,
      "learning_rate": 8.862680347776236e-07,
      "loss": 0.4498,
      "step": 12025
    },
    {
      "epoch": 1.4744973025993133,
      "grad_norm": 1.9849379014413093,
      "learning_rate": 8.858809611409946e-07,
      "loss": 0.4371,
      "step": 12026
    },
    {
      "epoch": 1.474619911721432,
      "grad_norm": 1.9002562107432333,
      "learning_rate": 8.854939538475155e-07,
      "loss": 0.4178,
      "step": 12027
    },
    {
      "epoch": 1.4747425208435507,
      "grad_norm": 2.2413273040511315,
      "learning_rate": 8.851070129130923e-07,
      "loss": 0.4477,
      "step": 12028
    },
    {
      "epoch": 1.4748651299656694,
      "grad_norm": 1.807446743155843,
      "learning_rate": 8.8472013835363e-07,
      "loss": 0.4135,
      "step": 12029
    },
    {
      "epoch": 1.4749877390877881,
      "grad_norm": 1.9271443863763518,
      "learning_rate": 8.843333301850305e-07,
      "loss": 0.3967,
      "step": 12030
    },
    {
      "epoch": 1.4751103482099068,
      "grad_norm": 1.9611516762179309,
      "learning_rate": 8.839465884231912e-07,
      "loss": 0.394,
      "step": 12031
    },
    {
      "epoch": 1.4752329573320255,
      "grad_norm": 1.84866342397169,
      "learning_rate": 8.835599130840075e-07,
      "loss": 0.4195,
      "step": 12032
    },
    {
      "epoch": 1.4753555664541442,
      "grad_norm": 1.7529345484924956,
      "learning_rate": 8.831733041833737e-07,
      "loss": 0.3852,
      "step": 12033
    },
    {
      "epoch": 1.475478175576263,
      "grad_norm": 2.1140062636121817,
      "learning_rate": 8.827867617371788e-07,
      "loss": 0.4306,
      "step": 12034
    },
    {
      "epoch": 1.4756007846983816,
      "grad_norm": 2.022631761954715,
      "learning_rate": 8.82400285761311e-07,
      "loss": 0.4182,
      "step": 12035
    },
    {
      "epoch": 1.4757233938205,
      "grad_norm": 1.878175096562713,
      "learning_rate": 8.820138762716568e-07,
      "loss": 0.4284,
      "step": 12036
    },
    {
      "epoch": 1.4758460029426188,
      "grad_norm": 2.1739682282284516,
      "learning_rate": 8.816275332840952e-07,
      "loss": 0.4282,
      "step": 12037
    },
    {
      "epoch": 1.4759686120647375,
      "grad_norm": 1.8904386323991738,
      "learning_rate": 8.812412568145067e-07,
      "loss": 0.423,
      "step": 12038
    },
    {
      "epoch": 1.4760912211868562,
      "grad_norm": 1.8861004249705156,
      "learning_rate": 8.808550468787694e-07,
      "loss": 0.4301,
      "step": 12039
    },
    {
      "epoch": 1.476213830308975,
      "grad_norm": 2.1231374094292637,
      "learning_rate": 8.804689034927564e-07,
      "loss": 0.4116,
      "step": 12040
    },
    {
      "epoch": 1.4763364394310936,
      "grad_norm": 1.9382656543077736,
      "learning_rate": 8.800828266723374e-07,
      "loss": 0.4457,
      "step": 12041
    },
    {
      "epoch": 1.4764590485532123,
      "grad_norm": 2.028896638609804,
      "learning_rate": 8.796968164333822e-07,
      "loss": 0.4402,
      "step": 12042
    },
    {
      "epoch": 1.476581657675331,
      "grad_norm": 1.8256042524516476,
      "learning_rate": 8.793108727917571e-07,
      "loss": 0.4368,
      "step": 12043
    },
    {
      "epoch": 1.4767042667974497,
      "grad_norm": 1.8739516006798556,
      "learning_rate": 8.789249957633245e-07,
      "loss": 0.4177,
      "step": 12044
    },
    {
      "epoch": 1.4768268759195684,
      "grad_norm": 1.9870884686595167,
      "learning_rate": 8.785391853639436e-07,
      "loss": 0.4198,
      "step": 12045
    },
    {
      "epoch": 1.476949485041687,
      "grad_norm": 1.9741316087784755,
      "learning_rate": 8.781534416094737e-07,
      "loss": 0.4467,
      "step": 12046
    },
    {
      "epoch": 1.4770720941638058,
      "grad_norm": 1.8131842239778726,
      "learning_rate": 8.777677645157675e-07,
      "loss": 0.4064,
      "step": 12047
    },
    {
      "epoch": 1.4771947032859245,
      "grad_norm": 2.0507696929742787,
      "learning_rate": 8.77382154098679e-07,
      "loss": 0.5078,
      "step": 12048
    },
    {
      "epoch": 1.4773173124080432,
      "grad_norm": 1.9381985040144822,
      "learning_rate": 8.769966103740557e-07,
      "loss": 0.4211,
      "step": 12049
    },
    {
      "epoch": 1.477439921530162,
      "grad_norm": 2.118897249921873,
      "learning_rate": 8.766111333577462e-07,
      "loss": 0.3964,
      "step": 12050
    },
    {
      "epoch": 1.4775625306522806,
      "grad_norm": 1.893705992795762,
      "learning_rate": 8.762257230655918e-07,
      "loss": 0.3813,
      "step": 12051
    },
    {
      "epoch": 1.4776851397743993,
      "grad_norm": 2.0793796369200375,
      "learning_rate": 8.75840379513436e-07,
      "loss": 0.4665,
      "step": 12052
    },
    {
      "epoch": 1.477807748896518,
      "grad_norm": 1.9825686795933868,
      "learning_rate": 8.75455102717116e-07,
      "loss": 0.469,
      "step": 12053
    },
    {
      "epoch": 1.4779303580186367,
      "grad_norm": 1.69387320817145,
      "learning_rate": 8.750698926924664e-07,
      "loss": 0.4131,
      "step": 12054
    },
    {
      "epoch": 1.4780529671407554,
      "grad_norm": 1.939178715614193,
      "learning_rate": 8.746847494553207e-07,
      "loss": 0.483,
      "step": 12055
    },
    {
      "epoch": 1.4781755762628739,
      "grad_norm": 1.8143074172026683,
      "learning_rate": 8.742996730215103e-07,
      "loss": 0.4181,
      "step": 12056
    },
    {
      "epoch": 1.4782981853849926,
      "grad_norm": 1.9482329586304126,
      "learning_rate": 8.739146634068613e-07,
      "loss": 0.425,
      "step": 12057
    },
    {
      "epoch": 1.4784207945071113,
      "grad_norm": 1.9220134254593586,
      "learning_rate": 8.735297206271975e-07,
      "loss": 0.4214,
      "step": 12058
    },
    {
      "epoch": 1.47854340362923,
      "grad_norm": 1.9708662468122793,
      "learning_rate": 8.731448446983426e-07,
      "loss": 0.439,
      "step": 12059
    },
    {
      "epoch": 1.4786660127513487,
      "grad_norm": 2.009994507505149,
      "learning_rate": 8.727600356361138e-07,
      "loss": 0.4355,
      "step": 12060
    },
    {
      "epoch": 1.4787886218734674,
      "grad_norm": 2.0350334420243694,
      "learning_rate": 8.72375293456329e-07,
      "loss": 0.4508,
      "step": 12061
    },
    {
      "epoch": 1.478911230995586,
      "grad_norm": 1.9400988307905453,
      "learning_rate": 8.719906181748005e-07,
      "loss": 0.4516,
      "step": 12062
    },
    {
      "epoch": 1.4790338401177048,
      "grad_norm": 1.6942477063801378,
      "learning_rate": 8.716060098073406e-07,
      "loss": 0.4312,
      "step": 12063
    },
    {
      "epoch": 1.4791564492398235,
      "grad_norm": 1.8093151674407253,
      "learning_rate": 8.712214683697556e-07,
      "loss": 0.4364,
      "step": 12064
    },
    {
      "epoch": 1.4792790583619422,
      "grad_norm": 1.819284288330642,
      "learning_rate": 8.708369938778527e-07,
      "loss": 0.4819,
      "step": 12065
    },
    {
      "epoch": 1.4794016674840609,
      "grad_norm": 1.9664812400824263,
      "learning_rate": 8.704525863474337e-07,
      "loss": 0.4432,
      "step": 12066
    },
    {
      "epoch": 1.4795242766061796,
      "grad_norm": 1.8425666620301056,
      "learning_rate": 8.700682457942971e-07,
      "loss": 0.4238,
      "step": 12067
    },
    {
      "epoch": 1.479646885728298,
      "grad_norm": 2.1607049559695732,
      "learning_rate": 8.696839722342412e-07,
      "loss": 0.436,
      "step": 12068
    },
    {
      "epoch": 1.4797694948504168,
      "grad_norm": 1.930096680371298,
      "learning_rate": 8.692997656830615e-07,
      "loss": 0.4207,
      "step": 12069
    },
    {
      "epoch": 1.4798921039725355,
      "grad_norm": 1.8601824335546875,
      "learning_rate": 8.689156261565479e-07,
      "loss": 0.4075,
      "step": 12070
    },
    {
      "epoch": 1.4800147130946542,
      "grad_norm": 1.8350112010183335,
      "learning_rate": 8.685315536704892e-07,
      "loss": 0.4484,
      "step": 12071
    },
    {
      "epoch": 1.4801373222167729,
      "grad_norm": 2.0553793388581263,
      "learning_rate": 8.681475482406726e-07,
      "loss": 0.4655,
      "step": 12072
    },
    {
      "epoch": 1.4802599313388916,
      "grad_norm": 1.9456043733083255,
      "learning_rate": 8.677636098828807e-07,
      "loss": 0.4226,
      "step": 12073
    },
    {
      "epoch": 1.4803825404610103,
      "grad_norm": 1.9746378402818425,
      "learning_rate": 8.673797386128932e-07,
      "loss": 0.4579,
      "step": 12074
    },
    {
      "epoch": 1.480505149583129,
      "grad_norm": 2.066323910956429,
      "learning_rate": 8.66995934446489e-07,
      "loss": 0.4573,
      "step": 12075
    },
    {
      "epoch": 1.4806277587052477,
      "grad_norm": 2.072723081516428,
      "learning_rate": 8.666121973994438e-07,
      "loss": 0.4208,
      "step": 12076
    },
    {
      "epoch": 1.4807503678273664,
      "grad_norm": 1.914917773341344,
      "learning_rate": 8.662285274875288e-07,
      "loss": 0.4071,
      "step": 12077
    },
    {
      "epoch": 1.480872976949485,
      "grad_norm": 2.1113321572192802,
      "learning_rate": 8.658449247265127e-07,
      "loss": 0.4039,
      "step": 12078
    },
    {
      "epoch": 1.4809955860716038,
      "grad_norm": 2.0329586376684032,
      "learning_rate": 8.654613891321642e-07,
      "loss": 0.4328,
      "step": 12079
    },
    {
      "epoch": 1.4811181951937225,
      "grad_norm": 1.986334466717582,
      "learning_rate": 8.650779207202453e-07,
      "loss": 0.4611,
      "step": 12080
    },
    {
      "epoch": 1.4812408043158412,
      "grad_norm": 1.923622522242897,
      "learning_rate": 8.646945195065187e-07,
      "loss": 0.44,
      "step": 12081
    },
    {
      "epoch": 1.4813634134379599,
      "grad_norm": 2.182577963572532,
      "learning_rate": 8.643111855067429e-07,
      "loss": 0.3973,
      "step": 12082
    },
    {
      "epoch": 1.4814860225600786,
      "grad_norm": 1.9346488052864392,
      "learning_rate": 8.639279187366734e-07,
      "loss": 0.3921,
      "step": 12083
    },
    {
      "epoch": 1.4816086316821973,
      "grad_norm": 2.0444883035413386,
      "learning_rate": 8.635447192120619e-07,
      "loss": 0.4503,
      "step": 12084
    },
    {
      "epoch": 1.481731240804316,
      "grad_norm": 2.0262179267583176,
      "learning_rate": 8.631615869486596e-07,
      "loss": 0.394,
      "step": 12085
    },
    {
      "epoch": 1.4818538499264347,
      "grad_norm": 1.7403312311135386,
      "learning_rate": 8.627785219622154e-07,
      "loss": 0.4755,
      "step": 12086
    },
    {
      "epoch": 1.4819764590485531,
      "grad_norm": 2.064429897982416,
      "learning_rate": 8.623955242684709e-07,
      "loss": 0.4204,
      "step": 12087
    },
    {
      "epoch": 1.4820990681706718,
      "grad_norm": 1.9968536663925731,
      "learning_rate": 8.620125938831694e-07,
      "loss": 0.4411,
      "step": 12088
    },
    {
      "epoch": 1.4822216772927905,
      "grad_norm": 1.912699516998798,
      "learning_rate": 8.616297308220509e-07,
      "loss": 0.4138,
      "step": 12089
    },
    {
      "epoch": 1.4823442864149092,
      "grad_norm": 1.986593837063511,
      "learning_rate": 8.612469351008512e-07,
      "loss": 0.438,
      "step": 12090
    },
    {
      "epoch": 1.482466895537028,
      "grad_norm": 2.0793760721885426,
      "learning_rate": 8.608642067353026e-07,
      "loss": 0.4397,
      "step": 12091
    },
    {
      "epoch": 1.4825895046591466,
      "grad_norm": 1.9796477460099156,
      "learning_rate": 8.604815457411378e-07,
      "loss": 0.436,
      "step": 12092
    },
    {
      "epoch": 1.4827121137812653,
      "grad_norm": 1.7642340449166665,
      "learning_rate": 8.600989521340832e-07,
      "loss": 0.3642,
      "step": 12093
    },
    {
      "epoch": 1.482834722903384,
      "grad_norm": 2.138266051628966,
      "learning_rate": 8.59716425929866e-07,
      "loss": 0.4319,
      "step": 12094
    },
    {
      "epoch": 1.4829573320255027,
      "grad_norm": 1.9000670156419441,
      "learning_rate": 8.593339671442067e-07,
      "loss": 0.3977,
      "step": 12095
    },
    {
      "epoch": 1.4830799411476214,
      "grad_norm": 1.7265951771941497,
      "learning_rate": 8.589515757928263e-07,
      "loss": 0.4186,
      "step": 12096
    },
    {
      "epoch": 1.4832025502697401,
      "grad_norm": 2.0163580683574396,
      "learning_rate": 8.58569251891441e-07,
      "loss": 0.46,
      "step": 12097
    },
    {
      "epoch": 1.4833251593918588,
      "grad_norm": 1.9331869683316143,
      "learning_rate": 8.581869954557659e-07,
      "loss": 0.422,
      "step": 12098
    },
    {
      "epoch": 1.4834477685139773,
      "grad_norm": 2.2035964802493724,
      "learning_rate": 8.578048065015121e-07,
      "loss": 0.4502,
      "step": 12099
    },
    {
      "epoch": 1.483570377636096,
      "grad_norm": 2.207311970429689,
      "learning_rate": 8.574226850443873e-07,
      "loss": 0.427,
      "step": 12100
    },
    {
      "epoch": 1.4836929867582147,
      "grad_norm": 2.314250308371661,
      "learning_rate": 8.570406311000978e-07,
      "loss": 0.4508,
      "step": 12101
    },
    {
      "epoch": 1.4838155958803334,
      "grad_norm": 1.8596758250829362,
      "learning_rate": 8.566586446843481e-07,
      "loss": 0.3989,
      "step": 12102
    },
    {
      "epoch": 1.483938205002452,
      "grad_norm": 1.9721782832718528,
      "learning_rate": 8.562767258128371e-07,
      "loss": 0.4654,
      "step": 12103
    },
    {
      "epoch": 1.4840608141245708,
      "grad_norm": 2.1044551150513446,
      "learning_rate": 8.558948745012621e-07,
      "loss": 0.445,
      "step": 12104
    },
    {
      "epoch": 1.4841834232466895,
      "grad_norm": 1.7824572629208888,
      "learning_rate": 8.555130907653184e-07,
      "loss": 0.4018,
      "step": 12105
    },
    {
      "epoch": 1.4843060323688082,
      "grad_norm": 1.835810929196508,
      "learning_rate": 8.551313746206985e-07,
      "loss": 0.4809,
      "step": 12106
    },
    {
      "epoch": 1.484428641490927,
      "grad_norm": 1.9986503645565243,
      "learning_rate": 8.547497260830914e-07,
      "loss": 0.4197,
      "step": 12107
    },
    {
      "epoch": 1.4845512506130456,
      "grad_norm": 2.007171156879214,
      "learning_rate": 8.543681451681823e-07,
      "loss": 0.4194,
      "step": 12108
    },
    {
      "epoch": 1.4846738597351643,
      "grad_norm": 1.8007027853827766,
      "learning_rate": 8.539866318916568e-07,
      "loss": 0.4199,
      "step": 12109
    },
    {
      "epoch": 1.484796468857283,
      "grad_norm": 1.9179465387116423,
      "learning_rate": 8.536051862691938e-07,
      "loss": 0.4144,
      "step": 12110
    },
    {
      "epoch": 1.4849190779794017,
      "grad_norm": 2.018378948556049,
      "learning_rate": 8.532238083164731e-07,
      "loss": 0.4264,
      "step": 12111
    },
    {
      "epoch": 1.4850416871015204,
      "grad_norm": 1.9136935391612864,
      "learning_rate": 8.528424980491695e-07,
      "loss": 0.4135,
      "step": 12112
    },
    {
      "epoch": 1.485164296223639,
      "grad_norm": 1.8698789041718569,
      "learning_rate": 8.524612554829545e-07,
      "loss": 0.4191,
      "step": 12113
    },
    {
      "epoch": 1.4852869053457578,
      "grad_norm": 1.7306011023465049,
      "learning_rate": 8.520800806334984e-07,
      "loss": 0.3692,
      "step": 12114
    },
    {
      "epoch": 1.4854095144678765,
      "grad_norm": 2.060247438579838,
      "learning_rate": 8.516989735164694e-07,
      "loss": 0.4314,
      "step": 12115
    },
    {
      "epoch": 1.4855321235899952,
      "grad_norm": 2.1026942825337276,
      "learning_rate": 8.513179341475305e-07,
      "loss": 0.3859,
      "step": 12116
    },
    {
      "epoch": 1.485654732712114,
      "grad_norm": 2.000863156172224,
      "learning_rate": 8.509369625423425e-07,
      "loss": 0.4543,
      "step": 12117
    },
    {
      "epoch": 1.4857773418342326,
      "grad_norm": 2.0214407211763223,
      "learning_rate": 8.505560587165648e-07,
      "loss": 0.4653,
      "step": 12118
    },
    {
      "epoch": 1.485899950956351,
      "grad_norm": 1.9197154301352755,
      "learning_rate": 8.501752226858551e-07,
      "loss": 0.4116,
      "step": 12119
    },
    {
      "epoch": 1.4860225600784698,
      "grad_norm": 1.9140138090351657,
      "learning_rate": 8.497944544658626e-07,
      "loss": 0.4235,
      "step": 12120
    },
    {
      "epoch": 1.4861451692005885,
      "grad_norm": 1.8380934947798546,
      "learning_rate": 8.494137540722394e-07,
      "loss": 0.3995,
      "step": 12121
    },
    {
      "epoch": 1.4862677783227072,
      "grad_norm": 1.8397590421729066,
      "learning_rate": 8.49033121520634e-07,
      "loss": 0.4134,
      "step": 12122
    },
    {
      "epoch": 1.4863903874448259,
      "grad_norm": 1.7869563171873406,
      "learning_rate": 8.486525568266902e-07,
      "loss": 0.4009,
      "step": 12123
    },
    {
      "epoch": 1.4865129965669446,
      "grad_norm": 1.8603642779546172,
      "learning_rate": 8.482720600060487e-07,
      "loss": 0.4223,
      "step": 12124
    },
    {
      "epoch": 1.4866356056890633,
      "grad_norm": 1.8662422875805043,
      "learning_rate": 8.478916310743499e-07,
      "loss": 0.4555,
      "step": 12125
    },
    {
      "epoch": 1.486758214811182,
      "grad_norm": 1.9706047646650364,
      "learning_rate": 8.475112700472307e-07,
      "loss": 0.4192,
      "step": 12126
    },
    {
      "epoch": 1.4868808239333007,
      "grad_norm": 2.026379145554358,
      "learning_rate": 8.471309769403238e-07,
      "loss": 0.4014,
      "step": 12127
    },
    {
      "epoch": 1.4870034330554194,
      "grad_norm": 1.9601755963688163,
      "learning_rate": 8.467507517692592e-07,
      "loss": 0.4064,
      "step": 12128
    },
    {
      "epoch": 1.487126042177538,
      "grad_norm": 1.915690748874586,
      "learning_rate": 8.463705945496662e-07,
      "loss": 0.4332,
      "step": 12129
    },
    {
      "epoch": 1.4872486512996568,
      "grad_norm": 1.7719327903715223,
      "learning_rate": 8.459905052971684e-07,
      "loss": 0.4433,
      "step": 12130
    },
    {
      "epoch": 1.4873712604217753,
      "grad_norm": 1.9183058772340793,
      "learning_rate": 8.45610484027389e-07,
      "loss": 0.4244,
      "step": 12131
    },
    {
      "epoch": 1.487493869543894,
      "grad_norm": 1.899508133412608,
      "learning_rate": 8.452305307559495e-07,
      "loss": 0.443,
      "step": 12132
    },
    {
      "epoch": 1.4876164786660127,
      "grad_norm": 2.2509394699364327,
      "learning_rate": 8.448506454984629e-07,
      "loss": 0.4046,
      "step": 12133
    },
    {
      "epoch": 1.4877390877881314,
      "grad_norm": 1.913604166232581,
      "learning_rate": 8.44470828270545e-07,
      "loss": 0.4405,
      "step": 12134
    },
    {
      "epoch": 1.48786169691025,
      "grad_norm": 1.790437461209343,
      "learning_rate": 8.440910790878079e-07,
      "loss": 0.4593,
      "step": 12135
    },
    {
      "epoch": 1.4879843060323688,
      "grad_norm": 1.8782455375269327,
      "learning_rate": 8.437113979658587e-07,
      "loss": 0.4459,
      "step": 12136
    },
    {
      "epoch": 1.4881069151544875,
      "grad_norm": 1.8037965455268263,
      "learning_rate": 8.433317849203026e-07,
      "loss": 0.3957,
      "step": 12137
    },
    {
      "epoch": 1.4882295242766062,
      "grad_norm": 2.0586700813740846,
      "learning_rate": 8.429522399667434e-07,
      "loss": 0.424,
      "step": 12138
    },
    {
      "epoch": 1.4883521333987249,
      "grad_norm": 1.9382020504039636,
      "learning_rate": 8.425727631207812e-07,
      "loss": 0.4119,
      "step": 12139
    },
    {
      "epoch": 1.4884747425208436,
      "grad_norm": 1.9868112746690068,
      "learning_rate": 8.421933543980126e-07,
      "loss": 0.4428,
      "step": 12140
    },
    {
      "epoch": 1.4885973516429623,
      "grad_norm": 1.8368136288022758,
      "learning_rate": 8.418140138140313e-07,
      "loss": 0.4269,
      "step": 12141
    },
    {
      "epoch": 1.488719960765081,
      "grad_norm": 1.8302246294858857,
      "learning_rate": 8.414347413844304e-07,
      "loss": 0.3703,
      "step": 12142
    },
    {
      "epoch": 1.4888425698871997,
      "grad_norm": 1.7846915948715234,
      "learning_rate": 8.410555371247972e-07,
      "loss": 0.3765,
      "step": 12143
    },
    {
      "epoch": 1.4889651790093184,
      "grad_norm": 1.9770739336442034,
      "learning_rate": 8.406764010507191e-07,
      "loss": 0.4342,
      "step": 12144
    },
    {
      "epoch": 1.489087788131437,
      "grad_norm": 2.063944842075901,
      "learning_rate": 8.402973331777775e-07,
      "loss": 0.4307,
      "step": 12145
    },
    {
      "epoch": 1.4892103972535558,
      "grad_norm": 1.9975184398640626,
      "learning_rate": 8.399183335215549e-07,
      "loss": 0.4688,
      "step": 12146
    },
    {
      "epoch": 1.4893330063756745,
      "grad_norm": 1.9666564152765063,
      "learning_rate": 8.395394020976269e-07,
      "loss": 0.4006,
      "step": 12147
    },
    {
      "epoch": 1.4894556154977932,
      "grad_norm": 1.9237075752999961,
      "learning_rate": 8.391605389215699e-07,
      "loss": 0.4472,
      "step": 12148
    },
    {
      "epoch": 1.4895782246199119,
      "grad_norm": 2.1715171967212075,
      "learning_rate": 8.387817440089549e-07,
      "loss": 0.4294,
      "step": 12149
    },
    {
      "epoch": 1.4897008337420303,
      "grad_norm": 1.9403557796567505,
      "learning_rate": 8.384030173753505e-07,
      "loss": 0.4386,
      "step": 12150
    },
    {
      "epoch": 1.489823442864149,
      "grad_norm": 1.7295816464379392,
      "learning_rate": 8.380243590363235e-07,
      "loss": 0.3936,
      "step": 12151
    },
    {
      "epoch": 1.4899460519862677,
      "grad_norm": 1.7301842045877143,
      "learning_rate": 8.376457690074386e-07,
      "loss": 0.4241,
      "step": 12152
    },
    {
      "epoch": 1.4900686611083864,
      "grad_norm": 1.9497284143134583,
      "learning_rate": 8.372672473042556e-07,
      "loss": 0.4616,
      "step": 12153
    },
    {
      "epoch": 1.4901912702305051,
      "grad_norm": 1.8167311682027225,
      "learning_rate": 8.368887939423314e-07,
      "loss": 0.4331,
      "step": 12154
    },
    {
      "epoch": 1.4903138793526238,
      "grad_norm": 2.052063392039473,
      "learning_rate": 8.365104089372231e-07,
      "loss": 0.4605,
      "step": 12155
    },
    {
      "epoch": 1.4904364884747425,
      "grad_norm": 2.044219598464552,
      "learning_rate": 8.361320923044811e-07,
      "loss": 0.4009,
      "step": 12156
    },
    {
      "epoch": 1.4905590975968612,
      "grad_norm": 2.0974677075257135,
      "learning_rate": 8.357538440596563e-07,
      "loss": 0.4365,
      "step": 12157
    },
    {
      "epoch": 1.49068170671898,
      "grad_norm": 2.0535523755019383,
      "learning_rate": 8.353756642182942e-07,
      "loss": 0.4414,
      "step": 12158
    },
    {
      "epoch": 1.4908043158410986,
      "grad_norm": 2.079970255827407,
      "learning_rate": 8.349975527959403e-07,
      "loss": 0.4385,
      "step": 12159
    },
    {
      "epoch": 1.4909269249632173,
      "grad_norm": 1.7523656105351857,
      "learning_rate": 8.346195098081337e-07,
      "loss": 0.3963,
      "step": 12160
    },
    {
      "epoch": 1.491049534085336,
      "grad_norm": 1.9124055428997384,
      "learning_rate": 8.342415352704142e-07,
      "loss": 0.4207,
      "step": 12161
    },
    {
      "epoch": 1.4911721432074545,
      "grad_norm": 2.089982742989111,
      "learning_rate": 8.338636291983168e-07,
      "loss": 0.4156,
      "step": 12162
    },
    {
      "epoch": 1.4912947523295732,
      "grad_norm": 2.0501674576834947,
      "learning_rate": 8.33485791607373e-07,
      "loss": 0.4297,
      "step": 12163
    },
    {
      "epoch": 1.491417361451692,
      "grad_norm": 1.9878988356192344,
      "learning_rate": 8.331080225131133e-07,
      "loss": 0.4051,
      "step": 12164
    },
    {
      "epoch": 1.4915399705738106,
      "grad_norm": 1.9775693630608664,
      "learning_rate": 8.327303219310659e-07,
      "loss": 0.4556,
      "step": 12165
    },
    {
      "epoch": 1.4916625796959293,
      "grad_norm": 2.0675668453034066,
      "learning_rate": 8.323526898767539e-07,
      "loss": 0.4516,
      "step": 12166
    },
    {
      "epoch": 1.491785188818048,
      "grad_norm": 1.9233565800905328,
      "learning_rate": 8.31975126365698e-07,
      "loss": 0.4236,
      "step": 12167
    },
    {
      "epoch": 1.4919077979401667,
      "grad_norm": 2.136098112236427,
      "learning_rate": 8.315976314134183e-07,
      "loss": 0.4538,
      "step": 12168
    },
    {
      "epoch": 1.4920304070622854,
      "grad_norm": 2.3268730108020637,
      "learning_rate": 8.312202050354293e-07,
      "loss": 0.4358,
      "step": 12169
    },
    {
      "epoch": 1.492153016184404,
      "grad_norm": 1.8386564202290996,
      "learning_rate": 8.308428472472435e-07,
      "loss": 0.3878,
      "step": 12170
    },
    {
      "epoch": 1.4922756253065228,
      "grad_norm": 1.966825668986573,
      "learning_rate": 8.30465558064372e-07,
      "loss": 0.3576,
      "step": 12171
    },
    {
      "epoch": 1.4923982344286415,
      "grad_norm": 1.8597747993493488,
      "learning_rate": 8.300883375023222e-07,
      "loss": 0.443,
      "step": 12172
    },
    {
      "epoch": 1.4925208435507602,
      "grad_norm": 1.9355157959217484,
      "learning_rate": 8.297111855765983e-07,
      "loss": 0.4048,
      "step": 12173
    },
    {
      "epoch": 1.492643452672879,
      "grad_norm": 1.818204831952754,
      "learning_rate": 8.293341023027004e-07,
      "loss": 0.4065,
      "step": 12174
    },
    {
      "epoch": 1.4927660617949976,
      "grad_norm": 1.9500167099754557,
      "learning_rate": 8.2895708769613e-07,
      "loss": 0.4501,
      "step": 12175
    },
    {
      "epoch": 1.4928886709171163,
      "grad_norm": 1.8313231324189074,
      "learning_rate": 8.285801417723804e-07,
      "loss": 0.4139,
      "step": 12176
    },
    {
      "epoch": 1.493011280039235,
      "grad_norm": 2.0738414601844166,
      "learning_rate": 8.282032645469462e-07,
      "loss": 0.47,
      "step": 12177
    },
    {
      "epoch": 1.4931338891613537,
      "grad_norm": 1.9571364365342467,
      "learning_rate": 8.278264560353183e-07,
      "loss": 0.4353,
      "step": 12178
    },
    {
      "epoch": 1.4932564982834724,
      "grad_norm": 2.065121911837986,
      "learning_rate": 8.274497162529835e-07,
      "loss": 0.4669,
      "step": 12179
    },
    {
      "epoch": 1.493379107405591,
      "grad_norm": 1.900321602369626,
      "learning_rate": 8.270730452154255e-07,
      "loss": 0.4233,
      "step": 12180
    },
    {
      "epoch": 1.4935017165277098,
      "grad_norm": 1.9666942781231043,
      "learning_rate": 8.266964429381269e-07,
      "loss": 0.4655,
      "step": 12181
    },
    {
      "epoch": 1.4936243256498283,
      "grad_norm": 1.9937391082389704,
      "learning_rate": 8.263199094365684e-07,
      "loss": 0.4607,
      "step": 12182
    },
    {
      "epoch": 1.493746934771947,
      "grad_norm": 1.963382269780413,
      "learning_rate": 8.259434447262232e-07,
      "loss": 0.4554,
      "step": 12183
    },
    {
      "epoch": 1.4938695438940657,
      "grad_norm": 2.0891805687224663,
      "learning_rate": 8.255670488225662e-07,
      "loss": 0.4121,
      "step": 12184
    },
    {
      "epoch": 1.4939921530161844,
      "grad_norm": 2.0293202772424825,
      "learning_rate": 8.251907217410685e-07,
      "loss": 0.4434,
      "step": 12185
    },
    {
      "epoch": 1.494114762138303,
      "grad_norm": 1.7996616662562221,
      "learning_rate": 8.248144634971972e-07,
      "loss": 0.4126,
      "step": 12186
    },
    {
      "epoch": 1.4942373712604218,
      "grad_norm": 1.9546898120709943,
      "learning_rate": 8.244382741064164e-07,
      "loss": 0.4242,
      "step": 12187
    },
    {
      "epoch": 1.4943599803825405,
      "grad_norm": 1.8405313909207965,
      "learning_rate": 8.240621535841894e-07,
      "loss": 0.4379,
      "step": 12188
    },
    {
      "epoch": 1.4944825895046592,
      "grad_norm": 2.032414057648849,
      "learning_rate": 8.236861019459744e-07,
      "loss": 0.4494,
      "step": 12189
    },
    {
      "epoch": 1.4946051986267779,
      "grad_norm": 1.771870129375275,
      "learning_rate": 8.23310119207229e-07,
      "loss": 0.4165,
      "step": 12190
    },
    {
      "epoch": 1.4947278077488966,
      "grad_norm": 2.005868013079583,
      "learning_rate": 8.229342053834052e-07,
      "loss": 0.401,
      "step": 12191
    },
    {
      "epoch": 1.4948504168710153,
      "grad_norm": 1.8562698707842578,
      "learning_rate": 8.225583604899553e-07,
      "loss": 0.3838,
      "step": 12192
    },
    {
      "epoch": 1.4949730259931338,
      "grad_norm": 1.9366902029589004,
      "learning_rate": 8.221825845423259e-07,
      "loss": 0.4345,
      "step": 12193
    },
    {
      "epoch": 1.4950956351152525,
      "grad_norm": 1.8938469740412776,
      "learning_rate": 8.218068775559632e-07,
      "loss": 0.4573,
      "step": 12194
    },
    {
      "epoch": 1.4952182442373712,
      "grad_norm": 1.9437874414265155,
      "learning_rate": 8.214312395463089e-07,
      "loss": 0.4298,
      "step": 12195
    },
    {
      "epoch": 1.4953408533594899,
      "grad_norm": 1.8087090282899818,
      "learning_rate": 8.210556705288017e-07,
      "loss": 0.4091,
      "step": 12196
    },
    {
      "epoch": 1.4954634624816086,
      "grad_norm": 1.9947075356478539,
      "learning_rate": 8.206801705188785e-07,
      "loss": 0.4051,
      "step": 12197
    },
    {
      "epoch": 1.4955860716037273,
      "grad_norm": 1.920678113924366,
      "learning_rate": 8.203047395319743e-07,
      "loss": 0.3761,
      "step": 12198
    },
    {
      "epoch": 1.495708680725846,
      "grad_norm": 1.8496224446579368,
      "learning_rate": 8.199293775835188e-07,
      "loss": 0.4404,
      "step": 12199
    },
    {
      "epoch": 1.4958312898479647,
      "grad_norm": 2.134726945661639,
      "learning_rate": 8.195540846889397e-07,
      "loss": 0.4422,
      "step": 12200
    },
    {
      "epoch": 1.4959538989700834,
      "grad_norm": 1.8473773117437693,
      "learning_rate": 8.191788608636625e-07,
      "loss": 0.4298,
      "step": 12201
    },
    {
      "epoch": 1.496076508092202,
      "grad_norm": 1.736583210016909,
      "learning_rate": 8.188037061231105e-07,
      "loss": 0.4172,
      "step": 12202
    },
    {
      "epoch": 1.4961991172143208,
      "grad_norm": 2.06539952507257,
      "learning_rate": 8.184286204827027e-07,
      "loss": 0.3972,
      "step": 12203
    },
    {
      "epoch": 1.4963217263364395,
      "grad_norm": 1.989225988130384,
      "learning_rate": 8.180536039578546e-07,
      "loss": 0.4391,
      "step": 12204
    },
    {
      "epoch": 1.4964443354585582,
      "grad_norm": 2.0115150882966275,
      "learning_rate": 8.176786565639818e-07,
      "loss": 0.4486,
      "step": 12205
    },
    {
      "epoch": 1.4965669445806769,
      "grad_norm": 2.1499974626970153,
      "learning_rate": 8.173037783164939e-07,
      "loss": 0.448,
      "step": 12206
    },
    {
      "epoch": 1.4966895537027955,
      "grad_norm": 1.9945416594796546,
      "learning_rate": 8.169289692308005e-07,
      "loss": 0.4737,
      "step": 12207
    },
    {
      "epoch": 1.4968121628249142,
      "grad_norm": 2.0114088405612787,
      "learning_rate": 8.165542293223061e-07,
      "loss": 0.4313,
      "step": 12208
    },
    {
      "epoch": 1.496934771947033,
      "grad_norm": 1.8747262686798472,
      "learning_rate": 8.161795586064123e-07,
      "loss": 0.4578,
      "step": 12209
    },
    {
      "epoch": 1.4970573810691516,
      "grad_norm": 1.9057764334444567,
      "learning_rate": 8.158049570985196e-07,
      "loss": 0.395,
      "step": 12210
    },
    {
      "epoch": 1.4971799901912703,
      "grad_norm": 1.8557398060524661,
      "learning_rate": 8.154304248140258e-07,
      "loss": 0.4395,
      "step": 12211
    },
    {
      "epoch": 1.497302599313389,
      "grad_norm": 1.6618864329384795,
      "learning_rate": 8.150559617683234e-07,
      "loss": 0.4231,
      "step": 12212
    },
    {
      "epoch": 1.4974252084355075,
      "grad_norm": 2.0721444054365863,
      "learning_rate": 8.146815679768035e-07,
      "loss": 0.4487,
      "step": 12213
    },
    {
      "epoch": 1.4975478175576262,
      "grad_norm": 2.0771160237338147,
      "learning_rate": 8.143072434548547e-07,
      "loss": 0.4317,
      "step": 12214
    },
    {
      "epoch": 1.497670426679745,
      "grad_norm": 1.8647185772373385,
      "learning_rate": 8.139329882178639e-07,
      "loss": 0.4032,
      "step": 12215
    },
    {
      "epoch": 1.4977930358018636,
      "grad_norm": 2.0207428413451707,
      "learning_rate": 8.13558802281211e-07,
      "loss": 0.4651,
      "step": 12216
    },
    {
      "epoch": 1.4979156449239823,
      "grad_norm": 2.118238880754474,
      "learning_rate": 8.131846856602765e-07,
      "loss": 0.4326,
      "step": 12217
    },
    {
      "epoch": 1.498038254046101,
      "grad_norm": 2.031456744853119,
      "learning_rate": 8.12810638370439e-07,
      "loss": 0.3794,
      "step": 12218
    },
    {
      "epoch": 1.4981608631682197,
      "grad_norm": 1.9776006373492216,
      "learning_rate": 8.124366604270709e-07,
      "loss": 0.4382,
      "step": 12219
    },
    {
      "epoch": 1.4982834722903384,
      "grad_norm": 1.6828035527378673,
      "learning_rate": 8.120627518455429e-07,
      "loss": 0.4174,
      "step": 12220
    },
    {
      "epoch": 1.4984060814124571,
      "grad_norm": 1.8813852385804797,
      "learning_rate": 8.116889126412239e-07,
      "loss": 0.4089,
      "step": 12221
    },
    {
      "epoch": 1.4985286905345758,
      "grad_norm": 1.8978395224527895,
      "learning_rate": 8.113151428294805e-07,
      "loss": 0.4129,
      "step": 12222
    },
    {
      "epoch": 1.4986512996566945,
      "grad_norm": 1.9534294980142375,
      "learning_rate": 8.109414424256734e-07,
      "loss": 0.4594,
      "step": 12223
    },
    {
      "epoch": 1.4987739087788132,
      "grad_norm": 1.8810140578291545,
      "learning_rate": 8.10567811445164e-07,
      "loss": 0.3683,
      "step": 12224
    },
    {
      "epoch": 1.4988965179009317,
      "grad_norm": 1.703752119500504,
      "learning_rate": 8.101942499033085e-07,
      "loss": 0.4213,
      "step": 12225
    },
    {
      "epoch": 1.4990191270230504,
      "grad_norm": 1.9430980685965509,
      "learning_rate": 8.098207578154602e-07,
      "loss": 0.4879,
      "step": 12226
    },
    {
      "epoch": 1.499141736145169,
      "grad_norm": 1.8868817244849838,
      "learning_rate": 8.094473351969708e-07,
      "loss": 0.4171,
      "step": 12227
    },
    {
      "epoch": 1.4992643452672878,
      "grad_norm": 1.9383489494928456,
      "learning_rate": 8.090739820631907e-07,
      "loss": 0.4346,
      "step": 12228
    },
    {
      "epoch": 1.4993869543894065,
      "grad_norm": 1.8822306220263423,
      "learning_rate": 8.087006984294618e-07,
      "loss": 0.4139,
      "step": 12229
    },
    {
      "epoch": 1.4995095635115252,
      "grad_norm": 1.8744570363920208,
      "learning_rate": 8.083274843111282e-07,
      "loss": 0.411,
      "step": 12230
    },
    {
      "epoch": 1.499632172633644,
      "grad_norm": 1.9689676426426652,
      "learning_rate": 8.07954339723531e-07,
      "loss": 0.4074,
      "step": 12231
    },
    {
      "epoch": 1.4997547817557626,
      "grad_norm": 1.8150323825940446,
      "learning_rate": 8.075812646820059e-07,
      "loss": 0.4101,
      "step": 12232
    },
    {
      "epoch": 1.4998773908778813,
      "grad_norm": 1.9191955519982875,
      "learning_rate": 8.072082592018859e-07,
      "loss": 0.3624,
      "step": 12233
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0842731321826022,
      "learning_rate": 8.068353232985035e-07,
      "loss": 0.4357,
      "step": 12234
    },
    {
      "epoch": 1.5001226091221187,
      "grad_norm": 1.8515792852799964,
      "learning_rate": 8.064624569871876e-07,
      "loss": 0.4177,
      "step": 12235
    },
    {
      "epoch": 1.5002452182442374,
      "grad_norm": 2.012326799649485,
      "learning_rate": 8.060896602832629e-07,
      "loss": 0.4501,
      "step": 12236
    },
    {
      "epoch": 1.500367827366356,
      "grad_norm": 2.0081094764186767,
      "learning_rate": 8.057169332020512e-07,
      "loss": 0.3884,
      "step": 12237
    },
    {
      "epoch": 1.5004904364884748,
      "grad_norm": 1.8004593432598754,
      "learning_rate": 8.053442757588737e-07,
      "loss": 0.3925,
      "step": 12238
    },
    {
      "epoch": 1.5006130456105935,
      "grad_norm": 1.999080228228872,
      "learning_rate": 8.049716879690462e-07,
      "loss": 0.4662,
      "step": 12239
    },
    {
      "epoch": 1.5007356547327122,
      "grad_norm": 1.855923321466196,
      "learning_rate": 8.045991698478836e-07,
      "loss": 0.4551,
      "step": 12240
    },
    {
      "epoch": 1.500858263854831,
      "grad_norm": 1.9510204138833729,
      "learning_rate": 8.042267214106958e-07,
      "loss": 0.4492,
      "step": 12241
    },
    {
      "epoch": 1.5009808729769496,
      "grad_norm": 1.888408469471935,
      "learning_rate": 8.038543426727927e-07,
      "loss": 0.4017,
      "step": 12242
    },
    {
      "epoch": 1.5011034820990683,
      "grad_norm": 1.8788582427452198,
      "learning_rate": 8.03482033649478e-07,
      "loss": 0.4672,
      "step": 12243
    },
    {
      "epoch": 1.501226091221187,
      "grad_norm": 2.17980372914781,
      "learning_rate": 8.031097943560562e-07,
      "loss": 0.4479,
      "step": 12244
    },
    {
      "epoch": 1.5013487003433057,
      "grad_norm": 1.9737173697564934,
      "learning_rate": 8.027376248078262e-07,
      "loss": 0.4419,
      "step": 12245
    },
    {
      "epoch": 1.5014713094654242,
      "grad_norm": 2.02907575718956,
      "learning_rate": 8.023655250200832e-07,
      "loss": 0.4218,
      "step": 12246
    },
    {
      "epoch": 1.5015939185875429,
      "grad_norm": 1.943586229387296,
      "learning_rate": 8.019934950081229e-07,
      "loss": 0.4261,
      "step": 12247
    },
    {
      "epoch": 1.5017165277096616,
      "grad_norm": 1.9386881451271243,
      "learning_rate": 8.016215347872366e-07,
      "loss": 0.4259,
      "step": 12248
    },
    {
      "epoch": 1.5018391368317803,
      "grad_norm": 1.8948923218086344,
      "learning_rate": 8.012496443727122e-07,
      "loss": 0.4318,
      "step": 12249
    },
    {
      "epoch": 1.501961745953899,
      "grad_norm": 2.1376300584152608,
      "learning_rate": 8.00877823779834e-07,
      "loss": 0.43,
      "step": 12250
    },
    {
      "epoch": 1.5020843550760177,
      "grad_norm": 1.9950382663847885,
      "learning_rate": 8.005060730238861e-07,
      "loss": 0.4668,
      "step": 12251
    },
    {
      "epoch": 1.5022069641981364,
      "grad_norm": 2.0191115717949115,
      "learning_rate": 8.001343921201465e-07,
      "loss": 0.4491,
      "step": 12252
    },
    {
      "epoch": 1.502329573320255,
      "grad_norm": 2.002776389012974,
      "learning_rate": 7.997627810838935e-07,
      "loss": 0.4161,
      "step": 12253
    },
    {
      "epoch": 1.5024521824423736,
      "grad_norm": 2.086930241917597,
      "learning_rate": 7.993912399303993e-07,
      "loss": 0.4431,
      "step": 12254
    },
    {
      "epoch": 1.5025747915644923,
      "grad_norm": 1.9662296025046964,
      "learning_rate": 7.990197686749368e-07,
      "loss": 0.4618,
      "step": 12255
    },
    {
      "epoch": 1.502697400686611,
      "grad_norm": 1.8145787033004062,
      "learning_rate": 7.986483673327724e-07,
      "loss": 0.4099,
      "step": 12256
    },
    {
      "epoch": 1.5028200098087297,
      "grad_norm": 1.974281461273692,
      "learning_rate": 7.982770359191727e-07,
      "loss": 0.4422,
      "step": 12257
    },
    {
      "epoch": 1.5029426189308484,
      "grad_norm": 1.877561557611205,
      "learning_rate": 7.979057744493998e-07,
      "loss": 0.4201,
      "step": 12258
    },
    {
      "epoch": 1.503065228052967,
      "grad_norm": 2.0584692117111882,
      "learning_rate": 7.975345829387118e-07,
      "loss": 0.4491,
      "step": 12259
    },
    {
      "epoch": 1.5031878371750858,
      "grad_norm": 1.979063216948962,
      "learning_rate": 7.971634614023666e-07,
      "loss": 0.4248,
      "step": 12260
    },
    {
      "epoch": 1.5033104462972045,
      "grad_norm": 2.106275780813088,
      "learning_rate": 7.967924098556185e-07,
      "loss": 0.4865,
      "step": 12261
    },
    {
      "epoch": 1.5034330554193231,
      "grad_norm": 1.8575188695815703,
      "learning_rate": 7.964214283137178e-07,
      "loss": 0.3963,
      "step": 12262
    },
    {
      "epoch": 1.5035556645414418,
      "grad_norm": 1.9890276178340123,
      "learning_rate": 7.960505167919114e-07,
      "loss": 0.4708,
      "step": 12263
    },
    {
      "epoch": 1.5036782736635605,
      "grad_norm": 1.9112701222492654,
      "learning_rate": 7.95679675305446e-07,
      "loss": 0.4235,
      "step": 12264
    },
    {
      "epoch": 1.5038008827856792,
      "grad_norm": 1.8562400056191515,
      "learning_rate": 7.953089038695635e-07,
      "loss": 0.4179,
      "step": 12265
    },
    {
      "epoch": 1.503923491907798,
      "grad_norm": 1.8225417464351465,
      "learning_rate": 7.94938202499502e-07,
      "loss": 0.4106,
      "step": 12266
    },
    {
      "epoch": 1.5040461010299166,
      "grad_norm": 1.940385814850765,
      "learning_rate": 7.945675712104989e-07,
      "loss": 0.4241,
      "step": 12267
    },
    {
      "epoch": 1.5041687101520353,
      "grad_norm": 1.8791356560201442,
      "learning_rate": 7.941970100177887e-07,
      "loss": 0.4227,
      "step": 12268
    },
    {
      "epoch": 1.504291319274154,
      "grad_norm": 1.9935024694339376,
      "learning_rate": 7.938265189366015e-07,
      "loss": 0.4643,
      "step": 12269
    },
    {
      "epoch": 1.5044139283962727,
      "grad_norm": 2.0556389889743985,
      "learning_rate": 7.93456097982164e-07,
      "loss": 0.4427,
      "step": 12270
    },
    {
      "epoch": 1.5045365375183914,
      "grad_norm": 1.9791212222675991,
      "learning_rate": 7.930857471697026e-07,
      "loss": 0.3986,
      "step": 12271
    },
    {
      "epoch": 1.5046591466405101,
      "grad_norm": 1.8011349554905582,
      "learning_rate": 7.927154665144385e-07,
      "loss": 0.4428,
      "step": 12272
    },
    {
      "epoch": 1.5047817557626288,
      "grad_norm": 1.982654723891665,
      "learning_rate": 7.923452560315908e-07,
      "loss": 0.4011,
      "step": 12273
    },
    {
      "epoch": 1.5049043648847475,
      "grad_norm": 1.9997403610066622,
      "learning_rate": 7.919751157363772e-07,
      "loss": 0.4934,
      "step": 12274
    },
    {
      "epoch": 1.5050269740068662,
      "grad_norm": 1.9206551687592546,
      "learning_rate": 7.916050456440102e-07,
      "loss": 0.4424,
      "step": 12275
    },
    {
      "epoch": 1.505149583128985,
      "grad_norm": 2.00483176316717,
      "learning_rate": 7.912350457696996e-07,
      "loss": 0.4434,
      "step": 12276
    },
    {
      "epoch": 1.5052721922511036,
      "grad_norm": 2.0455806186334757,
      "learning_rate": 7.908651161286537e-07,
      "loss": 0.4034,
      "step": 12277
    },
    {
      "epoch": 1.5053948013732221,
      "grad_norm": 1.9141970575882052,
      "learning_rate": 7.904952567360788e-07,
      "loss": 0.4096,
      "step": 12278
    },
    {
      "epoch": 1.5055174104953408,
      "grad_norm": 1.9206231116600585,
      "learning_rate": 7.901254676071738e-07,
      "loss": 0.4509,
      "step": 12279
    },
    {
      "epoch": 1.5056400196174595,
      "grad_norm": 1.880826518516114,
      "learning_rate": 7.897557487571392e-07,
      "loss": 0.4463,
      "step": 12280
    },
    {
      "epoch": 1.5057626287395782,
      "grad_norm": 2.0097401166277833,
      "learning_rate": 7.893861002011716e-07,
      "loss": 0.4395,
      "step": 12281
    },
    {
      "epoch": 1.505885237861697,
      "grad_norm": 1.7772609750786943,
      "learning_rate": 7.89016521954464e-07,
      "loss": 0.4296,
      "step": 12282
    },
    {
      "epoch": 1.5060078469838156,
      "grad_norm": 1.9681892453451821,
      "learning_rate": 7.886470140322056e-07,
      "loss": 0.4037,
      "step": 12283
    },
    {
      "epoch": 1.5061304561059343,
      "grad_norm": 1.733381444757847,
      "learning_rate": 7.882775764495851e-07,
      "loss": 0.3943,
      "step": 12284
    },
    {
      "epoch": 1.5062530652280528,
      "grad_norm": 1.9419771388875404,
      "learning_rate": 7.87908209221786e-07,
      "loss": 0.4458,
      "step": 12285
    },
    {
      "epoch": 1.5063756743501715,
      "grad_norm": 1.909751657421007,
      "learning_rate": 7.875389123639915e-07,
      "loss": 0.4628,
      "step": 12286
    },
    {
      "epoch": 1.5064982834722902,
      "grad_norm": 1.7664960618912575,
      "learning_rate": 7.871696858913783e-07,
      "loss": 0.4522,
      "step": 12287
    },
    {
      "epoch": 1.506620892594409,
      "grad_norm": 1.8519049249127946,
      "learning_rate": 7.868005298191242e-07,
      "loss": 0.4316,
      "step": 12288
    },
    {
      "epoch": 1.5067435017165276,
      "grad_norm": 2.009793275946706,
      "learning_rate": 7.864314441624005e-07,
      "loss": 0.4007,
      "step": 12289
    },
    {
      "epoch": 1.5068661108386463,
      "grad_norm": 1.7976815350271875,
      "learning_rate": 7.860624289363789e-07,
      "loss": 0.4387,
      "step": 12290
    },
    {
      "epoch": 1.506988719960765,
      "grad_norm": 1.875890084010905,
      "learning_rate": 7.856934841562258e-07,
      "loss": 0.4128,
      "step": 12291
    },
    {
      "epoch": 1.5071113290828837,
      "grad_norm": 2.1081366792135405,
      "learning_rate": 7.853246098371048e-07,
      "loss": 0.4428,
      "step": 12292
    },
    {
      "epoch": 1.5072339382050024,
      "grad_norm": 1.8901346960296268,
      "learning_rate": 7.849558059941775e-07,
      "loss": 0.4183,
      "step": 12293
    },
    {
      "epoch": 1.507356547327121,
      "grad_norm": 1.8606886750207567,
      "learning_rate": 7.845870726426042e-07,
      "loss": 0.4391,
      "step": 12294
    },
    {
      "epoch": 1.5074791564492398,
      "grad_norm": 1.8388082612302281,
      "learning_rate": 7.842184097975386e-07,
      "loss": 0.4316,
      "step": 12295
    },
    {
      "epoch": 1.5076017655713585,
      "grad_norm": 2.030575304646416,
      "learning_rate": 7.838498174741332e-07,
      "loss": 0.3914,
      "step": 12296
    },
    {
      "epoch": 1.5077243746934772,
      "grad_norm": 2.0230955607933225,
      "learning_rate": 7.834812956875388e-07,
      "loss": 0.42,
      "step": 12297
    },
    {
      "epoch": 1.507846983815596,
      "grad_norm": 1.9423481905417173,
      "learning_rate": 7.831128444529023e-07,
      "loss": 0.4621,
      "step": 12298
    },
    {
      "epoch": 1.5079695929377146,
      "grad_norm": 1.9181576270289602,
      "learning_rate": 7.827444637853679e-07,
      "loss": 0.4013,
      "step": 12299
    },
    {
      "epoch": 1.5080922020598333,
      "grad_norm": 1.8974554809290411,
      "learning_rate": 7.823761537000752e-07,
      "loss": 0.4259,
      "step": 12300
    },
    {
      "epoch": 1.508214811181952,
      "grad_norm": 1.8537954519653461,
      "learning_rate": 7.820079142121642e-07,
      "loss": 0.3903,
      "step": 12301
    },
    {
      "epoch": 1.5083374203040707,
      "grad_norm": 2.2538410416506527,
      "learning_rate": 7.816397453367686e-07,
      "loss": 0.467,
      "step": 12302
    },
    {
      "epoch": 1.5084600294261894,
      "grad_norm": 1.9884541183054327,
      "learning_rate": 7.812716470890225e-07,
      "loss": 0.4224,
      "step": 12303
    },
    {
      "epoch": 1.508582638548308,
      "grad_norm": 1.8364628793530502,
      "learning_rate": 7.809036194840541e-07,
      "loss": 0.46,
      "step": 12304
    },
    {
      "epoch": 1.5087052476704268,
      "grad_norm": 2.0285847175341116,
      "learning_rate": 7.8053566253699e-07,
      "loss": 0.4138,
      "step": 12305
    },
    {
      "epoch": 1.5088278567925455,
      "grad_norm": 1.69383143464978,
      "learning_rate": 7.801677762629539e-07,
      "loss": 0.4418,
      "step": 12306
    },
    {
      "epoch": 1.5089504659146642,
      "grad_norm": 1.762461619461571,
      "learning_rate": 7.797999606770682e-07,
      "loss": 0.4007,
      "step": 12307
    },
    {
      "epoch": 1.509073075036783,
      "grad_norm": 1.7822033829301385,
      "learning_rate": 7.794322157944489e-07,
      "loss": 0.4323,
      "step": 12308
    },
    {
      "epoch": 1.5091956841589014,
      "grad_norm": 2.0475638374080445,
      "learning_rate": 7.790645416302111e-07,
      "loss": 0.4036,
      "step": 12309
    },
    {
      "epoch": 1.50931829328102,
      "grad_norm": 1.821527349982819,
      "learning_rate": 7.786969381994672e-07,
      "loss": 0.437,
      "step": 12310
    },
    {
      "epoch": 1.5094409024031388,
      "grad_norm": 1.9475747846489104,
      "learning_rate": 7.783294055173282e-07,
      "loss": 0.4216,
      "step": 12311
    },
    {
      "epoch": 1.5095635115252575,
      "grad_norm": 2.103244100407443,
      "learning_rate": 7.779619435988969e-07,
      "loss": 0.427,
      "step": 12312
    },
    {
      "epoch": 1.5096861206473762,
      "grad_norm": 1.9605060210484129,
      "learning_rate": 7.775945524592784e-07,
      "loss": 0.4506,
      "step": 12313
    },
    {
      "epoch": 1.5098087297694949,
      "grad_norm": 1.9392622864169229,
      "learning_rate": 7.772272321135737e-07,
      "loss": 0.4311,
      "step": 12314
    },
    {
      "epoch": 1.5099313388916136,
      "grad_norm": 1.9819892635775052,
      "learning_rate": 7.7685998257688e-07,
      "loss": 0.4787,
      "step": 12315
    },
    {
      "epoch": 1.5100539480137323,
      "grad_norm": 1.8372060983446237,
      "learning_rate": 7.764928038642904e-07,
      "loss": 0.423,
      "step": 12316
    },
    {
      "epoch": 1.5101765571358508,
      "grad_norm": 2.1882538930633992,
      "learning_rate": 7.76125695990898e-07,
      "loss": 0.4438,
      "step": 12317
    },
    {
      "epoch": 1.5102991662579694,
      "grad_norm": 1.7684009901092814,
      "learning_rate": 7.757586589717924e-07,
      "loss": 0.3757,
      "step": 12318
    },
    {
      "epoch": 1.5104217753800881,
      "grad_norm": 1.8529169366699467,
      "learning_rate": 7.753916928220573e-07,
      "loss": 0.4222,
      "step": 12319
    },
    {
      "epoch": 1.5105443845022068,
      "grad_norm": 1.9855033078965254,
      "learning_rate": 7.750247975567779e-07,
      "loss": 0.4354,
      "step": 12320
    },
    {
      "epoch": 1.5106669936243255,
      "grad_norm": 1.8732236559108577,
      "learning_rate": 7.746579731910328e-07,
      "loss": 0.4185,
      "step": 12321
    },
    {
      "epoch": 1.5107896027464442,
      "grad_norm": 1.9047582296015726,
      "learning_rate": 7.742912197398988e-07,
      "loss": 0.4468,
      "step": 12322
    },
    {
      "epoch": 1.510912211868563,
      "grad_norm": 2.0923393242183184,
      "learning_rate": 7.739245372184509e-07,
      "loss": 0.4465,
      "step": 12323
    },
    {
      "epoch": 1.5110348209906816,
      "grad_norm": 1.9721170664838328,
      "learning_rate": 7.735579256417619e-07,
      "loss": 0.3966,
      "step": 12324
    },
    {
      "epoch": 1.5111574301128003,
      "grad_norm": 1.9121073058713434,
      "learning_rate": 7.73191385024897e-07,
      "loss": 0.4633,
      "step": 12325
    },
    {
      "epoch": 1.511280039234919,
      "grad_norm": 1.9920968542571016,
      "learning_rate": 7.728249153829232e-07,
      "loss": 0.4427,
      "step": 12326
    },
    {
      "epoch": 1.5114026483570377,
      "grad_norm": 2.0446884222577406,
      "learning_rate": 7.72458516730904e-07,
      "loss": 0.4531,
      "step": 12327
    },
    {
      "epoch": 1.5115252574791564,
      "grad_norm": 1.8154765964685307,
      "learning_rate": 7.720921890838978e-07,
      "loss": 0.3877,
      "step": 12328
    },
    {
      "epoch": 1.5116478666012751,
      "grad_norm": 2.016701120884294,
      "learning_rate": 7.717259324569613e-07,
      "loss": 0.4889,
      "step": 12329
    },
    {
      "epoch": 1.5117704757233938,
      "grad_norm": 2.0079491630996817,
      "learning_rate": 7.713597468651485e-07,
      "loss": 0.4186,
      "step": 12330
    },
    {
      "epoch": 1.5118930848455125,
      "grad_norm": 1.9747905878609169,
      "learning_rate": 7.709936323235109e-07,
      "loss": 0.4066,
      "step": 12331
    },
    {
      "epoch": 1.5120156939676312,
      "grad_norm": 2.0381364502922033,
      "learning_rate": 7.706275888470963e-07,
      "loss": 0.4576,
      "step": 12332
    },
    {
      "epoch": 1.51213830308975,
      "grad_norm": 2.063696530776232,
      "learning_rate": 7.702616164509485e-07,
      "loss": 0.4755,
      "step": 12333
    },
    {
      "epoch": 1.5122609122118686,
      "grad_norm": 1.8365956202765643,
      "learning_rate": 7.698957151501113e-07,
      "loss": 0.4549,
      "step": 12334
    },
    {
      "epoch": 1.5123835213339873,
      "grad_norm": 1.8225923200615477,
      "learning_rate": 7.695298849596225e-07,
      "loss": 0.4755,
      "step": 12335
    },
    {
      "epoch": 1.512506130456106,
      "grad_norm": 1.8867060409738843,
      "learning_rate": 7.691641258945196e-07,
      "loss": 0.4472,
      "step": 12336
    },
    {
      "epoch": 1.5126287395782247,
      "grad_norm": 1.99454502618233,
      "learning_rate": 7.687984379698346e-07,
      "loss": 0.458,
      "step": 12337
    },
    {
      "epoch": 1.5127513487003434,
      "grad_norm": 1.7448023897716844,
      "learning_rate": 7.684328212005995e-07,
      "loss": 0.4301,
      "step": 12338
    },
    {
      "epoch": 1.5128739578224621,
      "grad_norm": 1.9144328238658597,
      "learning_rate": 7.680672756018401e-07,
      "loss": 0.4041,
      "step": 12339
    },
    {
      "epoch": 1.5129965669445806,
      "grad_norm": 1.8784251757636734,
      "learning_rate": 7.677018011885826e-07,
      "loss": 0.4098,
      "step": 12340
    },
    {
      "epoch": 1.5131191760666993,
      "grad_norm": 2.015175269058448,
      "learning_rate": 7.673363979758478e-07,
      "loss": 0.4565,
      "step": 12341
    },
    {
      "epoch": 1.513241785188818,
      "grad_norm": 1.8652515267609229,
      "learning_rate": 7.669710659786536e-07,
      "loss": 0.3876,
      "step": 12342
    },
    {
      "epoch": 1.5133643943109367,
      "grad_norm": 2.024768376400936,
      "learning_rate": 7.666058052120171e-07,
      "loss": 0.4544,
      "step": 12343
    },
    {
      "epoch": 1.5134870034330554,
      "grad_norm": 1.997578179166933,
      "learning_rate": 7.66240615690951e-07,
      "loss": 0.4493,
      "step": 12344
    },
    {
      "epoch": 1.5136096125551741,
      "grad_norm": 1.7675447683586551,
      "learning_rate": 7.658754974304652e-07,
      "loss": 0.4197,
      "step": 12345
    },
    {
      "epoch": 1.5137322216772928,
      "grad_norm": 2.0366736598428052,
      "learning_rate": 7.65510450445566e-07,
      "loss": 0.4122,
      "step": 12346
    },
    {
      "epoch": 1.5138548307994115,
      "grad_norm": 1.9557003059041302,
      "learning_rate": 7.651454747512582e-07,
      "loss": 0.4878,
      "step": 12347
    },
    {
      "epoch": 1.51397743992153,
      "grad_norm": 1.9315148014492522,
      "learning_rate": 7.647805703625425e-07,
      "loss": 0.3921,
      "step": 12348
    },
    {
      "epoch": 1.5141000490436487,
      "grad_norm": 1.835804067768256,
      "learning_rate": 7.644157372944178e-07,
      "loss": 0.4126,
      "step": 12349
    },
    {
      "epoch": 1.5142226581657674,
      "grad_norm": 1.9947778427901641,
      "learning_rate": 7.64050975561878e-07,
      "loss": 0.4689,
      "step": 12350
    },
    {
      "epoch": 1.514345267287886,
      "grad_norm": 1.8687529784321197,
      "learning_rate": 7.636862851799173e-07,
      "loss": 0.4183,
      "step": 12351
    },
    {
      "epoch": 1.5144678764100048,
      "grad_norm": 1.9354801853082515,
      "learning_rate": 7.633216661635232e-07,
      "loss": 0.4127,
      "step": 12352
    },
    {
      "epoch": 1.5145904855321235,
      "grad_norm": 1.987474666687465,
      "learning_rate": 7.629571185276841e-07,
      "loss": 0.4361,
      "step": 12353
    },
    {
      "epoch": 1.5147130946542422,
      "grad_norm": 2.122142326233757,
      "learning_rate": 7.625926422873828e-07,
      "loss": 0.4466,
      "step": 12354
    },
    {
      "epoch": 1.514835703776361,
      "grad_norm": 1.889552403722358,
      "learning_rate": 7.622282374575987e-07,
      "loss": 0.4478,
      "step": 12355
    },
    {
      "epoch": 1.5149583128984796,
      "grad_norm": 1.8824567770509306,
      "learning_rate": 7.618639040533108e-07,
      "loss": 0.4242,
      "step": 12356
    },
    {
      "epoch": 1.5150809220205983,
      "grad_norm": 2.0594751875946278,
      "learning_rate": 7.61499642089494e-07,
      "loss": 0.4293,
      "step": 12357
    },
    {
      "epoch": 1.515203531142717,
      "grad_norm": 2.004545618462613,
      "learning_rate": 7.611354515811198e-07,
      "loss": 0.419,
      "step": 12358
    },
    {
      "epoch": 1.5153261402648357,
      "grad_norm": 2.179351028427903,
      "learning_rate": 7.607713325431562e-07,
      "loss": 0.434,
      "step": 12359
    },
    {
      "epoch": 1.5154487493869544,
      "grad_norm": 1.9234264734175894,
      "learning_rate": 7.604072849905708e-07,
      "loss": 0.4217,
      "step": 12360
    },
    {
      "epoch": 1.515571358509073,
      "grad_norm": 2.0020686835497528,
      "learning_rate": 7.600433089383255e-07,
      "loss": 0.406,
      "step": 12361
    },
    {
      "epoch": 1.5156939676311918,
      "grad_norm": 1.8859963569353864,
      "learning_rate": 7.5967940440138e-07,
      "loss": 0.3713,
      "step": 12362
    },
    {
      "epoch": 1.5158165767533105,
      "grad_norm": 1.9675213421648188,
      "learning_rate": 7.593155713946918e-07,
      "loss": 0.443,
      "step": 12363
    },
    {
      "epoch": 1.5159391858754292,
      "grad_norm": 1.874908227407214,
      "learning_rate": 7.589518099332161e-07,
      "loss": 0.4546,
      "step": 12364
    },
    {
      "epoch": 1.516061794997548,
      "grad_norm": 1.954370946681825,
      "learning_rate": 7.585881200319031e-07,
      "loss": 0.4724,
      "step": 12365
    },
    {
      "epoch": 1.5161844041196666,
      "grad_norm": 1.9427958118186488,
      "learning_rate": 7.582245017057008e-07,
      "loss": 0.3818,
      "step": 12366
    },
    {
      "epoch": 1.5163070132417853,
      "grad_norm": 1.8929128672271203,
      "learning_rate": 7.578609549695556e-07,
      "loss": 0.4178,
      "step": 12367
    },
    {
      "epoch": 1.516429622363904,
      "grad_norm": 1.7300912967805797,
      "learning_rate": 7.574974798384088e-07,
      "loss": 0.3936,
      "step": 12368
    },
    {
      "epoch": 1.5165522314860227,
      "grad_norm": 1.9256262939752997,
      "learning_rate": 7.571340763272003e-07,
      "loss": 0.4584,
      "step": 12369
    },
    {
      "epoch": 1.5166748406081414,
      "grad_norm": 1.8210632275059728,
      "learning_rate": 7.567707444508677e-07,
      "loss": 0.4052,
      "step": 12370
    },
    {
      "epoch": 1.51679744973026,
      "grad_norm": 2.0148013263258227,
      "learning_rate": 7.564074842243435e-07,
      "loss": 0.4367,
      "step": 12371
    },
    {
      "epoch": 1.5169200588523786,
      "grad_norm": 2.005155409538452,
      "learning_rate": 7.560442956625577e-07,
      "loss": 0.4299,
      "step": 12372
    },
    {
      "epoch": 1.5170426679744973,
      "grad_norm": 1.9502311193326785,
      "learning_rate": 7.556811787804388e-07,
      "loss": 0.4448,
      "step": 12373
    },
    {
      "epoch": 1.517165277096616,
      "grad_norm": 1.8192423379368672,
      "learning_rate": 7.553181335929132e-07,
      "loss": 0.4243,
      "step": 12374
    },
    {
      "epoch": 1.5172878862187347,
      "grad_norm": 2.037339725371844,
      "learning_rate": 7.549551601148994e-07,
      "loss": 0.449,
      "step": 12375
    },
    {
      "epoch": 1.5174104953408534,
      "grad_norm": 1.9655516572120328,
      "learning_rate": 7.545922583613177e-07,
      "loss": 0.4531,
      "step": 12376
    },
    {
      "epoch": 1.517533104462972,
      "grad_norm": 1.9002680413331563,
      "learning_rate": 7.542294283470852e-07,
      "loss": 0.387,
      "step": 12377
    },
    {
      "epoch": 1.5176557135850908,
      "grad_norm": 1.9855795730692731,
      "learning_rate": 7.538666700871138e-07,
      "loss": 0.4639,
      "step": 12378
    },
    {
      "epoch": 1.5177783227072095,
      "grad_norm": 1.889072556163268,
      "learning_rate": 7.535039835963126e-07,
      "loss": 0.4372,
      "step": 12379
    },
    {
      "epoch": 1.517900931829328,
      "grad_norm": 1.7989256927941444,
      "learning_rate": 7.531413688895906e-07,
      "loss": 0.3927,
      "step": 12380
    },
    {
      "epoch": 1.5180235409514466,
      "grad_norm": 1.9038898939017546,
      "learning_rate": 7.5277882598185e-07,
      "loss": 0.4247,
      "step": 12381
    },
    {
      "epoch": 1.5181461500735653,
      "grad_norm": 1.9261213061232894,
      "learning_rate": 7.524163548879937e-07,
      "loss": 0.4358,
      "step": 12382
    },
    {
      "epoch": 1.518268759195684,
      "grad_norm": 2.0044508113290767,
      "learning_rate": 7.520539556229184e-07,
      "loss": 0.4779,
      "step": 12383
    },
    {
      "epoch": 1.5183913683178027,
      "grad_norm": 1.963328879058275,
      "learning_rate": 7.516916282015205e-07,
      "loss": 0.4592,
      "step": 12384
    },
    {
      "epoch": 1.5185139774399214,
      "grad_norm": 1.9810421971347054,
      "learning_rate": 7.51329372638691e-07,
      "loss": 0.4135,
      "step": 12385
    },
    {
      "epoch": 1.5186365865620401,
      "grad_norm": 1.8401137470799136,
      "learning_rate": 7.509671889493215e-07,
      "loss": 0.4296,
      "step": 12386
    },
    {
      "epoch": 1.5187591956841588,
      "grad_norm": 1.9403864096381511,
      "learning_rate": 7.506050771482962e-07,
      "loss": 0.4097,
      "step": 12387
    },
    {
      "epoch": 1.5188818048062775,
      "grad_norm": 2.0019359330734425,
      "learning_rate": 7.502430372504987e-07,
      "loss": 0.4276,
      "step": 12388
    },
    {
      "epoch": 1.5190044139283962,
      "grad_norm": 1.9828355493121597,
      "learning_rate": 7.498810692708103e-07,
      "loss": 0.391,
      "step": 12389
    },
    {
      "epoch": 1.519127023050515,
      "grad_norm": 1.7095803980922943,
      "learning_rate": 7.495191732241089e-07,
      "loss": 0.392,
      "step": 12390
    },
    {
      "epoch": 1.5192496321726336,
      "grad_norm": 1.7765268900787758,
      "learning_rate": 7.491573491252685e-07,
      "loss": 0.3997,
      "step": 12391
    },
    {
      "epoch": 1.5193722412947523,
      "grad_norm": 1.8189612261970305,
      "learning_rate": 7.4879559698916e-07,
      "loss": 0.43,
      "step": 12392
    },
    {
      "epoch": 1.519494850416871,
      "grad_norm": 1.8509206865180126,
      "learning_rate": 7.484339168306526e-07,
      "loss": 0.3878,
      "step": 12393
    },
    {
      "epoch": 1.5196174595389897,
      "grad_norm": 2.1110094171414095,
      "learning_rate": 7.48072308664613e-07,
      "loss": 0.4655,
      "step": 12394
    },
    {
      "epoch": 1.5197400686611084,
      "grad_norm": 1.911826346857965,
      "learning_rate": 7.47710772505903e-07,
      "loss": 0.4486,
      "step": 12395
    },
    {
      "epoch": 1.5198626777832271,
      "grad_norm": 1.8769940632957405,
      "learning_rate": 7.473493083693817e-07,
      "loss": 0.4248,
      "step": 12396
    },
    {
      "epoch": 1.5199852869053458,
      "grad_norm": 1.9905041032831392,
      "learning_rate": 7.469879162699073e-07,
      "loss": 0.4378,
      "step": 12397
    },
    {
      "epoch": 1.5201078960274645,
      "grad_norm": 1.8587304026426683,
      "learning_rate": 7.466265962223324e-07,
      "loss": 0.3981,
      "step": 12398
    },
    {
      "epoch": 1.5202305051495832,
      "grad_norm": 1.892434833402049,
      "learning_rate": 7.462653482415091e-07,
      "loss": 0.4579,
      "step": 12399
    },
    {
      "epoch": 1.520353114271702,
      "grad_norm": 1.873845278373626,
      "learning_rate": 7.459041723422847e-07,
      "loss": 0.4137,
      "step": 12400
    },
    {
      "epoch": 1.5204757233938206,
      "grad_norm": 1.8508614794320801,
      "learning_rate": 7.455430685395035e-07,
      "loss": 0.4554,
      "step": 12401
    },
    {
      "epoch": 1.5205983325159393,
      "grad_norm": 1.9992050380651663,
      "learning_rate": 7.451820368480083e-07,
      "loss": 0.4097,
      "step": 12402
    },
    {
      "epoch": 1.5207209416380578,
      "grad_norm": 2.000254690805771,
      "learning_rate": 7.448210772826389e-07,
      "loss": 0.4413,
      "step": 12403
    },
    {
      "epoch": 1.5208435507601765,
      "grad_norm": 1.9009222687102556,
      "learning_rate": 7.444601898582304e-07,
      "loss": 0.4194,
      "step": 12404
    },
    {
      "epoch": 1.5209661598822952,
      "grad_norm": 1.9662924995064581,
      "learning_rate": 7.440993745896152e-07,
      "loss": 0.434,
      "step": 12405
    },
    {
      "epoch": 1.521088769004414,
      "grad_norm": 1.8847944347461563,
      "learning_rate": 7.437386314916242e-07,
      "loss": 0.4488,
      "step": 12406
    },
    {
      "epoch": 1.5212113781265326,
      "grad_norm": 1.913067896321001,
      "learning_rate": 7.433779605790864e-07,
      "loss": 0.4142,
      "step": 12407
    },
    {
      "epoch": 1.5213339872486513,
      "grad_norm": 1.9040403099126635,
      "learning_rate": 7.430173618668227e-07,
      "loss": 0.426,
      "step": 12408
    },
    {
      "epoch": 1.52145659637077,
      "grad_norm": 2.093630062431397,
      "learning_rate": 7.426568353696559e-07,
      "loss": 0.4466,
      "step": 12409
    },
    {
      "epoch": 1.5215792054928887,
      "grad_norm": 2.0533036462980196,
      "learning_rate": 7.422963811024051e-07,
      "loss": 0.4044,
      "step": 12410
    },
    {
      "epoch": 1.5217018146150072,
      "grad_norm": 2.100776141504502,
      "learning_rate": 7.419359990798847e-07,
      "loss": 0.4388,
      "step": 12411
    },
    {
      "epoch": 1.521824423737126,
      "grad_norm": 2.2551993054700223,
      "learning_rate": 7.415756893169063e-07,
      "loss": 0.5011,
      "step": 12412
    },
    {
      "epoch": 1.5219470328592446,
      "grad_norm": 1.9224125413089408,
      "learning_rate": 7.412154518282802e-07,
      "loss": 0.4673,
      "step": 12413
    },
    {
      "epoch": 1.5220696419813633,
      "grad_norm": 1.788800170136212,
      "learning_rate": 7.408552866288138e-07,
      "loss": 0.4404,
      "step": 12414
    },
    {
      "epoch": 1.522192251103482,
      "grad_norm": 2.0408460414257967,
      "learning_rate": 7.404951937333083e-07,
      "loss": 0.4333,
      "step": 12415
    },
    {
      "epoch": 1.5223148602256007,
      "grad_norm": 2.0709156504363655,
      "learning_rate": 7.401351731565662e-07,
      "loss": 0.4247,
      "step": 12416
    },
    {
      "epoch": 1.5224374693477194,
      "grad_norm": 2.0067990150008024,
      "learning_rate": 7.397752249133841e-07,
      "loss": 0.4519,
      "step": 12417
    },
    {
      "epoch": 1.522560078469838,
      "grad_norm": 1.9399930838756465,
      "learning_rate": 7.394153490185555e-07,
      "loss": 0.382,
      "step": 12418
    },
    {
      "epoch": 1.5226826875919568,
      "grad_norm": 1.8415891235248578,
      "learning_rate": 7.390555454868731e-07,
      "loss": 0.3973,
      "step": 12419
    },
    {
      "epoch": 1.5228052967140755,
      "grad_norm": 1.9220379289869336,
      "learning_rate": 7.386958143331261e-07,
      "loss": 0.4176,
      "step": 12420
    },
    {
      "epoch": 1.5229279058361942,
      "grad_norm": 2.0078967686304123,
      "learning_rate": 7.383361555720991e-07,
      "loss": 0.4273,
      "step": 12421
    },
    {
      "epoch": 1.523050514958313,
      "grad_norm": 1.9535855720176105,
      "learning_rate": 7.379765692185745e-07,
      "loss": 0.4074,
      "step": 12422
    },
    {
      "epoch": 1.5231731240804316,
      "grad_norm": 1.92685223600113,
      "learning_rate": 7.376170552873327e-07,
      "loss": 0.4151,
      "step": 12423
    },
    {
      "epoch": 1.5232957332025503,
      "grad_norm": 1.7524511807292298,
      "learning_rate": 7.3725761379315e-07,
      "loss": 0.3995,
      "step": 12424
    },
    {
      "epoch": 1.523418342324669,
      "grad_norm": 1.8799186688692862,
      "learning_rate": 7.368982447507994e-07,
      "loss": 0.4271,
      "step": 12425
    },
    {
      "epoch": 1.5235409514467877,
      "grad_norm": 1.9359012809320537,
      "learning_rate": 7.365389481750524e-07,
      "loss": 0.391,
      "step": 12426
    },
    {
      "epoch": 1.5236635605689064,
      "grad_norm": 1.924721293868239,
      "learning_rate": 7.361797240806773e-07,
      "loss": 0.4148,
      "step": 12427
    },
    {
      "epoch": 1.523786169691025,
      "grad_norm": 2.0995298152424597,
      "learning_rate": 7.35820572482438e-07,
      "loss": 0.4288,
      "step": 12428
    },
    {
      "epoch": 1.5239087788131438,
      "grad_norm": 1.8956025588144345,
      "learning_rate": 7.354614933950957e-07,
      "loss": 0.4204,
      "step": 12429
    },
    {
      "epoch": 1.5240313879352625,
      "grad_norm": 1.94095414738097,
      "learning_rate": 7.351024868334105e-07,
      "loss": 0.4492,
      "step": 12430
    },
    {
      "epoch": 1.5241539970573812,
      "grad_norm": 1.957472299424737,
      "learning_rate": 7.347435528121371e-07,
      "loss": 0.3905,
      "step": 12431
    },
    {
      "epoch": 1.5242766061795,
      "grad_norm": 1.9871743726141529,
      "learning_rate": 7.343846913460295e-07,
      "loss": 0.4307,
      "step": 12432
    },
    {
      "epoch": 1.5243992153016186,
      "grad_norm": 1.9839979118813682,
      "learning_rate": 7.340259024498361e-07,
      "loss": 0.4202,
      "step": 12433
    },
    {
      "epoch": 1.524521824423737,
      "grad_norm": 1.7435441733656785,
      "learning_rate": 7.336671861383055e-07,
      "loss": 0.416,
      "step": 12434
    },
    {
      "epoch": 1.5246444335458558,
      "grad_norm": 1.867197482171281,
      "learning_rate": 7.333085424261793e-07,
      "loss": 0.4099,
      "step": 12435
    },
    {
      "epoch": 1.5247670426679745,
      "grad_norm": 2.1308837607206725,
      "learning_rate": 7.329499713282009e-07,
      "loss": 0.4578,
      "step": 12436
    },
    {
      "epoch": 1.5248896517900932,
      "grad_norm": 2.031974364834701,
      "learning_rate": 7.325914728591068e-07,
      "loss": 0.4705,
      "step": 12437
    },
    {
      "epoch": 1.5250122609122119,
      "grad_norm": 1.8125193045027974,
      "learning_rate": 7.322330470336314e-07,
      "loss": 0.4226,
      "step": 12438
    },
    {
      "epoch": 1.5251348700343306,
      "grad_norm": 2.0759199423195818,
      "learning_rate": 7.318746938665072e-07,
      "loss": 0.42,
      "step": 12439
    },
    {
      "epoch": 1.5252574791564493,
      "grad_norm": 2.040510337859635,
      "learning_rate": 7.315164133724645e-07,
      "loss": 0.4031,
      "step": 12440
    },
    {
      "epoch": 1.525380088278568,
      "grad_norm": 1.8057910227567202,
      "learning_rate": 7.311582055662278e-07,
      "loss": 0.3928,
      "step": 12441
    },
    {
      "epoch": 1.5255026974006864,
      "grad_norm": 1.8955289361606722,
      "learning_rate": 7.308000704625196e-07,
      "loss": 0.4272,
      "step": 12442
    },
    {
      "epoch": 1.5256253065228051,
      "grad_norm": 2.0478009518397515,
      "learning_rate": 7.304420080760613e-07,
      "loss": 0.4506,
      "step": 12443
    },
    {
      "epoch": 1.5257479156449238,
      "grad_norm": 1.936425523712434,
      "learning_rate": 7.300840184215682e-07,
      "loss": 0.4309,
      "step": 12444
    },
    {
      "epoch": 1.5258705247670425,
      "grad_norm": 1.926026020215996,
      "learning_rate": 7.297261015137566e-07,
      "loss": 0.4041,
      "step": 12445
    },
    {
      "epoch": 1.5259931338891612,
      "grad_norm": 1.8019417332128638,
      "learning_rate": 7.293682573673352e-07,
      "loss": 0.4199,
      "step": 12446
    },
    {
      "epoch": 1.52611574301128,
      "grad_norm": 2.047306271627965,
      "learning_rate": 7.290104859970141e-07,
      "loss": 0.4254,
      "step": 12447
    },
    {
      "epoch": 1.5262383521333986,
      "grad_norm": 2.0170302705884757,
      "learning_rate": 7.286527874174961e-07,
      "loss": 0.4013,
      "step": 12448
    },
    {
      "epoch": 1.5263609612555173,
      "grad_norm": 2.013832087845864,
      "learning_rate": 7.282951616434857e-07,
      "loss": 0.4085,
      "step": 12449
    },
    {
      "epoch": 1.526483570377636,
      "grad_norm": 2.038948554128284,
      "learning_rate": 7.279376086896806e-07,
      "loss": 0.4602,
      "step": 12450
    },
    {
      "epoch": 1.5266061794997547,
      "grad_norm": 1.8253513439285725,
      "learning_rate": 7.27580128570776e-07,
      "loss": 0.4041,
      "step": 12451
    },
    {
      "epoch": 1.5267287886218734,
      "grad_norm": 1.833284076326643,
      "learning_rate": 7.27222721301466e-07,
      "loss": 0.4376,
      "step": 12452
    },
    {
      "epoch": 1.5268513977439921,
      "grad_norm": 1.9815893236626319,
      "learning_rate": 7.268653868964415e-07,
      "loss": 0.4449,
      "step": 12453
    },
    {
      "epoch": 1.5269740068661108,
      "grad_norm": 2.009576466461637,
      "learning_rate": 7.265081253703888e-07,
      "loss": 0.4327,
      "step": 12454
    },
    {
      "epoch": 1.5270966159882295,
      "grad_norm": 1.9720855844503635,
      "learning_rate": 7.261509367379907e-07,
      "loss": 0.438,
      "step": 12455
    },
    {
      "epoch": 1.5272192251103482,
      "grad_norm": 1.9781303674141757,
      "learning_rate": 7.257938210139307e-07,
      "loss": 0.4394,
      "step": 12456
    },
    {
      "epoch": 1.527341834232467,
      "grad_norm": 1.8108304695728195,
      "learning_rate": 7.254367782128852e-07,
      "loss": 0.4366,
      "step": 12457
    },
    {
      "epoch": 1.5274644433545856,
      "grad_norm": 2.0323132401561765,
      "learning_rate": 7.250798083495292e-07,
      "loss": 0.4595,
      "step": 12458
    },
    {
      "epoch": 1.5275870524767043,
      "grad_norm": 2.0612919596095307,
      "learning_rate": 7.247229114385354e-07,
      "loss": 0.4101,
      "step": 12459
    },
    {
      "epoch": 1.527709661598823,
      "grad_norm": 1.9758254060176916,
      "learning_rate": 7.243660874945734e-07,
      "loss": 0.4755,
      "step": 12460
    },
    {
      "epoch": 1.5278322707209417,
      "grad_norm": 1.8537842196326868,
      "learning_rate": 7.240093365323089e-07,
      "loss": 0.3946,
      "step": 12461
    },
    {
      "epoch": 1.5279548798430604,
      "grad_norm": 2.085200715684576,
      "learning_rate": 7.236526585664039e-07,
      "loss": 0.3864,
      "step": 12462
    },
    {
      "epoch": 1.5280774889651791,
      "grad_norm": 2.151080146551264,
      "learning_rate": 7.232960536115202e-07,
      "loss": 0.4332,
      "step": 12463
    },
    {
      "epoch": 1.5282000980872978,
      "grad_norm": 1.8529192610617442,
      "learning_rate": 7.229395216823132e-07,
      "loss": 0.4333,
      "step": 12464
    },
    {
      "epoch": 1.5283227072094165,
      "grad_norm": 2.0551565265013494,
      "learning_rate": 7.225830627934383e-07,
      "loss": 0.4114,
      "step": 12465
    },
    {
      "epoch": 1.528445316331535,
      "grad_norm": 1.800314002813307,
      "learning_rate": 7.222266769595465e-07,
      "loss": 0.3966,
      "step": 12466
    },
    {
      "epoch": 1.5285679254536537,
      "grad_norm": 1.9272396809271282,
      "learning_rate": 7.21870364195286e-07,
      "loss": 0.3884,
      "step": 12467
    },
    {
      "epoch": 1.5286905345757724,
      "grad_norm": 2.048520333531971,
      "learning_rate": 7.215141245153004e-07,
      "loss": 0.3684,
      "step": 12468
    },
    {
      "epoch": 1.5288131436978911,
      "grad_norm": 1.8728951609733053,
      "learning_rate": 7.211579579342329e-07,
      "loss": 0.407,
      "step": 12469
    },
    {
      "epoch": 1.5289357528200098,
      "grad_norm": 2.2140980095922216,
      "learning_rate": 7.208018644667239e-07,
      "loss": 0.446,
      "step": 12470
    },
    {
      "epoch": 1.5290583619421285,
      "grad_norm": 1.860811251780901,
      "learning_rate": 7.204458441274068e-07,
      "loss": 0.4165,
      "step": 12471
    },
    {
      "epoch": 1.5291809710642472,
      "grad_norm": 1.9358967426894844,
      "learning_rate": 7.200898969309159e-07,
      "loss": 0.4103,
      "step": 12472
    },
    {
      "epoch": 1.529303580186366,
      "grad_norm": 2.0036713398754347,
      "learning_rate": 7.19734022891882e-07,
      "loss": 0.4337,
      "step": 12473
    },
    {
      "epoch": 1.5294261893084844,
      "grad_norm": 2.029556653704666,
      "learning_rate": 7.193782220249315e-07,
      "loss": 0.3931,
      "step": 12474
    },
    {
      "epoch": 1.529548798430603,
      "grad_norm": 1.9820789912432604,
      "learning_rate": 7.190224943446878e-07,
      "loss": 0.4145,
      "step": 12475
    },
    {
      "epoch": 1.5296714075527218,
      "grad_norm": 1.8035406776195164,
      "learning_rate": 7.186668398657726e-07,
      "loss": 0.4137,
      "step": 12476
    },
    {
      "epoch": 1.5297940166748405,
      "grad_norm": 1.8510268696371073,
      "learning_rate": 7.183112586028043e-07,
      "loss": 0.3953,
      "step": 12477
    },
    {
      "epoch": 1.5299166257969592,
      "grad_norm": 1.8115378655167949,
      "learning_rate": 7.179557505703976e-07,
      "loss": 0.4519,
      "step": 12478
    },
    {
      "epoch": 1.530039234919078,
      "grad_norm": 1.9208560939204269,
      "learning_rate": 7.17600315783164e-07,
      "loss": 0.4065,
      "step": 12479
    },
    {
      "epoch": 1.5301618440411966,
      "grad_norm": 1.9155150215818917,
      "learning_rate": 7.172449542557133e-07,
      "loss": 0.4006,
      "step": 12480
    },
    {
      "epoch": 1.5302844531633153,
      "grad_norm": 1.9558217410010874,
      "learning_rate": 7.168896660026505e-07,
      "loss": 0.4372,
      "step": 12481
    },
    {
      "epoch": 1.530407062285434,
      "grad_norm": 2.0872128200994164,
      "learning_rate": 7.165344510385799e-07,
      "loss": 0.4604,
      "step": 12482
    },
    {
      "epoch": 1.5305296714075527,
      "grad_norm": 1.9242009211041435,
      "learning_rate": 7.161793093781008e-07,
      "loss": 0.4219,
      "step": 12483
    },
    {
      "epoch": 1.5306522805296714,
      "grad_norm": 2.0116701889995037,
      "learning_rate": 7.158242410358093e-07,
      "loss": 0.4607,
      "step": 12484
    },
    {
      "epoch": 1.53077488965179,
      "grad_norm": 1.9765270026641812,
      "learning_rate": 7.154692460263002e-07,
      "loss": 0.4139,
      "step": 12485
    },
    {
      "epoch": 1.5308974987739088,
      "grad_norm": 1.7566819154850117,
      "learning_rate": 7.151143243641648e-07,
      "loss": 0.3916,
      "step": 12486
    },
    {
      "epoch": 1.5310201078960275,
      "grad_norm": 1.8555935707815134,
      "learning_rate": 7.147594760639909e-07,
      "loss": 0.4225,
      "step": 12487
    },
    {
      "epoch": 1.5311427170181462,
      "grad_norm": 1.8637394617961514,
      "learning_rate": 7.14404701140362e-07,
      "loss": 0.4639,
      "step": 12488
    },
    {
      "epoch": 1.531265326140265,
      "grad_norm": 1.9143460540344712,
      "learning_rate": 7.140499996078615e-07,
      "loss": 0.4018,
      "step": 12489
    },
    {
      "epoch": 1.5313879352623836,
      "grad_norm": 1.8399420383049483,
      "learning_rate": 7.136953714810682e-07,
      "loss": 0.4128,
      "step": 12490
    },
    {
      "epoch": 1.5315105443845023,
      "grad_norm": 1.7912039005785996,
      "learning_rate": 7.133408167745576e-07,
      "loss": 0.4205,
      "step": 12491
    },
    {
      "epoch": 1.531633153506621,
      "grad_norm": 1.775365773295974,
      "learning_rate": 7.129863355029015e-07,
      "loss": 0.4525,
      "step": 12492
    },
    {
      "epoch": 1.5317557626287397,
      "grad_norm": 1.8564837186936636,
      "learning_rate": 7.126319276806717e-07,
      "loss": 0.3683,
      "step": 12493
    },
    {
      "epoch": 1.5318783717508584,
      "grad_norm": 1.8123871530011941,
      "learning_rate": 7.122775933224332e-07,
      "loss": 0.4401,
      "step": 12494
    },
    {
      "epoch": 1.532000980872977,
      "grad_norm": 1.8858535373547594,
      "learning_rate": 7.119233324427511e-07,
      "loss": 0.4211,
      "step": 12495
    },
    {
      "epoch": 1.5321235899950958,
      "grad_norm": 1.8275391298882835,
      "learning_rate": 7.115691450561848e-07,
      "loss": 0.4238,
      "step": 12496
    },
    {
      "epoch": 1.5322461991172143,
      "grad_norm": 1.8302076272012204,
      "learning_rate": 7.112150311772939e-07,
      "loss": 0.44,
      "step": 12497
    },
    {
      "epoch": 1.532368808239333,
      "grad_norm": 1.8866255996796526,
      "learning_rate": 7.10860990820631e-07,
      "loss": 0.4302,
      "step": 12498
    },
    {
      "epoch": 1.5324914173614517,
      "grad_norm": 2.1034865081035767,
      "learning_rate": 7.105070240007497e-07,
      "loss": 0.427,
      "step": 12499
    },
    {
      "epoch": 1.5326140264835704,
      "grad_norm": 2.0286684685063534,
      "learning_rate": 7.101531307321976e-07,
      "loss": 0.4511,
      "step": 12500
    },
    {
      "epoch": 1.532736635605689,
      "grad_norm": 1.8959469237715687,
      "learning_rate": 7.097993110295198e-07,
      "loss": 0.4181,
      "step": 12501
    },
    {
      "epoch": 1.5328592447278078,
      "grad_norm": 1.8905086381221605,
      "learning_rate": 7.094455649072596e-07,
      "loss": 0.4445,
      "step": 12502
    },
    {
      "epoch": 1.5329818538499265,
      "grad_norm": 1.8641615544997405,
      "learning_rate": 7.090918923799581e-07,
      "loss": 0.3765,
      "step": 12503
    },
    {
      "epoch": 1.5331044629720452,
      "grad_norm": 1.809261786567005,
      "learning_rate": 7.08738293462149e-07,
      "loss": 0.4279,
      "step": 12504
    },
    {
      "epoch": 1.5332270720941636,
      "grad_norm": 1.90523599468074,
      "learning_rate": 7.083847681683669e-07,
      "loss": 0.4362,
      "step": 12505
    },
    {
      "epoch": 1.5333496812162823,
      "grad_norm": 1.8684381662977454,
      "learning_rate": 7.080313165131439e-07,
      "loss": 0.4437,
      "step": 12506
    },
    {
      "epoch": 1.533472290338401,
      "grad_norm": 1.8876695223594708,
      "learning_rate": 7.076779385110058e-07,
      "loss": 0.4303,
      "step": 12507
    },
    {
      "epoch": 1.5335948994605197,
      "grad_norm": 1.9694410252288357,
      "learning_rate": 7.07324634176477e-07,
      "loss": 0.4596,
      "step": 12508
    },
    {
      "epoch": 1.5337175085826384,
      "grad_norm": 1.8557110671124823,
      "learning_rate": 7.069714035240793e-07,
      "loss": 0.4138,
      "step": 12509
    },
    {
      "epoch": 1.5338401177047571,
      "grad_norm": 2.0443799668343736,
      "learning_rate": 7.066182465683318e-07,
      "loss": 0.4488,
      "step": 12510
    },
    {
      "epoch": 1.5339627268268758,
      "grad_norm": 1.857118181530446,
      "learning_rate": 7.062651633237489e-07,
      "loss": 0.4267,
      "step": 12511
    },
    {
      "epoch": 1.5340853359489945,
      "grad_norm": 2.3357037298519847,
      "learning_rate": 7.059121538048439e-07,
      "loss": 0.4211,
      "step": 12512
    },
    {
      "epoch": 1.5342079450711132,
      "grad_norm": 2.039100409495583,
      "learning_rate": 7.055592180261256e-07,
      "loss": 0.4252,
      "step": 12513
    },
    {
      "epoch": 1.534330554193232,
      "grad_norm": 1.903428000526676,
      "learning_rate": 7.052063560020994e-07,
      "loss": 0.4356,
      "step": 12514
    },
    {
      "epoch": 1.5344531633153506,
      "grad_norm": 1.838925151992715,
      "learning_rate": 7.048535677472695e-07,
      "loss": 0.4622,
      "step": 12515
    },
    {
      "epoch": 1.5345757724374693,
      "grad_norm": 1.9294979027232648,
      "learning_rate": 7.045008532761366e-07,
      "loss": 0.4111,
      "step": 12516
    },
    {
      "epoch": 1.534698381559588,
      "grad_norm": 2.1740582616711768,
      "learning_rate": 7.041482126031976e-07,
      "loss": 0.4445,
      "step": 12517
    },
    {
      "epoch": 1.5348209906817067,
      "grad_norm": 1.925531889958483,
      "learning_rate": 7.037956457429454e-07,
      "loss": 0.4682,
      "step": 12518
    },
    {
      "epoch": 1.5349435998038254,
      "grad_norm": 1.769264213577228,
      "learning_rate": 7.034431527098729e-07,
      "loss": 0.3994,
      "step": 12519
    },
    {
      "epoch": 1.5350662089259441,
      "grad_norm": 1.8920990409268648,
      "learning_rate": 7.030907335184675e-07,
      "loss": 0.3941,
      "step": 12520
    },
    {
      "epoch": 1.5351888180480628,
      "grad_norm": 1.8614918613790328,
      "learning_rate": 7.027383881832131e-07,
      "loss": 0.4164,
      "step": 12521
    },
    {
      "epoch": 1.5353114271701815,
      "grad_norm": 1.9111705212346468,
      "learning_rate": 7.023861167185931e-07,
      "loss": 0.38,
      "step": 12522
    },
    {
      "epoch": 1.5354340362923002,
      "grad_norm": 1.900673148724523,
      "learning_rate": 7.020339191390865e-07,
      "loss": 0.4094,
      "step": 12523
    },
    {
      "epoch": 1.535556645414419,
      "grad_norm": 1.750067516211,
      "learning_rate": 7.016817954591693e-07,
      "loss": 0.394,
      "step": 12524
    },
    {
      "epoch": 1.5356792545365376,
      "grad_norm": 1.8698363670776434,
      "learning_rate": 7.01329745693313e-07,
      "loss": 0.4292,
      "step": 12525
    },
    {
      "epoch": 1.5358018636586563,
      "grad_norm": 2.1161551576803297,
      "learning_rate": 7.009777698559891e-07,
      "loss": 0.3939,
      "step": 12526
    },
    {
      "epoch": 1.535924472780775,
      "grad_norm": 2.074908305588292,
      "learning_rate": 7.006258679616631e-07,
      "loss": 0.4432,
      "step": 12527
    },
    {
      "epoch": 1.5360470819028937,
      "grad_norm": 2.050306548904946,
      "learning_rate": 7.002740400248001e-07,
      "loss": 0.4281,
      "step": 12528
    },
    {
      "epoch": 1.5361696910250122,
      "grad_norm": 1.8355120705894652,
      "learning_rate": 6.999222860598598e-07,
      "loss": 0.4104,
      "step": 12529
    },
    {
      "epoch": 1.536292300147131,
      "grad_norm": 1.9582538500352722,
      "learning_rate": 6.995706060813007e-07,
      "loss": 0.4623,
      "step": 12530
    },
    {
      "epoch": 1.5364149092692496,
      "grad_norm": 2.052135734051883,
      "learning_rate": 6.992190001035768e-07,
      "loss": 0.4389,
      "step": 12531
    },
    {
      "epoch": 1.5365375183913683,
      "grad_norm": 2.080833054685052,
      "learning_rate": 6.988674681411403e-07,
      "loss": 0.4232,
      "step": 12532
    },
    {
      "epoch": 1.536660127513487,
      "grad_norm": 2.017594361972073,
      "learning_rate": 6.985160102084399e-07,
      "loss": 0.4636,
      "step": 12533
    },
    {
      "epoch": 1.5367827366356057,
      "grad_norm": 1.9337229256131034,
      "learning_rate": 6.981646263199196e-07,
      "loss": 0.4615,
      "step": 12534
    },
    {
      "epoch": 1.5369053457577244,
      "grad_norm": 1.9844159551085068,
      "learning_rate": 6.978133164900233e-07,
      "loss": 0.4139,
      "step": 12535
    },
    {
      "epoch": 1.537027954879843,
      "grad_norm": 2.0339421935790796,
      "learning_rate": 6.974620807331911e-07,
      "loss": 0.4325,
      "step": 12536
    },
    {
      "epoch": 1.5371505640019616,
      "grad_norm": 1.953276479769289,
      "learning_rate": 6.971109190638583e-07,
      "loss": 0.4086,
      "step": 12537
    },
    {
      "epoch": 1.5372731731240803,
      "grad_norm": 1.9394311458682394,
      "learning_rate": 6.967598314964577e-07,
      "loss": 0.4377,
      "step": 12538
    },
    {
      "epoch": 1.537395782246199,
      "grad_norm": 2.2117969129574977,
      "learning_rate": 6.964088180454215e-07,
      "loss": 0.4375,
      "step": 12539
    },
    {
      "epoch": 1.5375183913683177,
      "grad_norm": 1.820843148200789,
      "learning_rate": 6.960578787251748e-07,
      "loss": 0.4801,
      "step": 12540
    },
    {
      "epoch": 1.5376410004904364,
      "grad_norm": 2.040930310666221,
      "learning_rate": 6.957070135501437e-07,
      "loss": 0.4144,
      "step": 12541
    },
    {
      "epoch": 1.537763609612555,
      "grad_norm": 1.8191452680931868,
      "learning_rate": 6.95356222534748e-07,
      "loss": 0.4061,
      "step": 12542
    },
    {
      "epoch": 1.5378862187346738,
      "grad_norm": 2.1080084461244297,
      "learning_rate": 6.950055056934071e-07,
      "loss": 0.4412,
      "step": 12543
    },
    {
      "epoch": 1.5380088278567925,
      "grad_norm": 2.0850857055954237,
      "learning_rate": 6.946548630405347e-07,
      "loss": 0.4903,
      "step": 12544
    },
    {
      "epoch": 1.5381314369789112,
      "grad_norm": 1.988841227953505,
      "learning_rate": 6.943042945905443e-07,
      "loss": 0.4492,
      "step": 12545
    },
    {
      "epoch": 1.53825404610103,
      "grad_norm": 1.7833116651568504,
      "learning_rate": 6.939538003578438e-07,
      "loss": 0.3723,
      "step": 12546
    },
    {
      "epoch": 1.5383766552231486,
      "grad_norm": 1.8810989873386608,
      "learning_rate": 6.936033803568393e-07,
      "loss": 0.4429,
      "step": 12547
    },
    {
      "epoch": 1.5384992643452673,
      "grad_norm": 1.8696935565597659,
      "learning_rate": 6.932530346019334e-07,
      "loss": 0.4949,
      "step": 12548
    },
    {
      "epoch": 1.538621873467386,
      "grad_norm": 2.0438057173423925,
      "learning_rate": 6.929027631075272e-07,
      "loss": 0.3997,
      "step": 12549
    },
    {
      "epoch": 1.5387444825895047,
      "grad_norm": 2.031006552606339,
      "learning_rate": 6.925525658880167e-07,
      "loss": 0.4612,
      "step": 12550
    },
    {
      "epoch": 1.5388670917116234,
      "grad_norm": 1.9701078758578188,
      "learning_rate": 6.922024429577948e-07,
      "loss": 0.4318,
      "step": 12551
    },
    {
      "epoch": 1.538989700833742,
      "grad_norm": 1.954992644345443,
      "learning_rate": 6.91852394331253e-07,
      "loss": 0.4188,
      "step": 12552
    },
    {
      "epoch": 1.5391123099558608,
      "grad_norm": 2.0444945865694075,
      "learning_rate": 6.915024200227805e-07,
      "loss": 0.4493,
      "step": 12553
    },
    {
      "epoch": 1.5392349190779795,
      "grad_norm": 1.7679995054885274,
      "learning_rate": 6.911525200467584e-07,
      "loss": 0.4478,
      "step": 12554
    },
    {
      "epoch": 1.5393575282000982,
      "grad_norm": 1.9654368458760798,
      "learning_rate": 6.908026944175702e-07,
      "loss": 0.4463,
      "step": 12555
    },
    {
      "epoch": 1.539480137322217,
      "grad_norm": 2.0589209867253735,
      "learning_rate": 6.904529431495949e-07,
      "loss": 0.4288,
      "step": 12556
    },
    {
      "epoch": 1.5396027464443356,
      "grad_norm": 1.9412489071091419,
      "learning_rate": 6.901032662572074e-07,
      "loss": 0.3943,
      "step": 12557
    },
    {
      "epoch": 1.5397253555664543,
      "grad_norm": 1.909888113922739,
      "learning_rate": 6.897536637547789e-07,
      "loss": 0.4336,
      "step": 12558
    },
    {
      "epoch": 1.539847964688573,
      "grad_norm": 1.900471391512096,
      "learning_rate": 6.894041356566803e-07,
      "loss": 0.4325,
      "step": 12559
    },
    {
      "epoch": 1.5399705738106915,
      "grad_norm": 1.8314740689801225,
      "learning_rate": 6.890546819772765e-07,
      "loss": 0.4488,
      "step": 12560
    },
    {
      "epoch": 1.5400931829328102,
      "grad_norm": 2.075524668192258,
      "learning_rate": 6.887053027309312e-07,
      "loss": 0.3985,
      "step": 12561
    },
    {
      "epoch": 1.5402157920549289,
      "grad_norm": 1.9156971684594244,
      "learning_rate": 6.883559979320054e-07,
      "loss": 0.4539,
      "step": 12562
    },
    {
      "epoch": 1.5403384011770476,
      "grad_norm": 1.8893784994152183,
      "learning_rate": 6.880067675948554e-07,
      "loss": 0.4038,
      "step": 12563
    },
    {
      "epoch": 1.5404610102991663,
      "grad_norm": 2.139943926493329,
      "learning_rate": 6.876576117338344e-07,
      "loss": 0.4043,
      "step": 12564
    },
    {
      "epoch": 1.540583619421285,
      "grad_norm": 1.7424231610228578,
      "learning_rate": 6.873085303632937e-07,
      "loss": 0.406,
      "step": 12565
    },
    {
      "epoch": 1.5407062285434037,
      "grad_norm": 2.027392357107339,
      "learning_rate": 6.869595234975834e-07,
      "loss": 0.4528,
      "step": 12566
    },
    {
      "epoch": 1.5408288376655224,
      "grad_norm": 1.8529639120678454,
      "learning_rate": 6.866105911510448e-07,
      "loss": 0.403,
      "step": 12567
    },
    {
      "epoch": 1.5409514467876408,
      "grad_norm": 1.86580535754557,
      "learning_rate": 6.862617333380214e-07,
      "loss": 0.4345,
      "step": 12568
    },
    {
      "epoch": 1.5410740559097595,
      "grad_norm": 2.189603052607674,
      "learning_rate": 6.859129500728523e-07,
      "loss": 0.4115,
      "step": 12569
    },
    {
      "epoch": 1.5411966650318782,
      "grad_norm": 2.0149044188120895,
      "learning_rate": 6.855642413698726e-07,
      "loss": 0.4458,
      "step": 12570
    },
    {
      "epoch": 1.541319274153997,
      "grad_norm": 2.1324790357509857,
      "learning_rate": 6.852156072434141e-07,
      "loss": 0.4395,
      "step": 12571
    },
    {
      "epoch": 1.5414418832761156,
      "grad_norm": 1.8703378574208378,
      "learning_rate": 6.848670477078068e-07,
      "loss": 0.4176,
      "step": 12572
    },
    {
      "epoch": 1.5415644923982343,
      "grad_norm": 1.9539163892738796,
      "learning_rate": 6.845185627773781e-07,
      "loss": 0.4376,
      "step": 12573
    },
    {
      "epoch": 1.541687101520353,
      "grad_norm": 2.061869719314912,
      "learning_rate": 6.841701524664507e-07,
      "loss": 0.433,
      "step": 12574
    },
    {
      "epoch": 1.5418097106424717,
      "grad_norm": 1.9417003914140705,
      "learning_rate": 6.838218167893437e-07,
      "loss": 0.463,
      "step": 12575
    },
    {
      "epoch": 1.5419323197645904,
      "grad_norm": 1.9849209176945781,
      "learning_rate": 6.834735557603764e-07,
      "loss": 0.4532,
      "step": 12576
    },
    {
      "epoch": 1.5420549288867091,
      "grad_norm": 2.002026453124446,
      "learning_rate": 6.831253693938608e-07,
      "loss": 0.4121,
      "step": 12577
    },
    {
      "epoch": 1.5421775380088278,
      "grad_norm": 1.8333006493646875,
      "learning_rate": 6.8277725770411e-07,
      "loss": 0.4175,
      "step": 12578
    },
    {
      "epoch": 1.5423001471309465,
      "grad_norm": 2.224914160827451,
      "learning_rate": 6.824292207054309e-07,
      "loss": 0.4506,
      "step": 12579
    },
    {
      "epoch": 1.5424227562530652,
      "grad_norm": 2.0005669514221647,
      "learning_rate": 6.820812584121278e-07,
      "loss": 0.4174,
      "step": 12580
    },
    {
      "epoch": 1.542545365375184,
      "grad_norm": 1.9801729324303394,
      "learning_rate": 6.817333708385035e-07,
      "loss": 0.4257,
      "step": 12581
    },
    {
      "epoch": 1.5426679744973026,
      "grad_norm": 2.1066299235135717,
      "learning_rate": 6.813855579988573e-07,
      "loss": 0.4076,
      "step": 12582
    },
    {
      "epoch": 1.5427905836194213,
      "grad_norm": 1.8845715746154832,
      "learning_rate": 6.810378199074843e-07,
      "loss": 0.4573,
      "step": 12583
    },
    {
      "epoch": 1.54291319274154,
      "grad_norm": 1.9546340038153953,
      "learning_rate": 6.806901565786764e-07,
      "loss": 0.4553,
      "step": 12584
    },
    {
      "epoch": 1.5430358018636587,
      "grad_norm": 2.006372618753315,
      "learning_rate": 6.803425680267239e-07,
      "loss": 0.4177,
      "step": 12585
    },
    {
      "epoch": 1.5431584109857774,
      "grad_norm": 2.128018698375729,
      "learning_rate": 6.799950542659139e-07,
      "loss": 0.4298,
      "step": 12586
    },
    {
      "epoch": 1.5432810201078961,
      "grad_norm": 2.108386640489208,
      "learning_rate": 6.796476153105294e-07,
      "loss": 0.4218,
      "step": 12587
    },
    {
      "epoch": 1.5434036292300148,
      "grad_norm": 1.8168245215824206,
      "learning_rate": 6.793002511748498e-07,
      "loss": 0.4512,
      "step": 12588
    },
    {
      "epoch": 1.5435262383521335,
      "grad_norm": 1.9532179396902447,
      "learning_rate": 6.789529618731539e-07,
      "loss": 0.4159,
      "step": 12589
    },
    {
      "epoch": 1.5436488474742522,
      "grad_norm": 2.092985995694436,
      "learning_rate": 6.786057474197144e-07,
      "loss": 0.4227,
      "step": 12590
    },
    {
      "epoch": 1.5437714565963707,
      "grad_norm": 1.8107263260987414,
      "learning_rate": 6.782586078288042e-07,
      "loss": 0.4103,
      "step": 12591
    },
    {
      "epoch": 1.5438940657184894,
      "grad_norm": 2.036786537675455,
      "learning_rate": 6.779115431146894e-07,
      "loss": 0.4606,
      "step": 12592
    },
    {
      "epoch": 1.5440166748406081,
      "grad_norm": 1.948010267907579,
      "learning_rate": 6.775645532916367e-07,
      "loss": 0.4024,
      "step": 12593
    },
    {
      "epoch": 1.5441392839627268,
      "grad_norm": 1.9974395840297752,
      "learning_rate": 6.772176383739065e-07,
      "loss": 0.4384,
      "step": 12594
    },
    {
      "epoch": 1.5442618930848455,
      "grad_norm": 1.920191234303519,
      "learning_rate": 6.768707983757588e-07,
      "loss": 0.4211,
      "step": 12595
    },
    {
      "epoch": 1.5443845022069642,
      "grad_norm": 1.9785101268897114,
      "learning_rate": 6.765240333114492e-07,
      "loss": 0.4331,
      "step": 12596
    },
    {
      "epoch": 1.544507111329083,
      "grad_norm": 2.1730235200510264,
      "learning_rate": 6.761773431952293e-07,
      "loss": 0.4419,
      "step": 12597
    },
    {
      "epoch": 1.5446297204512016,
      "grad_norm": 2.080244147315378,
      "learning_rate": 6.758307280413495e-07,
      "loss": 0.4591,
      "step": 12598
    },
    {
      "epoch": 1.54475232957332,
      "grad_norm": 1.9351973534877385,
      "learning_rate": 6.754841878640575e-07,
      "loss": 0.4264,
      "step": 12599
    },
    {
      "epoch": 1.5448749386954388,
      "grad_norm": 1.9691025979875214,
      "learning_rate": 6.75137722677594e-07,
      "loss": 0.4494,
      "step": 12600
    },
    {
      "epoch": 1.5449975478175575,
      "grad_norm": 2.106363853780119,
      "learning_rate": 6.747913324962008e-07,
      "loss": 0.4308,
      "step": 12601
    },
    {
      "epoch": 1.5451201569396762,
      "grad_norm": 1.8850686633981595,
      "learning_rate": 6.744450173341157e-07,
      "loss": 0.3945,
      "step": 12602
    },
    {
      "epoch": 1.545242766061795,
      "grad_norm": 1.817113369843757,
      "learning_rate": 6.740987772055726e-07,
      "loss": 0.4391,
      "step": 12603
    },
    {
      "epoch": 1.5453653751839136,
      "grad_norm": 1.8269598639338327,
      "learning_rate": 6.737526121248012e-07,
      "loss": 0.4321,
      "step": 12604
    },
    {
      "epoch": 1.5454879843060323,
      "grad_norm": 1.9169561121067258,
      "learning_rate": 6.734065221060307e-07,
      "loss": 0.4624,
      "step": 12605
    },
    {
      "epoch": 1.545610593428151,
      "grad_norm": 2.0882466690375217,
      "learning_rate": 6.730605071634866e-07,
      "loss": 0.3982,
      "step": 12606
    },
    {
      "epoch": 1.5457332025502697,
      "grad_norm": 1.9777608565824543,
      "learning_rate": 6.727145673113894e-07,
      "loss": 0.436,
      "step": 12607
    },
    {
      "epoch": 1.5458558116723884,
      "grad_norm": 2.023642437043602,
      "learning_rate": 6.723687025639589e-07,
      "loss": 0.4298,
      "step": 12608
    },
    {
      "epoch": 1.545978420794507,
      "grad_norm": 1.8639483583839842,
      "learning_rate": 6.720229129354105e-07,
      "loss": 0.416,
      "step": 12609
    },
    {
      "epoch": 1.5461010299166258,
      "grad_norm": 1.8915301026034308,
      "learning_rate": 6.716771984399556e-07,
      "loss": 0.4567,
      "step": 12610
    },
    {
      "epoch": 1.5462236390387445,
      "grad_norm": 1.8342831491282188,
      "learning_rate": 6.713315590918049e-07,
      "loss": 0.436,
      "step": 12611
    },
    {
      "epoch": 1.5463462481608632,
      "grad_norm": 1.8323833324385772,
      "learning_rate": 6.709859949051651e-07,
      "loss": 0.3843,
      "step": 12612
    },
    {
      "epoch": 1.546468857282982,
      "grad_norm": 2.0344361124426227,
      "learning_rate": 6.706405058942389e-07,
      "loss": 0.3953,
      "step": 12613
    },
    {
      "epoch": 1.5465914664051006,
      "grad_norm": 2.0005711117335077,
      "learning_rate": 6.702950920732259e-07,
      "loss": 0.4111,
      "step": 12614
    },
    {
      "epoch": 1.5467140755272193,
      "grad_norm": 1.751880369828719,
      "learning_rate": 6.699497534563246e-07,
      "loss": 0.4207,
      "step": 12615
    },
    {
      "epoch": 1.546836684649338,
      "grad_norm": 1.82471977612486,
      "learning_rate": 6.696044900577284e-07,
      "loss": 0.4126,
      "step": 12616
    },
    {
      "epoch": 1.5469592937714567,
      "grad_norm": 1.7824756330779064,
      "learning_rate": 6.692593018916272e-07,
      "loss": 0.4175,
      "step": 12617
    },
    {
      "epoch": 1.5470819028935754,
      "grad_norm": 2.1345842114439284,
      "learning_rate": 6.689141889722095e-07,
      "loss": 0.4107,
      "step": 12618
    },
    {
      "epoch": 1.547204512015694,
      "grad_norm": 1.7098806662708121,
      "learning_rate": 6.685691513136613e-07,
      "loss": 0.4336,
      "step": 12619
    },
    {
      "epoch": 1.5473271211378128,
      "grad_norm": 1.873031593844598,
      "learning_rate": 6.682241889301636e-07,
      "loss": 0.4205,
      "step": 12620
    },
    {
      "epoch": 1.5474497302599315,
      "grad_norm": 1.763200176537079,
      "learning_rate": 6.678793018358934e-07,
      "loss": 0.4312,
      "step": 12621
    },
    {
      "epoch": 1.5475723393820502,
      "grad_norm": 2.0430453693932247,
      "learning_rate": 6.675344900450284e-07,
      "loss": 0.4385,
      "step": 12622
    },
    {
      "epoch": 1.5476949485041687,
      "grad_norm": 1.915590462647947,
      "learning_rate": 6.671897535717392e-07,
      "loss": 0.3984,
      "step": 12623
    },
    {
      "epoch": 1.5478175576262874,
      "grad_norm": 2.0989888169580495,
      "learning_rate": 6.668450924301967e-07,
      "loss": 0.4085,
      "step": 12624
    },
    {
      "epoch": 1.547940166748406,
      "grad_norm": 1.946467702919335,
      "learning_rate": 6.665005066345653e-07,
      "loss": 0.376,
      "step": 12625
    },
    {
      "epoch": 1.5480627758705248,
      "grad_norm": 2.0616958105628904,
      "learning_rate": 6.6615599619901e-07,
      "loss": 0.4265,
      "step": 12626
    },
    {
      "epoch": 1.5481853849926435,
      "grad_norm": 2.152203030824442,
      "learning_rate": 6.658115611376889e-07,
      "loss": 0.434,
      "step": 12627
    },
    {
      "epoch": 1.5483079941147622,
      "grad_norm": 2.047237360259091,
      "learning_rate": 6.654672014647607e-07,
      "loss": 0.4032,
      "step": 12628
    },
    {
      "epoch": 1.5484306032368809,
      "grad_norm": 1.9163990672607631,
      "learning_rate": 6.651229171943782e-07,
      "loss": 0.43,
      "step": 12629
    },
    {
      "epoch": 1.5485532123589996,
      "grad_norm": 2.019763247270667,
      "learning_rate": 6.647787083406915e-07,
      "loss": 0.4311,
      "step": 12630
    },
    {
      "epoch": 1.548675821481118,
      "grad_norm": 2.062419641924636,
      "learning_rate": 6.644345749178488e-07,
      "loss": 0.4573,
      "step": 12631
    },
    {
      "epoch": 1.5487984306032367,
      "grad_norm": 1.8004865733123172,
      "learning_rate": 6.640905169399956e-07,
      "loss": 0.4314,
      "step": 12632
    },
    {
      "epoch": 1.5489210397253554,
      "grad_norm": 2.0163667259588043,
      "learning_rate": 6.637465344212721e-07,
      "loss": 0.4303,
      "step": 12633
    },
    {
      "epoch": 1.5490436488474741,
      "grad_norm": 1.9756286337424083,
      "learning_rate": 6.634026273758165e-07,
      "loss": 0.4381,
      "step": 12634
    },
    {
      "epoch": 1.5491662579695928,
      "grad_norm": 2.1744113398016554,
      "learning_rate": 6.630587958177648e-07,
      "loss": 0.4295,
      "step": 12635
    },
    {
      "epoch": 1.5492888670917115,
      "grad_norm": 1.9872898604292275,
      "learning_rate": 6.62715039761248e-07,
      "loss": 0.4029,
      "step": 12636
    },
    {
      "epoch": 1.5494114762138302,
      "grad_norm": 2.0479039611955177,
      "learning_rate": 6.623713592203965e-07,
      "loss": 0.4639,
      "step": 12637
    },
    {
      "epoch": 1.549534085335949,
      "grad_norm": 1.8253888046702698,
      "learning_rate": 6.620277542093345e-07,
      "loss": 0.3966,
      "step": 12638
    },
    {
      "epoch": 1.5496566944580676,
      "grad_norm": 1.9005627312385251,
      "learning_rate": 6.616842247421865e-07,
      "loss": 0.3799,
      "step": 12639
    },
    {
      "epoch": 1.5497793035801863,
      "grad_norm": 1.8279017418555243,
      "learning_rate": 6.613407708330708e-07,
      "loss": 0.4272,
      "step": 12640
    },
    {
      "epoch": 1.549901912702305,
      "grad_norm": 1.917335669478253,
      "learning_rate": 6.609973924961049e-07,
      "loss": 0.4001,
      "step": 12641
    },
    {
      "epoch": 1.5500245218244237,
      "grad_norm": 2.152672108487589,
      "learning_rate": 6.606540897454022e-07,
      "loss": 0.4066,
      "step": 12642
    },
    {
      "epoch": 1.5501471309465424,
      "grad_norm": 1.983581277490728,
      "learning_rate": 6.603108625950718e-07,
      "loss": 0.4713,
      "step": 12643
    },
    {
      "epoch": 1.5502697400686611,
      "grad_norm": 1.947435141315411,
      "learning_rate": 6.599677110592218e-07,
      "loss": 0.4231,
      "step": 12644
    },
    {
      "epoch": 1.5503923491907798,
      "grad_norm": 1.9724636483851543,
      "learning_rate": 6.596246351519572e-07,
      "loss": 0.4726,
      "step": 12645
    },
    {
      "epoch": 1.5505149583128985,
      "grad_norm": 1.9438998905381537,
      "learning_rate": 6.592816348873785e-07,
      "loss": 0.4607,
      "step": 12646
    },
    {
      "epoch": 1.5506375674350172,
      "grad_norm": 1.7495572279363352,
      "learning_rate": 6.589387102795822e-07,
      "loss": 0.3826,
      "step": 12647
    },
    {
      "epoch": 1.550760176557136,
      "grad_norm": 2.0458587481656934,
      "learning_rate": 6.585958613426644e-07,
      "loss": 0.457,
      "step": 12648
    },
    {
      "epoch": 1.5508827856792546,
      "grad_norm": 2.0893579042904142,
      "learning_rate": 6.582530880907182e-07,
      "loss": 0.4653,
      "step": 12649
    },
    {
      "epoch": 1.5510053948013733,
      "grad_norm": 1.7782826493554762,
      "learning_rate": 6.57910390537829e-07,
      "loss": 0.413,
      "step": 12650
    },
    {
      "epoch": 1.551128003923492,
      "grad_norm": 1.8540455609454058,
      "learning_rate": 6.575677686980841e-07,
      "loss": 0.4273,
      "step": 12651
    },
    {
      "epoch": 1.5512506130456107,
      "grad_norm": 1.9616123867770916,
      "learning_rate": 6.572252225855663e-07,
      "loss": 0.4407,
      "step": 12652
    },
    {
      "epoch": 1.5513732221677294,
      "grad_norm": 1.9293588152408374,
      "learning_rate": 6.568827522143542e-07,
      "loss": 0.4119,
      "step": 12653
    },
    {
      "epoch": 1.551495831289848,
      "grad_norm": 1.9414436637534336,
      "learning_rate": 6.565403575985233e-07,
      "loss": 0.4559,
      "step": 12654
    },
    {
      "epoch": 1.5516184404119666,
      "grad_norm": 1.8888135509179729,
      "learning_rate": 6.56198038752148e-07,
      "loss": 0.4388,
      "step": 12655
    },
    {
      "epoch": 1.5517410495340853,
      "grad_norm": 1.8873729604377194,
      "learning_rate": 6.558557956892969e-07,
      "loss": 0.4406,
      "step": 12656
    },
    {
      "epoch": 1.551863658656204,
      "grad_norm": 1.9352862898411092,
      "learning_rate": 6.555136284240371e-07,
      "loss": 0.4013,
      "step": 12657
    },
    {
      "epoch": 1.5519862677783227,
      "grad_norm": 2.0190749618144506,
      "learning_rate": 6.551715369704334e-07,
      "loss": 0.4265,
      "step": 12658
    },
    {
      "epoch": 1.5521088769004414,
      "grad_norm": 1.9656607070612038,
      "learning_rate": 6.548295213425456e-07,
      "loss": 0.3897,
      "step": 12659
    },
    {
      "epoch": 1.5522314860225601,
      "grad_norm": 1.8647262541985477,
      "learning_rate": 6.544875815544305e-07,
      "loss": 0.4194,
      "step": 12660
    },
    {
      "epoch": 1.5523540951446788,
      "grad_norm": 2.1470987908482417,
      "learning_rate": 6.541457176201429e-07,
      "loss": 0.3973,
      "step": 12661
    },
    {
      "epoch": 1.5524767042667973,
      "grad_norm": 1.9771811455737585,
      "learning_rate": 6.538039295537358e-07,
      "loss": 0.4105,
      "step": 12662
    },
    {
      "epoch": 1.552599313388916,
      "grad_norm": 1.8717869632156399,
      "learning_rate": 6.534622173692537e-07,
      "loss": 0.4387,
      "step": 12663
    },
    {
      "epoch": 1.5527219225110347,
      "grad_norm": 1.8171384250402147,
      "learning_rate": 6.531205810807439e-07,
      "loss": 0.4143,
      "step": 12664
    },
    {
      "epoch": 1.5528445316331534,
      "grad_norm": 1.790715931562482,
      "learning_rate": 6.527790207022483e-07,
      "loss": 0.3802,
      "step": 12665
    },
    {
      "epoch": 1.552967140755272,
      "grad_norm": 2.047983964301985,
      "learning_rate": 6.524375362478055e-07,
      "loss": 0.4146,
      "step": 12666
    },
    {
      "epoch": 1.5530897498773908,
      "grad_norm": 2.008252756718238,
      "learning_rate": 6.5209612773145e-07,
      "loss": 0.4593,
      "step": 12667
    },
    {
      "epoch": 1.5532123589995095,
      "grad_norm": 2.3603333117490197,
      "learning_rate": 6.517547951672149e-07,
      "loss": 0.4576,
      "step": 12668
    },
    {
      "epoch": 1.5533349681216282,
      "grad_norm": 2.1619719288103663,
      "learning_rate": 6.514135385691305e-07,
      "loss": 0.4025,
      "step": 12669
    },
    {
      "epoch": 1.553457577243747,
      "grad_norm": 1.9644936437375058,
      "learning_rate": 6.510723579512227e-07,
      "loss": 0.4068,
      "step": 12670
    },
    {
      "epoch": 1.5535801863658656,
      "grad_norm": 2.12910110902123,
      "learning_rate": 6.507312533275134e-07,
      "loss": 0.4821,
      "step": 12671
    },
    {
      "epoch": 1.5537027954879843,
      "grad_norm": 2.0835377008789937,
      "learning_rate": 6.503902247120239e-07,
      "loss": 0.3825,
      "step": 12672
    },
    {
      "epoch": 1.553825404610103,
      "grad_norm": 1.9471151331808998,
      "learning_rate": 6.500492721187701e-07,
      "loss": 0.4062,
      "step": 12673
    },
    {
      "epoch": 1.5539480137322217,
      "grad_norm": 1.8751826469943655,
      "learning_rate": 6.497083955617672e-07,
      "loss": 0.3778,
      "step": 12674
    },
    {
      "epoch": 1.5540706228543404,
      "grad_norm": 1.7963198116596666,
      "learning_rate": 6.493675950550246e-07,
      "loss": 0.4075,
      "step": 12675
    },
    {
      "epoch": 1.554193231976459,
      "grad_norm": 1.9079862358096293,
      "learning_rate": 6.490268706125497e-07,
      "loss": 0.4489,
      "step": 12676
    },
    {
      "epoch": 1.5543158410985778,
      "grad_norm": 1.8717385335383214,
      "learning_rate": 6.486862222483472e-07,
      "loss": 0.3972,
      "step": 12677
    },
    {
      "epoch": 1.5544384502206965,
      "grad_norm": 1.855480325963272,
      "learning_rate": 6.48345649976419e-07,
      "loss": 0.4267,
      "step": 12678
    },
    {
      "epoch": 1.5545610593428152,
      "grad_norm": 2.1983243764018465,
      "learning_rate": 6.480051538107629e-07,
      "loss": 0.4541,
      "step": 12679
    },
    {
      "epoch": 1.5546836684649339,
      "grad_norm": 1.858160789721879,
      "learning_rate": 6.47664733765373e-07,
      "loss": 0.4327,
      "step": 12680
    },
    {
      "epoch": 1.5548062775870526,
      "grad_norm": 1.83980105700694,
      "learning_rate": 6.473243898542417e-07,
      "loss": 0.4044,
      "step": 12681
    },
    {
      "epoch": 1.5549288867091713,
      "grad_norm": 1.996040333512092,
      "learning_rate": 6.469841220913587e-07,
      "loss": 0.4435,
      "step": 12682
    },
    {
      "epoch": 1.55505149583129,
      "grad_norm": 2.1084471953970603,
      "learning_rate": 6.466439304907088e-07,
      "loss": 0.4299,
      "step": 12683
    },
    {
      "epoch": 1.5551741049534087,
      "grad_norm": 1.8414620685471137,
      "learning_rate": 6.463038150662737e-07,
      "loss": 0.4414,
      "step": 12684
    },
    {
      "epoch": 1.5552967140755272,
      "grad_norm": 2.0618491823793383,
      "learning_rate": 6.459637758320341e-07,
      "loss": 0.4524,
      "step": 12685
    },
    {
      "epoch": 1.5554193231976459,
      "grad_norm": 2.1293687958454384,
      "learning_rate": 6.456238128019652e-07,
      "loss": 0.4104,
      "step": 12686
    },
    {
      "epoch": 1.5555419323197646,
      "grad_norm": 2.074679808578311,
      "learning_rate": 6.452839259900409e-07,
      "loss": 0.4155,
      "step": 12687
    },
    {
      "epoch": 1.5556645414418833,
      "grad_norm": 2.132759947697713,
      "learning_rate": 6.449441154102301e-07,
      "loss": 0.4084,
      "step": 12688
    },
    {
      "epoch": 1.555787150564002,
      "grad_norm": 1.9015487703214895,
      "learning_rate": 6.446043810765012e-07,
      "loss": 0.3779,
      "step": 12689
    },
    {
      "epoch": 1.5559097596861207,
      "grad_norm": 1.9528238478933146,
      "learning_rate": 6.442647230028159e-07,
      "loss": 0.4599,
      "step": 12690
    },
    {
      "epoch": 1.5560323688082394,
      "grad_norm": 2.027529443446154,
      "learning_rate": 6.43925141203137e-07,
      "loss": 0.422,
      "step": 12691
    },
    {
      "epoch": 1.556154977930358,
      "grad_norm": 1.9559316210035025,
      "learning_rate": 6.435856356914202e-07,
      "loss": 0.4265,
      "step": 12692
    },
    {
      "epoch": 1.5562775870524765,
      "grad_norm": 1.954101564362946,
      "learning_rate": 6.4324620648162e-07,
      "loss": 0.4115,
      "step": 12693
    },
    {
      "epoch": 1.5564001961745952,
      "grad_norm": 2.019502478925377,
      "learning_rate": 6.429068535876875e-07,
      "loss": 0.4041,
      "step": 12694
    },
    {
      "epoch": 1.556522805296714,
      "grad_norm": 2.1452796559617213,
      "learning_rate": 6.425675770235728e-07,
      "loss": 0.4581,
      "step": 12695
    },
    {
      "epoch": 1.5566454144188326,
      "grad_norm": 1.6676339466928252,
      "learning_rate": 6.422283768032175e-07,
      "loss": 0.4384,
      "step": 12696
    },
    {
      "epoch": 1.5567680235409513,
      "grad_norm": 1.8598392813016773,
      "learning_rate": 6.418892529405646e-07,
      "loss": 0.4393,
      "step": 12697
    },
    {
      "epoch": 1.55689063266307,
      "grad_norm": 1.951248259323254,
      "learning_rate": 6.41550205449554e-07,
      "loss": 0.4497,
      "step": 12698
    },
    {
      "epoch": 1.5570132417851887,
      "grad_norm": 1.7718753121166284,
      "learning_rate": 6.412112343441198e-07,
      "loss": 0.4049,
      "step": 12699
    },
    {
      "epoch": 1.5571358509073074,
      "grad_norm": 1.9710761384259965,
      "learning_rate": 6.408723396381941e-07,
      "loss": 0.4257,
      "step": 12700
    },
    {
      "epoch": 1.5572584600294261,
      "grad_norm": 2.1694272443531575,
      "learning_rate": 6.405335213457062e-07,
      "loss": 0.3836,
      "step": 12701
    },
    {
      "epoch": 1.5573810691515448,
      "grad_norm": 1.978324403893834,
      "learning_rate": 6.401947794805835e-07,
      "loss": 0.4418,
      "step": 12702
    },
    {
      "epoch": 1.5575036782736635,
      "grad_norm": 1.8834244946524712,
      "learning_rate": 6.398561140567472e-07,
      "loss": 0.4527,
      "step": 12703
    },
    {
      "epoch": 1.5576262873957822,
      "grad_norm": 1.9009999754087394,
      "learning_rate": 6.395175250881183e-07,
      "loss": 0.407,
      "step": 12704
    },
    {
      "epoch": 1.557748896517901,
      "grad_norm": 2.0614552278159057,
      "learning_rate": 6.39179012588613e-07,
      "loss": 0.4129,
      "step": 12705
    },
    {
      "epoch": 1.5578715056400196,
      "grad_norm": 2.1888916402289302,
      "learning_rate": 6.388405765721439e-07,
      "loss": 0.4873,
      "step": 12706
    },
    {
      "epoch": 1.5579941147621383,
      "grad_norm": 1.7447856362098686,
      "learning_rate": 6.38502217052622e-07,
      "loss": 0.4396,
      "step": 12707
    },
    {
      "epoch": 1.558116723884257,
      "grad_norm": 1.9396904651957896,
      "learning_rate": 6.381639340439549e-07,
      "loss": 0.4336,
      "step": 12708
    },
    {
      "epoch": 1.5582393330063757,
      "grad_norm": 1.9190017166390123,
      "learning_rate": 6.378257275600466e-07,
      "loss": 0.4348,
      "step": 12709
    },
    {
      "epoch": 1.5583619421284944,
      "grad_norm": 1.944138237021598,
      "learning_rate": 6.374875976147968e-07,
      "loss": 0.4791,
      "step": 12710
    },
    {
      "epoch": 1.5584845512506131,
      "grad_norm": 2.062061584819176,
      "learning_rate": 6.371495442221048e-07,
      "loss": 0.4057,
      "step": 12711
    },
    {
      "epoch": 1.5586071603727318,
      "grad_norm": 1.9359188391453206,
      "learning_rate": 6.368115673958645e-07,
      "loss": 0.4309,
      "step": 12712
    },
    {
      "epoch": 1.5587297694948505,
      "grad_norm": 1.9095500318705552,
      "learning_rate": 6.364736671499664e-07,
      "loss": 0.4073,
      "step": 12713
    },
    {
      "epoch": 1.5588523786169692,
      "grad_norm": 1.8478469236465052,
      "learning_rate": 6.361358434982998e-07,
      "loss": 0.4137,
      "step": 12714
    },
    {
      "epoch": 1.558974987739088,
      "grad_norm": 2.044879536875733,
      "learning_rate": 6.357980964547506e-07,
      "loss": 0.3994,
      "step": 12715
    },
    {
      "epoch": 1.5590975968612066,
      "grad_norm": 1.7818208976775507,
      "learning_rate": 6.354604260331998e-07,
      "loss": 0.3888,
      "step": 12716
    },
    {
      "epoch": 1.5592202059833251,
      "grad_norm": 1.9787934603054032,
      "learning_rate": 6.351228322475256e-07,
      "loss": 0.4574,
      "step": 12717
    },
    {
      "epoch": 1.5593428151054438,
      "grad_norm": 1.9353949092142215,
      "learning_rate": 6.347853151116054e-07,
      "loss": 0.4158,
      "step": 12718
    },
    {
      "epoch": 1.5594654242275625,
      "grad_norm": 1.8136975019944528,
      "learning_rate": 6.344478746393098e-07,
      "loss": 0.431,
      "step": 12719
    },
    {
      "epoch": 1.5595880333496812,
      "grad_norm": 2.012322351678307,
      "learning_rate": 6.341105108445103e-07,
      "loss": 0.4185,
      "step": 12720
    },
    {
      "epoch": 1.5597106424718,
      "grad_norm": 1.9375496259064298,
      "learning_rate": 6.337732237410713e-07,
      "loss": 0.4354,
      "step": 12721
    },
    {
      "epoch": 1.5598332515939186,
      "grad_norm": 1.951787339908151,
      "learning_rate": 6.334360133428575e-07,
      "loss": 0.4498,
      "step": 12722
    },
    {
      "epoch": 1.5599558607160373,
      "grad_norm": 2.1108132444399885,
      "learning_rate": 6.330988796637272e-07,
      "loss": 0.4356,
      "step": 12723
    },
    {
      "epoch": 1.560078469838156,
      "grad_norm": 1.951600293491128,
      "learning_rate": 6.327618227175389e-07,
      "loss": 0.4427,
      "step": 12724
    },
    {
      "epoch": 1.5602010789602745,
      "grad_norm": 2.1303837881075673,
      "learning_rate": 6.324248425181451e-07,
      "loss": 0.4007,
      "step": 12725
    },
    {
      "epoch": 1.5603236880823932,
      "grad_norm": 2.162215408350672,
      "learning_rate": 6.32087939079396e-07,
      "loss": 0.4299,
      "step": 12726
    },
    {
      "epoch": 1.560446297204512,
      "grad_norm": 1.787812869502154,
      "learning_rate": 6.317511124151396e-07,
      "loss": 0.4498,
      "step": 12727
    },
    {
      "epoch": 1.5605689063266306,
      "grad_norm": 1.8800354645818513,
      "learning_rate": 6.314143625392205e-07,
      "loss": 0.4225,
      "step": 12728
    },
    {
      "epoch": 1.5606915154487493,
      "grad_norm": 2.0881349465430046,
      "learning_rate": 6.310776894654791e-07,
      "loss": 0.4154,
      "step": 12729
    },
    {
      "epoch": 1.560814124570868,
      "grad_norm": 2.0924913621794063,
      "learning_rate": 6.307410932077529e-07,
      "loss": 0.4066,
      "step": 12730
    },
    {
      "epoch": 1.5609367336929867,
      "grad_norm": 1.8436112307583692,
      "learning_rate": 6.304045737798775e-07,
      "loss": 0.4054,
      "step": 12731
    },
    {
      "epoch": 1.5610593428151054,
      "grad_norm": 1.8441510761900026,
      "learning_rate": 6.300681311956833e-07,
      "loss": 0.4124,
      "step": 12732
    },
    {
      "epoch": 1.561181951937224,
      "grad_norm": 1.9605297279227392,
      "learning_rate": 6.297317654690002e-07,
      "loss": 0.4316,
      "step": 12733
    },
    {
      "epoch": 1.5613045610593428,
      "grad_norm": 2.0911266436590465,
      "learning_rate": 6.293954766136517e-07,
      "loss": 0.4777,
      "step": 12734
    },
    {
      "epoch": 1.5614271701814615,
      "grad_norm": 1.8105941975368607,
      "learning_rate": 6.290592646434615e-07,
      "loss": 0.4649,
      "step": 12735
    },
    {
      "epoch": 1.5615497793035802,
      "grad_norm": 1.7692230895033327,
      "learning_rate": 6.28723129572247e-07,
      "loss": 0.4514,
      "step": 12736
    },
    {
      "epoch": 1.5616723884256989,
      "grad_norm": 1.8736689366246617,
      "learning_rate": 6.283870714138257e-07,
      "loss": 0.4057,
      "step": 12737
    },
    {
      "epoch": 1.5617949975478176,
      "grad_norm": 1.8513637430478032,
      "learning_rate": 6.280510901820092e-07,
      "loss": 0.4327,
      "step": 12738
    },
    {
      "epoch": 1.5619176066699363,
      "grad_norm": 2.0098780586182263,
      "learning_rate": 6.277151858906058e-07,
      "loss": 0.4143,
      "step": 12739
    },
    {
      "epoch": 1.562040215792055,
      "grad_norm": 1.9559847207839183,
      "learning_rate": 6.273793585534233e-07,
      "loss": 0.407,
      "step": 12740
    },
    {
      "epoch": 1.5621628249141737,
      "grad_norm": 1.9440177638570204,
      "learning_rate": 6.270436081842648e-07,
      "loss": 0.3785,
      "step": 12741
    },
    {
      "epoch": 1.5622854340362924,
      "grad_norm": 1.8060859250284678,
      "learning_rate": 6.267079347969298e-07,
      "loss": 0.4342,
      "step": 12742
    },
    {
      "epoch": 1.562408043158411,
      "grad_norm": 1.9845845071586463,
      "learning_rate": 6.263723384052145e-07,
      "loss": 0.3913,
      "step": 12743
    },
    {
      "epoch": 1.5625306522805298,
      "grad_norm": 1.7374105971779281,
      "learning_rate": 6.260368190229133e-07,
      "loss": 0.473,
      "step": 12744
    },
    {
      "epoch": 1.5626532614026485,
      "grad_norm": 2.017611324775711,
      "learning_rate": 6.257013766638176e-07,
      "loss": 0.4102,
      "step": 12745
    },
    {
      "epoch": 1.5627758705247672,
      "grad_norm": 1.8863906428285941,
      "learning_rate": 6.253660113417121e-07,
      "loss": 0.489,
      "step": 12746
    },
    {
      "epoch": 1.5628984796468859,
      "grad_norm": 2.1083382639786366,
      "learning_rate": 6.250307230703823e-07,
      "loss": 0.4425,
      "step": 12747
    },
    {
      "epoch": 1.5630210887690044,
      "grad_norm": 2.0301508250631755,
      "learning_rate": 6.246955118636097e-07,
      "loss": 0.459,
      "step": 12748
    },
    {
      "epoch": 1.563143697891123,
      "grad_norm": 2.0141980768975727,
      "learning_rate": 6.243603777351712e-07,
      "loss": 0.4178,
      "step": 12749
    },
    {
      "epoch": 1.5632663070132418,
      "grad_norm": 1.9083967964504005,
      "learning_rate": 6.240253206988422e-07,
      "loss": 0.3628,
      "step": 12750
    },
    {
      "epoch": 1.5633889161353605,
      "grad_norm": 1.9003472789793143,
      "learning_rate": 6.236903407683936e-07,
      "loss": 0.4194,
      "step": 12751
    },
    {
      "epoch": 1.5635115252574792,
      "grad_norm": 2.0833017716437623,
      "learning_rate": 6.233554379575931e-07,
      "loss": 0.4022,
      "step": 12752
    },
    {
      "epoch": 1.5636341343795979,
      "grad_norm": 1.9422292480524013,
      "learning_rate": 6.230206122802066e-07,
      "loss": 0.4369,
      "step": 12753
    },
    {
      "epoch": 1.5637567435017166,
      "grad_norm": 1.8358717463162402,
      "learning_rate": 6.226858637499964e-07,
      "loss": 0.3856,
      "step": 12754
    },
    {
      "epoch": 1.5638793526238353,
      "grad_norm": 1.9818760958916284,
      "learning_rate": 6.22351192380721e-07,
      "loss": 0.3611,
      "step": 12755
    },
    {
      "epoch": 1.5640019617459537,
      "grad_norm": 1.9689658956689986,
      "learning_rate": 6.220165981861345e-07,
      "loss": 0.4617,
      "step": 12756
    },
    {
      "epoch": 1.5641245708680724,
      "grad_norm": 1.8762205656827406,
      "learning_rate": 6.216820811799909e-07,
      "loss": 0.4409,
      "step": 12757
    },
    {
      "epoch": 1.5642471799901911,
      "grad_norm": 1.8750066822482148,
      "learning_rate": 6.213476413760405e-07,
      "loss": 0.3868,
      "step": 12758
    },
    {
      "epoch": 1.5643697891123098,
      "grad_norm": 2.1434978988593025,
      "learning_rate": 6.210132787880263e-07,
      "loss": 0.4993,
      "step": 12759
    },
    {
      "epoch": 1.5644923982344285,
      "grad_norm": 1.9187361127675586,
      "learning_rate": 6.206789934296931e-07,
      "loss": 0.4508,
      "step": 12760
    },
    {
      "epoch": 1.5646150073565472,
      "grad_norm": 1.8225438619489316,
      "learning_rate": 6.203447853147807e-07,
      "loss": 0.408,
      "step": 12761
    },
    {
      "epoch": 1.564737616478666,
      "grad_norm": 1.8459138393516719,
      "learning_rate": 6.200106544570256e-07,
      "loss": 0.4369,
      "step": 12762
    },
    {
      "epoch": 1.5648602256007846,
      "grad_norm": 2.0107774645050793,
      "learning_rate": 6.196766008701599e-07,
      "loss": 0.4281,
      "step": 12763
    },
    {
      "epoch": 1.5649828347229033,
      "grad_norm": 2.0487260579800224,
      "learning_rate": 6.193426245679149e-07,
      "loss": 0.4492,
      "step": 12764
    },
    {
      "epoch": 1.565105443845022,
      "grad_norm": 1.9302834460782505,
      "learning_rate": 6.190087255640182e-07,
      "loss": 0.4467,
      "step": 12765
    },
    {
      "epoch": 1.5652280529671407,
      "grad_norm": 1.8614225774241855,
      "learning_rate": 6.186749038721929e-07,
      "loss": 0.4618,
      "step": 12766
    },
    {
      "epoch": 1.5653506620892594,
      "grad_norm": 1.677818436798454,
      "learning_rate": 6.183411595061589e-07,
      "loss": 0.4104,
      "step": 12767
    },
    {
      "epoch": 1.5654732712113781,
      "grad_norm": 1.962817176243294,
      "learning_rate": 6.180074924796353e-07,
      "loss": 0.4355,
      "step": 12768
    },
    {
      "epoch": 1.5655958803334968,
      "grad_norm": 1.8673710459318638,
      "learning_rate": 6.176739028063347e-07,
      "loss": 0.3784,
      "step": 12769
    },
    {
      "epoch": 1.5657184894556155,
      "grad_norm": 2.013976665096212,
      "learning_rate": 6.173403904999701e-07,
      "loss": 0.4145,
      "step": 12770
    },
    {
      "epoch": 1.5658410985777342,
      "grad_norm": 1.949753131355517,
      "learning_rate": 6.17006955574248e-07,
      "loss": 0.4484,
      "step": 12771
    },
    {
      "epoch": 1.565963707699853,
      "grad_norm": 2.083391739417113,
      "learning_rate": 6.166735980428731e-07,
      "loss": 0.4581,
      "step": 12772
    },
    {
      "epoch": 1.5660863168219716,
      "grad_norm": 1.9117653925086848,
      "learning_rate": 6.163403179195474e-07,
      "loss": 0.4078,
      "step": 12773
    },
    {
      "epoch": 1.5662089259440903,
      "grad_norm": 2.092085719637644,
      "learning_rate": 6.160071152179703e-07,
      "loss": 0.4216,
      "step": 12774
    },
    {
      "epoch": 1.566331535066209,
      "grad_norm": 1.9649770701797589,
      "learning_rate": 6.15673989951836e-07,
      "loss": 0.3828,
      "step": 12775
    },
    {
      "epoch": 1.5664541441883277,
      "grad_norm": 1.861051253511864,
      "learning_rate": 6.153409421348358e-07,
      "loss": 0.4162,
      "step": 12776
    },
    {
      "epoch": 1.5665767533104464,
      "grad_norm": 1.8579149298745514,
      "learning_rate": 6.150079717806595e-07,
      "loss": 0.3616,
      "step": 12777
    },
    {
      "epoch": 1.5666993624325651,
      "grad_norm": 1.82840870160988,
      "learning_rate": 6.146750789029935e-07,
      "loss": 0.4052,
      "step": 12778
    },
    {
      "epoch": 1.5668219715546838,
      "grad_norm": 1.8979890553147298,
      "learning_rate": 6.143422635155194e-07,
      "loss": 0.3914,
      "step": 12779
    },
    {
      "epoch": 1.5669445806768023,
      "grad_norm": 2.110503210607174,
      "learning_rate": 6.140095256319159e-07,
      "loss": 0.4276,
      "step": 12780
    },
    {
      "epoch": 1.567067189798921,
      "grad_norm": 1.9011849426521128,
      "learning_rate": 6.136768652658603e-07,
      "loss": 0.4504,
      "step": 12781
    },
    {
      "epoch": 1.5671897989210397,
      "grad_norm": 2.0451563117495617,
      "learning_rate": 6.133442824310246e-07,
      "loss": 0.4801,
      "step": 12782
    },
    {
      "epoch": 1.5673124080431584,
      "grad_norm": 1.5647908167790439,
      "learning_rate": 6.130117771410795e-07,
      "loss": 0.436,
      "step": 12783
    },
    {
      "epoch": 1.5674350171652771,
      "grad_norm": 2.040425604525812,
      "learning_rate": 6.126793494096903e-07,
      "loss": 0.4336,
      "step": 12784
    },
    {
      "epoch": 1.5675576262873958,
      "grad_norm": 1.8286043737726634,
      "learning_rate": 6.123469992505218e-07,
      "loss": 0.4116,
      "step": 12785
    },
    {
      "epoch": 1.5676802354095145,
      "grad_norm": 1.9074696267458318,
      "learning_rate": 6.120147266772328e-07,
      "loss": 0.4005,
      "step": 12786
    },
    {
      "epoch": 1.567802844531633,
      "grad_norm": 2.0414268856109263,
      "learning_rate": 6.116825317034814e-07,
      "loss": 0.4434,
      "step": 12787
    },
    {
      "epoch": 1.5679254536537517,
      "grad_norm": 2.000363422387667,
      "learning_rate": 6.113504143429214e-07,
      "loss": 0.427,
      "step": 12788
    },
    {
      "epoch": 1.5680480627758704,
      "grad_norm": 1.973361399485545,
      "learning_rate": 6.110183746092019e-07,
      "loss": 0.4451,
      "step": 12789
    },
    {
      "epoch": 1.568170671897989,
      "grad_norm": 1.894215992671477,
      "learning_rate": 6.106864125159715e-07,
      "loss": 0.4018,
      "step": 12790
    },
    {
      "epoch": 1.5682932810201078,
      "grad_norm": 1.8680530289552446,
      "learning_rate": 6.103545280768755e-07,
      "loss": 0.3973,
      "step": 12791
    },
    {
      "epoch": 1.5684158901422265,
      "grad_norm": 1.9065595226292744,
      "learning_rate": 6.100227213055527e-07,
      "loss": 0.4422,
      "step": 12792
    },
    {
      "epoch": 1.5685384992643452,
      "grad_norm": 1.7601759291346013,
      "learning_rate": 6.096909922156419e-07,
      "loss": 0.3923,
      "step": 12793
    },
    {
      "epoch": 1.5686611083864639,
      "grad_norm": 1.8500269460799592,
      "learning_rate": 6.093593408207785e-07,
      "loss": 0.4355,
      "step": 12794
    },
    {
      "epoch": 1.5687837175085826,
      "grad_norm": 1.7368895037765613,
      "learning_rate": 6.090277671345932e-07,
      "loss": 0.4357,
      "step": 12795
    },
    {
      "epoch": 1.5689063266307013,
      "grad_norm": 2.0401120715398466,
      "learning_rate": 6.086962711707137e-07,
      "loss": 0.4435,
      "step": 12796
    },
    {
      "epoch": 1.56902893575282,
      "grad_norm": 2.1049222047017744,
      "learning_rate": 6.083648529427657e-07,
      "loss": 0.4529,
      "step": 12797
    },
    {
      "epoch": 1.5691515448749387,
      "grad_norm": 1.9845381919575489,
      "learning_rate": 6.080335124643721e-07,
      "loss": 0.4501,
      "step": 12798
    },
    {
      "epoch": 1.5692741539970574,
      "grad_norm": 1.8957588122070166,
      "learning_rate": 6.077022497491497e-07,
      "loss": 0.4507,
      "step": 12799
    },
    {
      "epoch": 1.569396763119176,
      "grad_norm": 1.9615140680236547,
      "learning_rate": 6.073710648107156e-07,
      "loss": 0.4095,
      "step": 12800
    },
    {
      "epoch": 1.5695193722412948,
      "grad_norm": 1.8863606458288906,
      "learning_rate": 6.070399576626815e-07,
      "loss": 0.4917,
      "step": 12801
    },
    {
      "epoch": 1.5696419813634135,
      "grad_norm": 1.9459113907166388,
      "learning_rate": 6.067089283186555e-07,
      "loss": 0.4538,
      "step": 12802
    },
    {
      "epoch": 1.5697645904855322,
      "grad_norm": 2.2261915881613796,
      "learning_rate": 6.063779767922445e-07,
      "loss": 0.392,
      "step": 12803
    },
    {
      "epoch": 1.5698871996076509,
      "grad_norm": 1.7080282528082102,
      "learning_rate": 6.060471030970516e-07,
      "loss": 0.4267,
      "step": 12804
    },
    {
      "epoch": 1.5700098087297696,
      "grad_norm": 1.9469527614976565,
      "learning_rate": 6.057163072466756e-07,
      "loss": 0.4353,
      "step": 12805
    },
    {
      "epoch": 1.5701324178518883,
      "grad_norm": 1.8877084961966586,
      "learning_rate": 6.053855892547125e-07,
      "loss": 0.4479,
      "step": 12806
    },
    {
      "epoch": 1.570255026974007,
      "grad_norm": 1.8597838554667234,
      "learning_rate": 6.050549491347566e-07,
      "loss": 0.4459,
      "step": 12807
    },
    {
      "epoch": 1.5703776360961257,
      "grad_norm": 1.8825011763628414,
      "learning_rate": 6.047243869003968e-07,
      "loss": 0.4129,
      "step": 12808
    },
    {
      "epoch": 1.5705002452182444,
      "grad_norm": 1.703368401057758,
      "learning_rate": 6.043939025652196e-07,
      "loss": 0.4043,
      "step": 12809
    },
    {
      "epoch": 1.570622854340363,
      "grad_norm": 1.8487821091096632,
      "learning_rate": 6.040634961428085e-07,
      "loss": 0.412,
      "step": 12810
    },
    {
      "epoch": 1.5707454634624816,
      "grad_norm": 2.0367799592181943,
      "learning_rate": 6.037331676467453e-07,
      "loss": 0.3751,
      "step": 12811
    },
    {
      "epoch": 1.5708680725846003,
      "grad_norm": 1.922712101767531,
      "learning_rate": 6.034029170906059e-07,
      "loss": 0.4356,
      "step": 12812
    },
    {
      "epoch": 1.570990681706719,
      "grad_norm": 1.839694995092739,
      "learning_rate": 6.030727444879633e-07,
      "loss": 0.4131,
      "step": 12813
    },
    {
      "epoch": 1.5711132908288377,
      "grad_norm": 2.064186741878715,
      "learning_rate": 6.0274264985239e-07,
      "loss": 0.4214,
      "step": 12814
    },
    {
      "epoch": 1.5712358999509564,
      "grad_norm": 2.031684867718789,
      "learning_rate": 6.02412633197452e-07,
      "loss": 0.4433,
      "step": 12815
    },
    {
      "epoch": 1.571358509073075,
      "grad_norm": 2.1343661363421638,
      "learning_rate": 6.02082694536715e-07,
      "loss": 0.4462,
      "step": 12816
    },
    {
      "epoch": 1.5714811181951938,
      "grad_norm": 1.8950089734324742,
      "learning_rate": 6.017528338837384e-07,
      "loss": 0.4395,
      "step": 12817
    },
    {
      "epoch": 1.5716037273173125,
      "grad_norm": 1.8698624517754359,
      "learning_rate": 6.014230512520816e-07,
      "loss": 0.4242,
      "step": 12818
    },
    {
      "epoch": 1.571726336439431,
      "grad_norm": 2.176045273389587,
      "learning_rate": 6.010933466552979e-07,
      "loss": 0.4762,
      "step": 12819
    },
    {
      "epoch": 1.5718489455615496,
      "grad_norm": 1.9481603093539792,
      "learning_rate": 6.0076372010694e-07,
      "loss": 0.4262,
      "step": 12820
    },
    {
      "epoch": 1.5719715546836683,
      "grad_norm": 1.863425100963037,
      "learning_rate": 6.004341716205556e-07,
      "loss": 0.4292,
      "step": 12821
    },
    {
      "epoch": 1.572094163805787,
      "grad_norm": 1.8983667252626093,
      "learning_rate": 6.001047012096889e-07,
      "loss": 0.4163,
      "step": 12822
    },
    {
      "epoch": 1.5722167729279057,
      "grad_norm": 1.955571124001244,
      "learning_rate": 5.997753088878827e-07,
      "loss": 0.4019,
      "step": 12823
    },
    {
      "epoch": 1.5723393820500244,
      "grad_norm": 2.0499128773662876,
      "learning_rate": 5.994459946686759e-07,
      "loss": 0.4159,
      "step": 12824
    },
    {
      "epoch": 1.5724619911721431,
      "grad_norm": 1.594437289754916,
      "learning_rate": 5.991167585656032e-07,
      "loss": 0.3804,
      "step": 12825
    },
    {
      "epoch": 1.5725846002942618,
      "grad_norm": 2.0231734135328203,
      "learning_rate": 5.987876005921966e-07,
      "loss": 0.4506,
      "step": 12826
    },
    {
      "epoch": 1.5727072094163805,
      "grad_norm": 1.8532032799015317,
      "learning_rate": 5.984585207619861e-07,
      "loss": 0.418,
      "step": 12827
    },
    {
      "epoch": 1.5728298185384992,
      "grad_norm": 1.9874971958421477,
      "learning_rate": 5.981295190884962e-07,
      "loss": 0.4461,
      "step": 12828
    },
    {
      "epoch": 1.572952427660618,
      "grad_norm": 2.044281468500358,
      "learning_rate": 5.978005955852504e-07,
      "loss": 0.4402,
      "step": 12829
    },
    {
      "epoch": 1.5730750367827366,
      "grad_norm": 1.9184474667582347,
      "learning_rate": 5.974717502657673e-07,
      "loss": 0.4294,
      "step": 12830
    },
    {
      "epoch": 1.5731976459048553,
      "grad_norm": 2.024596684124202,
      "learning_rate": 5.971429831435641e-07,
      "loss": 0.5061,
      "step": 12831
    },
    {
      "epoch": 1.573320255026974,
      "grad_norm": 2.1086612803116545,
      "learning_rate": 5.968142942321523e-07,
      "loss": 0.4123,
      "step": 12832
    },
    {
      "epoch": 1.5734428641490927,
      "grad_norm": 2.026586125127373,
      "learning_rate": 5.96485683545043e-07,
      "loss": 0.3892,
      "step": 12833
    },
    {
      "epoch": 1.5735654732712114,
      "grad_norm": 2.005737056172981,
      "learning_rate": 5.96157151095742e-07,
      "loss": 0.4414,
      "step": 12834
    },
    {
      "epoch": 1.5736880823933301,
      "grad_norm": 1.7808602149756052,
      "learning_rate": 5.95828696897752e-07,
      "loss": 0.4143,
      "step": 12835
    },
    {
      "epoch": 1.5738106915154488,
      "grad_norm": 2.067777086384354,
      "learning_rate": 5.955003209645735e-07,
      "loss": 0.4567,
      "step": 12836
    },
    {
      "epoch": 1.5739333006375675,
      "grad_norm": 1.94723123407287,
      "learning_rate": 5.951720233097041e-07,
      "loss": 0.4897,
      "step": 12837
    },
    {
      "epoch": 1.5740559097596862,
      "grad_norm": 1.976444741213413,
      "learning_rate": 5.948438039466367e-07,
      "loss": 0.453,
      "step": 12838
    },
    {
      "epoch": 1.574178518881805,
      "grad_norm": 2.0248635084649873,
      "learning_rate": 5.945156628888613e-07,
      "loss": 0.3959,
      "step": 12839
    },
    {
      "epoch": 1.5743011280039236,
      "grad_norm": 2.0279274818848516,
      "learning_rate": 5.941876001498656e-07,
      "loss": 0.4802,
      "step": 12840
    },
    {
      "epoch": 1.5744237371260423,
      "grad_norm": 1.9928612459575736,
      "learning_rate": 5.938596157431348e-07,
      "loss": 0.3965,
      "step": 12841
    },
    {
      "epoch": 1.5745463462481608,
      "grad_norm": 2.0448309734750776,
      "learning_rate": 5.935317096821469e-07,
      "loss": 0.434,
      "step": 12842
    },
    {
      "epoch": 1.5746689553702795,
      "grad_norm": 1.9779590187351923,
      "learning_rate": 5.932038819803809e-07,
      "loss": 0.4269,
      "step": 12843
    },
    {
      "epoch": 1.5747915644923982,
      "grad_norm": 2.045797497031635,
      "learning_rate": 5.928761326513119e-07,
      "loss": 0.453,
      "step": 12844
    },
    {
      "epoch": 1.574914173614517,
      "grad_norm": 1.8884862236458408,
      "learning_rate": 5.925484617084093e-07,
      "loss": 0.4133,
      "step": 12845
    },
    {
      "epoch": 1.5750367827366356,
      "grad_norm": 2.1190676160634285,
      "learning_rate": 5.922208691651423e-07,
      "loss": 0.4415,
      "step": 12846
    },
    {
      "epoch": 1.5751593918587543,
      "grad_norm": 1.8350961312225504,
      "learning_rate": 5.918933550349754e-07,
      "loss": 0.4301,
      "step": 12847
    },
    {
      "epoch": 1.575282000980873,
      "grad_norm": 1.9010578611452889,
      "learning_rate": 5.915659193313688e-07,
      "loss": 0.4133,
      "step": 12848
    },
    {
      "epoch": 1.5754046101029917,
      "grad_norm": 2.041953171631568,
      "learning_rate": 5.912385620677816e-07,
      "loss": 0.4317,
      "step": 12849
    },
    {
      "epoch": 1.5755272192251102,
      "grad_norm": 1.9047381645352555,
      "learning_rate": 5.909112832576694e-07,
      "loss": 0.4088,
      "step": 12850
    },
    {
      "epoch": 1.5756498283472289,
      "grad_norm": 2.0283366412745845,
      "learning_rate": 5.905840829144832e-07,
      "loss": 0.3978,
      "step": 12851
    },
    {
      "epoch": 1.5757724374693476,
      "grad_norm": 2.0355492386102623,
      "learning_rate": 5.902569610516706e-07,
      "loss": 0.4157,
      "step": 12852
    },
    {
      "epoch": 1.5758950465914663,
      "grad_norm": 1.8270592478805194,
      "learning_rate": 5.899299176826781e-07,
      "loss": 0.4121,
      "step": 12853
    },
    {
      "epoch": 1.576017655713585,
      "grad_norm": 2.1279195641229416,
      "learning_rate": 5.89602952820949e-07,
      "loss": 0.4526,
      "step": 12854
    },
    {
      "epoch": 1.5761402648357037,
      "grad_norm": 1.8049533376833613,
      "learning_rate": 5.89276066479919e-07,
      "loss": 0.434,
      "step": 12855
    },
    {
      "epoch": 1.5762628739578224,
      "grad_norm": 1.7855772451979375,
      "learning_rate": 5.889492586730255e-07,
      "loss": 0.4572,
      "step": 12856
    },
    {
      "epoch": 1.576385483079941,
      "grad_norm": 2.092551145277785,
      "learning_rate": 5.886225294137013e-07,
      "loss": 0.4323,
      "step": 12857
    },
    {
      "epoch": 1.5765080922020598,
      "grad_norm": 1.9115884881196585,
      "learning_rate": 5.88295878715375e-07,
      "loss": 0.4067,
      "step": 12858
    },
    {
      "epoch": 1.5766307013241785,
      "grad_norm": 1.7712981276894262,
      "learning_rate": 5.879693065914718e-07,
      "loss": 0.4369,
      "step": 12859
    },
    {
      "epoch": 1.5767533104462972,
      "grad_norm": 1.8877763148014397,
      "learning_rate": 5.876428130554152e-07,
      "loss": 0.401,
      "step": 12860
    },
    {
      "epoch": 1.5768759195684159,
      "grad_norm": 1.9925228420462244,
      "learning_rate": 5.873163981206251e-07,
      "loss": 0.4026,
      "step": 12861
    },
    {
      "epoch": 1.5769985286905346,
      "grad_norm": 1.9088496423321537,
      "learning_rate": 5.86990061800517e-07,
      "loss": 0.4218,
      "step": 12862
    },
    {
      "epoch": 1.5771211378126533,
      "grad_norm": 1.922049427704015,
      "learning_rate": 5.866638041085038e-07,
      "loss": 0.4109,
      "step": 12863
    },
    {
      "epoch": 1.577243746934772,
      "grad_norm": 2.358361033229136,
      "learning_rate": 5.863376250579958e-07,
      "loss": 0.3582,
      "step": 12864
    },
    {
      "epoch": 1.5773663560568907,
      "grad_norm": 1.7953685523144844,
      "learning_rate": 5.860115246623988e-07,
      "loss": 0.4356,
      "step": 12865
    },
    {
      "epoch": 1.5774889651790094,
      "grad_norm": 2.0647916949818783,
      "learning_rate": 5.856855029351175e-07,
      "loss": 0.3904,
      "step": 12866
    },
    {
      "epoch": 1.577611574301128,
      "grad_norm": 1.9600300706847877,
      "learning_rate": 5.853595598895506e-07,
      "loss": 0.479,
      "step": 12867
    },
    {
      "epoch": 1.5777341834232468,
      "grad_norm": 1.9368346816623876,
      "learning_rate": 5.85033695539095e-07,
      "loss": 0.405,
      "step": 12868
    },
    {
      "epoch": 1.5778567925453655,
      "grad_norm": 2.0928954820944674,
      "learning_rate": 5.847079098971448e-07,
      "loss": 0.4336,
      "step": 12869
    },
    {
      "epoch": 1.5779794016674842,
      "grad_norm": 2.2495344554895365,
      "learning_rate": 5.843822029770907e-07,
      "loss": 0.4366,
      "step": 12870
    },
    {
      "epoch": 1.5781020107896029,
      "grad_norm": 1.897446964088359,
      "learning_rate": 5.840565747923197e-07,
      "loss": 0.4119,
      "step": 12871
    },
    {
      "epoch": 1.5782246199117216,
      "grad_norm": 1.9897094375371864,
      "learning_rate": 5.837310253562142e-07,
      "loss": 0.4426,
      "step": 12872
    },
    {
      "epoch": 1.5783472290338403,
      "grad_norm": 2.049120069130889,
      "learning_rate": 5.834055546821565e-07,
      "loss": 0.4244,
      "step": 12873
    },
    {
      "epoch": 1.5784698381559588,
      "grad_norm": 1.8646076888480383,
      "learning_rate": 5.830801627835239e-07,
      "loss": 0.4564,
      "step": 12874
    },
    {
      "epoch": 1.5785924472780775,
      "grad_norm": 2.110980779316818,
      "learning_rate": 5.827548496736907e-07,
      "loss": 0.4739,
      "step": 12875
    },
    {
      "epoch": 1.5787150564001962,
      "grad_norm": 1.9295140148773824,
      "learning_rate": 5.824296153660264e-07,
      "loss": 0.4558,
      "step": 12876
    },
    {
      "epoch": 1.5788376655223149,
      "grad_norm": 2.0133595563555824,
      "learning_rate": 5.821044598739004e-07,
      "loss": 0.4532,
      "step": 12877
    },
    {
      "epoch": 1.5789602746444336,
      "grad_norm": 2.015511019777437,
      "learning_rate": 5.817793832106758e-07,
      "loss": 0.4766,
      "step": 12878
    },
    {
      "epoch": 1.5790828837665523,
      "grad_norm": 1.8447355168954829,
      "learning_rate": 5.814543853897151e-07,
      "loss": 0.4646,
      "step": 12879
    },
    {
      "epoch": 1.579205492888671,
      "grad_norm": 1.9141419466300376,
      "learning_rate": 5.811294664243752e-07,
      "loss": 0.4216,
      "step": 12880
    },
    {
      "epoch": 1.5793281020107894,
      "grad_norm": 1.9935434508747463,
      "learning_rate": 5.808046263280118e-07,
      "loss": 0.3809,
      "step": 12881
    },
    {
      "epoch": 1.5794507111329081,
      "grad_norm": 2.1263239907046505,
      "learning_rate": 5.804798651139754e-07,
      "loss": 0.4113,
      "step": 12882
    },
    {
      "epoch": 1.5795733202550268,
      "grad_norm": 1.9650119905413699,
      "learning_rate": 5.801551827956153e-07,
      "loss": 0.4012,
      "step": 12883
    },
    {
      "epoch": 1.5796959293771455,
      "grad_norm": 1.7746469429752498,
      "learning_rate": 5.798305793862763e-07,
      "loss": 0.4497,
      "step": 12884
    },
    {
      "epoch": 1.5798185384992642,
      "grad_norm": 1.7631735613917423,
      "learning_rate": 5.79506054899299e-07,
      "loss": 0.4345,
      "step": 12885
    },
    {
      "epoch": 1.579941147621383,
      "grad_norm": 2.084281747728941,
      "learning_rate": 5.791816093480226e-07,
      "loss": 0.4481,
      "step": 12886
    },
    {
      "epoch": 1.5800637567435016,
      "grad_norm": 1.8116599432082026,
      "learning_rate": 5.788572427457842e-07,
      "loss": 0.4212,
      "step": 12887
    },
    {
      "epoch": 1.5801863658656203,
      "grad_norm": 1.8874146048893852,
      "learning_rate": 5.78532955105913e-07,
      "loss": 0.3957,
      "step": 12888
    },
    {
      "epoch": 1.580308974987739,
      "grad_norm": 2.0052190505219167,
      "learning_rate": 5.782087464417388e-07,
      "loss": 0.4137,
      "step": 12889
    },
    {
      "epoch": 1.5804315841098577,
      "grad_norm": 1.9351246158568165,
      "learning_rate": 5.77884616766588e-07,
      "loss": 0.4154,
      "step": 12890
    },
    {
      "epoch": 1.5805541932319764,
      "grad_norm": 2.066697816429472,
      "learning_rate": 5.775605660937827e-07,
      "loss": 0.4381,
      "step": 12891
    },
    {
      "epoch": 1.5806768023540951,
      "grad_norm": 1.8110705674016845,
      "learning_rate": 5.772365944366403e-07,
      "loss": 0.4006,
      "step": 12892
    },
    {
      "epoch": 1.5807994114762138,
      "grad_norm": 1.905001305452274,
      "learning_rate": 5.769127018084783e-07,
      "loss": 0.4477,
      "step": 12893
    },
    {
      "epoch": 1.5809220205983325,
      "grad_norm": 1.9624338948047855,
      "learning_rate": 5.765888882226095e-07,
      "loss": 0.4657,
      "step": 12894
    },
    {
      "epoch": 1.5810446297204512,
      "grad_norm": 2.0745701893260797,
      "learning_rate": 5.762651536923417e-07,
      "loss": 0.4664,
      "step": 12895
    },
    {
      "epoch": 1.58116723884257,
      "grad_norm": 1.9200414600296067,
      "learning_rate": 5.759414982309824e-07,
      "loss": 0.4423,
      "step": 12896
    },
    {
      "epoch": 1.5812898479646886,
      "grad_norm": 1.8442964052206678,
      "learning_rate": 5.756179218518338e-07,
      "loss": 0.3849,
      "step": 12897
    },
    {
      "epoch": 1.5814124570868073,
      "grad_norm": 1.912594041265398,
      "learning_rate": 5.752944245681949e-07,
      "loss": 0.4123,
      "step": 12898
    },
    {
      "epoch": 1.581535066208926,
      "grad_norm": 2.118630485526777,
      "learning_rate": 5.749710063933625e-07,
      "loss": 0.4188,
      "step": 12899
    },
    {
      "epoch": 1.5816576753310447,
      "grad_norm": 2.0519645234077983,
      "learning_rate": 5.746476673406304e-07,
      "loss": 0.43,
      "step": 12900
    },
    {
      "epoch": 1.5817802844531634,
      "grad_norm": 1.9274569110599529,
      "learning_rate": 5.743244074232878e-07,
      "loss": 0.4118,
      "step": 12901
    },
    {
      "epoch": 1.5819028935752821,
      "grad_norm": 2.0543607403794466,
      "learning_rate": 5.740012266546204e-07,
      "loss": 0.4299,
      "step": 12902
    },
    {
      "epoch": 1.5820255026974008,
      "grad_norm": 1.9791488817115177,
      "learning_rate": 5.736781250479131e-07,
      "loss": 0.3767,
      "step": 12903
    },
    {
      "epoch": 1.5821481118195195,
      "grad_norm": 1.8871073274683394,
      "learning_rate": 5.733551026164452e-07,
      "loss": 0.3635,
      "step": 12904
    },
    {
      "epoch": 1.582270720941638,
      "grad_norm": 2.1850683679822374,
      "learning_rate": 5.730321593734928e-07,
      "loss": 0.5142,
      "step": 12905
    },
    {
      "epoch": 1.5823933300637567,
      "grad_norm": 1.9873898717447094,
      "learning_rate": 5.727092953323299e-07,
      "loss": 0.379,
      "step": 12906
    },
    {
      "epoch": 1.5825159391858754,
      "grad_norm": 2.014246041686598,
      "learning_rate": 5.723865105062277e-07,
      "loss": 0.4643,
      "step": 12907
    },
    {
      "epoch": 1.582638548307994,
      "grad_norm": 1.9701192785216812,
      "learning_rate": 5.720638049084526e-07,
      "loss": 0.3904,
      "step": 12908
    },
    {
      "epoch": 1.5827611574301128,
      "grad_norm": 1.8901747441755001,
      "learning_rate": 5.717411785522672e-07,
      "loss": 0.4216,
      "step": 12909
    },
    {
      "epoch": 1.5828837665522315,
      "grad_norm": 1.8973711244409368,
      "learning_rate": 5.714186314509341e-07,
      "loss": 0.4141,
      "step": 12910
    },
    {
      "epoch": 1.5830063756743502,
      "grad_norm": 1.9732818190923607,
      "learning_rate": 5.710961636177087e-07,
      "loss": 0.4171,
      "step": 12911
    },
    {
      "epoch": 1.583128984796469,
      "grad_norm": 2.0541397182222996,
      "learning_rate": 5.707737750658468e-07,
      "loss": 0.4553,
      "step": 12912
    },
    {
      "epoch": 1.5832515939185874,
      "grad_norm": 1.938340309789738,
      "learning_rate": 5.704514658085971e-07,
      "loss": 0.4017,
      "step": 12913
    },
    {
      "epoch": 1.583374203040706,
      "grad_norm": 1.9416631350250673,
      "learning_rate": 5.701292358592092e-07,
      "loss": 0.4253,
      "step": 12914
    },
    {
      "epoch": 1.5834968121628248,
      "grad_norm": 1.8313722881986219,
      "learning_rate": 5.698070852309253e-07,
      "loss": 0.4319,
      "step": 12915
    },
    {
      "epoch": 1.5836194212849435,
      "grad_norm": 1.9140161910554871,
      "learning_rate": 5.694850139369881e-07,
      "loss": 0.4125,
      "step": 12916
    },
    {
      "epoch": 1.5837420304070622,
      "grad_norm": 1.9351116344820136,
      "learning_rate": 5.691630219906347e-07,
      "loss": 0.418,
      "step": 12917
    },
    {
      "epoch": 1.5838646395291809,
      "grad_norm": 1.8609739134872907,
      "learning_rate": 5.688411094050983e-07,
      "loss": 0.4596,
      "step": 12918
    },
    {
      "epoch": 1.5839872486512996,
      "grad_norm": 1.9814350932971108,
      "learning_rate": 5.685192761936115e-07,
      "loss": 0.4235,
      "step": 12919
    },
    {
      "epoch": 1.5841098577734183,
      "grad_norm": 2.0454318079928258,
      "learning_rate": 5.681975223694025e-07,
      "loss": 0.4245,
      "step": 12920
    },
    {
      "epoch": 1.584232466895537,
      "grad_norm": 1.957677295274883,
      "learning_rate": 5.678758479456953e-07,
      "loss": 0.4562,
      "step": 12921
    },
    {
      "epoch": 1.5843550760176557,
      "grad_norm": 2.043574772047277,
      "learning_rate": 5.675542529357106e-07,
      "loss": 0.4315,
      "step": 12922
    },
    {
      "epoch": 1.5844776851397744,
      "grad_norm": 1.8922526247822022,
      "learning_rate": 5.672327373526679e-07,
      "loss": 0.3728,
      "step": 12923
    },
    {
      "epoch": 1.584600294261893,
      "grad_norm": 1.9866447726756598,
      "learning_rate": 5.669113012097807e-07,
      "loss": 0.4674,
      "step": 12924
    },
    {
      "epoch": 1.5847229033840118,
      "grad_norm": 1.9030900763716723,
      "learning_rate": 5.665899445202619e-07,
      "loss": 0.4578,
      "step": 12925
    },
    {
      "epoch": 1.5848455125061305,
      "grad_norm": 2.0675861560806914,
      "learning_rate": 5.662686672973187e-07,
      "loss": 0.3933,
      "step": 12926
    },
    {
      "epoch": 1.5849681216282492,
      "grad_norm": 2.1326213696368654,
      "learning_rate": 5.659474695541572e-07,
      "loss": 0.3969,
      "step": 12927
    },
    {
      "epoch": 1.5850907307503679,
      "grad_norm": 2.014079021125021,
      "learning_rate": 5.656263513039781e-07,
      "loss": 0.413,
      "step": 12928
    },
    {
      "epoch": 1.5852133398724866,
      "grad_norm": 1.91678099227513,
      "learning_rate": 5.653053125599811e-07,
      "loss": 0.4138,
      "step": 12929
    },
    {
      "epoch": 1.5853359489946053,
      "grad_norm": 1.9069161742603842,
      "learning_rate": 5.649843533353608e-07,
      "loss": 0.4117,
      "step": 12930
    },
    {
      "epoch": 1.585458558116724,
      "grad_norm": 1.8468337429163322,
      "learning_rate": 5.646634736433087e-07,
      "loss": 0.4365,
      "step": 12931
    },
    {
      "epoch": 1.5855811672388427,
      "grad_norm": 2.1620939715313843,
      "learning_rate": 5.64342673497014e-07,
      "loss": 0.4564,
      "step": 12932
    },
    {
      "epoch": 1.5857037763609614,
      "grad_norm": 2.0540326636946036,
      "learning_rate": 5.64021952909663e-07,
      "loss": 0.4657,
      "step": 12933
    },
    {
      "epoch": 1.58582638548308,
      "grad_norm": 2.0689990372922766,
      "learning_rate": 5.63701311894437e-07,
      "loss": 0.4526,
      "step": 12934
    },
    {
      "epoch": 1.5859489946051988,
      "grad_norm": 1.7976965250122807,
      "learning_rate": 5.633807504645145e-07,
      "loss": 0.3754,
      "step": 12935
    },
    {
      "epoch": 1.5860716037273173,
      "grad_norm": 2.0309907864819623,
      "learning_rate": 5.630602686330716e-07,
      "loss": 0.4768,
      "step": 12936
    },
    {
      "epoch": 1.586194212849436,
      "grad_norm": 1.834557721472973,
      "learning_rate": 5.627398664132821e-07,
      "loss": 0.4411,
      "step": 12937
    },
    {
      "epoch": 1.5863168219715547,
      "grad_norm": 2.1666109267666385,
      "learning_rate": 5.624195438183125e-07,
      "loss": 0.4068,
      "step": 12938
    },
    {
      "epoch": 1.5864394310936734,
      "grad_norm": 1.9301590742377077,
      "learning_rate": 5.620993008613296e-07,
      "loss": 0.3676,
      "step": 12939
    },
    {
      "epoch": 1.586562040215792,
      "grad_norm": 1.9048746449161236,
      "learning_rate": 5.617791375554971e-07,
      "loss": 0.434,
      "step": 12940
    },
    {
      "epoch": 1.5866846493379108,
      "grad_norm": 2.0051770109616136,
      "learning_rate": 5.614590539139725e-07,
      "loss": 0.4347,
      "step": 12941
    },
    {
      "epoch": 1.5868072584600295,
      "grad_norm": 1.9989957019222635,
      "learning_rate": 5.611390499499137e-07,
      "loss": 0.441,
      "step": 12942
    },
    {
      "epoch": 1.5869298675821482,
      "grad_norm": 1.9060959952691048,
      "learning_rate": 5.608191256764722e-07,
      "loss": 0.4494,
      "step": 12943
    },
    {
      "epoch": 1.5870524767042666,
      "grad_norm": 2.021430924421495,
      "learning_rate": 5.604992811067972e-07,
      "loss": 0.4409,
      "step": 12944
    },
    {
      "epoch": 1.5871750858263853,
      "grad_norm": 1.9842504776703822,
      "learning_rate": 5.601795162540352e-07,
      "loss": 0.4154,
      "step": 12945
    },
    {
      "epoch": 1.587297694948504,
      "grad_norm": 1.9882301584660207,
      "learning_rate": 5.598598311313299e-07,
      "loss": 0.4408,
      "step": 12946
    },
    {
      "epoch": 1.5874203040706227,
      "grad_norm": 1.9540044093842386,
      "learning_rate": 5.595402257518204e-07,
      "loss": 0.4526,
      "step": 12947
    },
    {
      "epoch": 1.5875429131927414,
      "grad_norm": 1.894723604351973,
      "learning_rate": 5.592207001286421e-07,
      "loss": 0.4523,
      "step": 12948
    },
    {
      "epoch": 1.5876655223148601,
      "grad_norm": 1.9161576505458289,
      "learning_rate": 5.589012542749292e-07,
      "loss": 0.4187,
      "step": 12949
    },
    {
      "epoch": 1.5877881314369788,
      "grad_norm": 2.0430916473772687,
      "learning_rate": 5.585818882038124e-07,
      "loss": 0.4285,
      "step": 12950
    },
    {
      "epoch": 1.5879107405590975,
      "grad_norm": 1.945451269400253,
      "learning_rate": 5.582626019284154e-07,
      "loss": 0.4166,
      "step": 12951
    },
    {
      "epoch": 1.5880333496812162,
      "grad_norm": 1.7658367371945975,
      "learning_rate": 5.579433954618635e-07,
      "loss": 0.3911,
      "step": 12952
    },
    {
      "epoch": 1.588155958803335,
      "grad_norm": 2.093853792956353,
      "learning_rate": 5.576242688172765e-07,
      "loss": 0.4506,
      "step": 12953
    },
    {
      "epoch": 1.5882785679254536,
      "grad_norm": 1.7449595438801884,
      "learning_rate": 5.57305222007771e-07,
      "loss": 0.3967,
      "step": 12954
    },
    {
      "epoch": 1.5884011770475723,
      "grad_norm": 1.8601304810627877,
      "learning_rate": 5.569862550464592e-07,
      "loss": 0.3891,
      "step": 12955
    },
    {
      "epoch": 1.588523786169691,
      "grad_norm": 1.9630438719895857,
      "learning_rate": 5.566673679464524e-07,
      "loss": 0.4407,
      "step": 12956
    },
    {
      "epoch": 1.5886463952918097,
      "grad_norm": 1.8499193244606658,
      "learning_rate": 5.563485607208577e-07,
      "loss": 0.4437,
      "step": 12957
    },
    {
      "epoch": 1.5887690044139284,
      "grad_norm": 1.8923097526843562,
      "learning_rate": 5.560298333827782e-07,
      "loss": 0.3855,
      "step": 12958
    },
    {
      "epoch": 1.5888916135360471,
      "grad_norm": 1.773372617290935,
      "learning_rate": 5.557111859453135e-07,
      "loss": 0.4394,
      "step": 12959
    },
    {
      "epoch": 1.5890142226581658,
      "grad_norm": 2.062608084227195,
      "learning_rate": 5.553926184215616e-07,
      "loss": 0.4285,
      "step": 12960
    },
    {
      "epoch": 1.5891368317802845,
      "grad_norm": 1.78408068171505,
      "learning_rate": 5.550741308246152e-07,
      "loss": 0.4105,
      "step": 12961
    },
    {
      "epoch": 1.5892594409024032,
      "grad_norm": 1.9272532995524152,
      "learning_rate": 5.54755723167566e-07,
      "loss": 0.3983,
      "step": 12962
    },
    {
      "epoch": 1.589382050024522,
      "grad_norm": 2.041403471445147,
      "learning_rate": 5.544373954635002e-07,
      "loss": 0.4481,
      "step": 12963
    },
    {
      "epoch": 1.5895046591466406,
      "grad_norm": 2.052762104981893,
      "learning_rate": 5.541191477255012e-07,
      "loss": 0.4379,
      "step": 12964
    },
    {
      "epoch": 1.5896272682687593,
      "grad_norm": 1.980272232572419,
      "learning_rate": 5.538009799666499e-07,
      "loss": 0.4199,
      "step": 12965
    },
    {
      "epoch": 1.589749877390878,
      "grad_norm": 2.0882372863289214,
      "learning_rate": 5.534828922000249e-07,
      "loss": 0.4545,
      "step": 12966
    },
    {
      "epoch": 1.5898724865129967,
      "grad_norm": 1.9433025286741297,
      "learning_rate": 5.531648844386986e-07,
      "loss": 0.4336,
      "step": 12967
    },
    {
      "epoch": 1.5899950956351152,
      "grad_norm": 2.054186356601242,
      "learning_rate": 5.528469566957417e-07,
      "loss": 0.4062,
      "step": 12968
    },
    {
      "epoch": 1.590117704757234,
      "grad_norm": 2.1823483977127505,
      "learning_rate": 5.525291089842221e-07,
      "loss": 0.4122,
      "step": 12969
    },
    {
      "epoch": 1.5902403138793526,
      "grad_norm": 1.86653511746469,
      "learning_rate": 5.522113413172045e-07,
      "loss": 0.4071,
      "step": 12970
    },
    {
      "epoch": 1.5903629230014713,
      "grad_norm": 1.695996121489267,
      "learning_rate": 5.518936537077491e-07,
      "loss": 0.4077,
      "step": 12971
    },
    {
      "epoch": 1.59048553212359,
      "grad_norm": 1.7106583476582624,
      "learning_rate": 5.515760461689126e-07,
      "loss": 0.4145,
      "step": 12972
    },
    {
      "epoch": 1.5906081412457087,
      "grad_norm": 1.986517722993035,
      "learning_rate": 5.512585187137509e-07,
      "loss": 0.4379,
      "step": 12973
    },
    {
      "epoch": 1.5907307503678274,
      "grad_norm": 1.9290173504662076,
      "learning_rate": 5.509410713553132e-07,
      "loss": 0.4378,
      "step": 12974
    },
    {
      "epoch": 1.590853359489946,
      "grad_norm": 1.8617599652691015,
      "learning_rate": 5.506237041066486e-07,
      "loss": 0.3821,
      "step": 12975
    },
    {
      "epoch": 1.5909759686120646,
      "grad_norm": 2.0077101361292753,
      "learning_rate": 5.503064169808003e-07,
      "loss": 0.3765,
      "step": 12976
    },
    {
      "epoch": 1.5910985777341833,
      "grad_norm": 2.0492989276815243,
      "learning_rate": 5.499892099908108e-07,
      "loss": 0.4141,
      "step": 12977
    },
    {
      "epoch": 1.591221186856302,
      "grad_norm": 1.97031381292607,
      "learning_rate": 5.49672083149716e-07,
      "loss": 0.4452,
      "step": 12978
    },
    {
      "epoch": 1.5913437959784207,
      "grad_norm": 1.8171231839702717,
      "learning_rate": 5.493550364705521e-07,
      "loss": 0.398,
      "step": 12979
    },
    {
      "epoch": 1.5914664051005394,
      "grad_norm": 1.961446383327483,
      "learning_rate": 5.490380699663497e-07,
      "loss": 0.4628,
      "step": 12980
    },
    {
      "epoch": 1.591589014222658,
      "grad_norm": 2.0249917446120707,
      "learning_rate": 5.487211836501358e-07,
      "loss": 0.4515,
      "step": 12981
    },
    {
      "epoch": 1.5917116233447768,
      "grad_norm": 1.8995307635410301,
      "learning_rate": 5.484043775349357e-07,
      "loss": 0.4895,
      "step": 12982
    },
    {
      "epoch": 1.5918342324668955,
      "grad_norm": 1.9286693743534644,
      "learning_rate": 5.480876516337718e-07,
      "loss": 0.4794,
      "step": 12983
    },
    {
      "epoch": 1.5919568415890142,
      "grad_norm": 1.9679132492381586,
      "learning_rate": 5.4777100595966e-07,
      "loss": 0.4168,
      "step": 12984
    },
    {
      "epoch": 1.5920794507111329,
      "grad_norm": 2.004081858063996,
      "learning_rate": 5.47454440525616e-07,
      "loss": 0.4466,
      "step": 12985
    },
    {
      "epoch": 1.5922020598332516,
      "grad_norm": 2.0080016638890257,
      "learning_rate": 5.471379553446518e-07,
      "loss": 0.4072,
      "step": 12986
    },
    {
      "epoch": 1.5923246689553703,
      "grad_norm": 1.8256841429152006,
      "learning_rate": 5.46821550429775e-07,
      "loss": 0.3695,
      "step": 12987
    },
    {
      "epoch": 1.592447278077489,
      "grad_norm": 1.8015430310266067,
      "learning_rate": 5.465052257939893e-07,
      "loss": 0.4768,
      "step": 12988
    },
    {
      "epoch": 1.5925698871996077,
      "grad_norm": 2.0098074956057155,
      "learning_rate": 5.461889814502974e-07,
      "loss": 0.4205,
      "step": 12989
    },
    {
      "epoch": 1.5926924963217264,
      "grad_norm": 2.2038439727886514,
      "learning_rate": 5.458728174116981e-07,
      "loss": 0.491,
      "step": 12990
    },
    {
      "epoch": 1.592815105443845,
      "grad_norm": 2.0449067322944527,
      "learning_rate": 5.455567336911846e-07,
      "loss": 0.413,
      "step": 12991
    },
    {
      "epoch": 1.5929377145659638,
      "grad_norm": 1.9800504739952778,
      "learning_rate": 5.452407303017504e-07,
      "loss": 0.4457,
      "step": 12992
    },
    {
      "epoch": 1.5930603236880825,
      "grad_norm": 1.973366995512713,
      "learning_rate": 5.449248072563825e-07,
      "loss": 0.4074,
      "step": 12993
    },
    {
      "epoch": 1.5931829328102012,
      "grad_norm": 1.9968967792560135,
      "learning_rate": 5.446089645680658e-07,
      "loss": 0.4681,
      "step": 12994
    },
    {
      "epoch": 1.5933055419323199,
      "grad_norm": 1.92386292455605,
      "learning_rate": 5.442932022497824e-07,
      "loss": 0.4131,
      "step": 12995
    },
    {
      "epoch": 1.5934281510544386,
      "grad_norm": 2.1554614852919816,
      "learning_rate": 5.439775203145112e-07,
      "loss": 0.4664,
      "step": 12996
    },
    {
      "epoch": 1.5935507601765573,
      "grad_norm": 2.075281139800336,
      "learning_rate": 5.436619187752268e-07,
      "loss": 0.4416,
      "step": 12997
    },
    {
      "epoch": 1.593673369298676,
      "grad_norm": 1.8718304423904903,
      "learning_rate": 5.433463976449005e-07,
      "loss": 0.4598,
      "step": 12998
    },
    {
      "epoch": 1.5937959784207945,
      "grad_norm": 1.9238606788243502,
      "learning_rate": 5.430309569365016e-07,
      "loss": 0.4251,
      "step": 12999
    },
    {
      "epoch": 1.5939185875429132,
      "grad_norm": 1.8630083401173252,
      "learning_rate": 5.427155966629952e-07,
      "loss": 0.4018,
      "step": 13000
    },
    {
      "epoch": 1.5940411966650319,
      "grad_norm": 1.8225377743342475,
      "learning_rate": 5.424003168373421e-07,
      "loss": 0.4251,
      "step": 13001
    },
    {
      "epoch": 1.5941638057871506,
      "grad_norm": 1.8007901881287358,
      "learning_rate": 5.420851174725014e-07,
      "loss": 0.418,
      "step": 13002
    },
    {
      "epoch": 1.5942864149092693,
      "grad_norm": 2.0118047947055993,
      "learning_rate": 5.417699985814295e-07,
      "loss": 0.4303,
      "step": 13003
    },
    {
      "epoch": 1.594409024031388,
      "grad_norm": 2.08218903129532,
      "learning_rate": 5.414549601770774e-07,
      "loss": 0.4253,
      "step": 13004
    },
    {
      "epoch": 1.5945316331535067,
      "grad_norm": 1.9498268791962075,
      "learning_rate": 5.411400022723931e-07,
      "loss": 0.4332,
      "step": 13005
    },
    {
      "epoch": 1.5946542422756254,
      "grad_norm": 1.9404560153078119,
      "learning_rate": 5.408251248803234e-07,
      "loss": 0.4692,
      "step": 13006
    },
    {
      "epoch": 1.5947768513977438,
      "grad_norm": 1.9135444575161515,
      "learning_rate": 5.405103280138086e-07,
      "loss": 0.3951,
      "step": 13007
    },
    {
      "epoch": 1.5948994605198625,
      "grad_norm": 2.0351793868955883,
      "learning_rate": 5.40195611685789e-07,
      "loss": 0.4496,
      "step": 13008
    },
    {
      "epoch": 1.5950220696419812,
      "grad_norm": 1.8960181664055393,
      "learning_rate": 5.398809759091988e-07,
      "loss": 0.4198,
      "step": 13009
    },
    {
      "epoch": 1.5951446787641,
      "grad_norm": 1.7703587904819897,
      "learning_rate": 5.395664206969712e-07,
      "loss": 0.3819,
      "step": 13010
    },
    {
      "epoch": 1.5952672878862186,
      "grad_norm": 1.9410664984854236,
      "learning_rate": 5.392519460620338e-07,
      "loss": 0.424,
      "step": 13011
    },
    {
      "epoch": 1.5953898970083373,
      "grad_norm": 1.9109159410166983,
      "learning_rate": 5.389375520173126e-07,
      "loss": 0.4292,
      "step": 13012
    },
    {
      "epoch": 1.595512506130456,
      "grad_norm": 2.025793665211663,
      "learning_rate": 5.386232385757312e-07,
      "loss": 0.4354,
      "step": 13013
    },
    {
      "epoch": 1.5956351152525747,
      "grad_norm": 1.8746444487325051,
      "learning_rate": 5.383090057502055e-07,
      "loss": 0.4311,
      "step": 13014
    },
    {
      "epoch": 1.5957577243746934,
      "grad_norm": 2.038071079524065,
      "learning_rate": 5.379948535536528e-07,
      "loss": 0.3826,
      "step": 13015
    },
    {
      "epoch": 1.5958803334968121,
      "grad_norm": 1.8206273936219894,
      "learning_rate": 5.376807819989855e-07,
      "loss": 0.4479,
      "step": 13016
    },
    {
      "epoch": 1.5960029426189308,
      "grad_norm": 2.004317424884381,
      "learning_rate": 5.373667910991123e-07,
      "loss": 0.4169,
      "step": 13017
    },
    {
      "epoch": 1.5961255517410495,
      "grad_norm": 1.99955835557988,
      "learning_rate": 5.370528808669379e-07,
      "loss": 0.4295,
      "step": 13018
    },
    {
      "epoch": 1.5962481608631682,
      "grad_norm": 2.0068298273004346,
      "learning_rate": 5.367390513153656e-07,
      "loss": 0.4208,
      "step": 13019
    },
    {
      "epoch": 1.596370769985287,
      "grad_norm": 1.9719662902051187,
      "learning_rate": 5.364253024572936e-07,
      "loss": 0.4552,
      "step": 13020
    },
    {
      "epoch": 1.5964933791074056,
      "grad_norm": 1.8852317030751868,
      "learning_rate": 5.361116343056183e-07,
      "loss": 0.378,
      "step": 13021
    },
    {
      "epoch": 1.5966159882295243,
      "grad_norm": 1.7574713066300087,
      "learning_rate": 5.357980468732313e-07,
      "loss": 0.3931,
      "step": 13022
    },
    {
      "epoch": 1.596738597351643,
      "grad_norm": 1.9721824872134814,
      "learning_rate": 5.354845401730222e-07,
      "loss": 0.4532,
      "step": 13023
    },
    {
      "epoch": 1.5968612064737617,
      "grad_norm": 2.0016699329275696,
      "learning_rate": 5.351711142178758e-07,
      "loss": 0.4238,
      "step": 13024
    },
    {
      "epoch": 1.5969838155958804,
      "grad_norm": 1.9039801922363155,
      "learning_rate": 5.34857769020676e-07,
      "loss": 0.4286,
      "step": 13025
    },
    {
      "epoch": 1.5971064247179991,
      "grad_norm": 2.0688103574982257,
      "learning_rate": 5.34544504594301e-07,
      "loss": 0.4284,
      "step": 13026
    },
    {
      "epoch": 1.5972290338401178,
      "grad_norm": 1.9221351600046133,
      "learning_rate": 5.342313209516253e-07,
      "loss": 0.469,
      "step": 13027
    },
    {
      "epoch": 1.5973516429622365,
      "grad_norm": 1.9443112300284655,
      "learning_rate": 5.339182181055227e-07,
      "loss": 0.4098,
      "step": 13028
    },
    {
      "epoch": 1.5974742520843552,
      "grad_norm": 2.140323859113605,
      "learning_rate": 5.336051960688627e-07,
      "loss": 0.4386,
      "step": 13029
    },
    {
      "epoch": 1.5975968612064737,
      "grad_norm": 1.8874242972984085,
      "learning_rate": 5.332922548545102e-07,
      "loss": 0.4154,
      "step": 13030
    },
    {
      "epoch": 1.5977194703285924,
      "grad_norm": 1.7294529877708151,
      "learning_rate": 5.329793944753273e-07,
      "loss": 0.3723,
      "step": 13031
    },
    {
      "epoch": 1.597842079450711,
      "grad_norm": 1.9136973190738549,
      "learning_rate": 5.326666149441733e-07,
      "loss": 0.4388,
      "step": 13032
    },
    {
      "epoch": 1.5979646885728298,
      "grad_norm": 1.93561972702521,
      "learning_rate": 5.323539162739061e-07,
      "loss": 0.4045,
      "step": 13033
    },
    {
      "epoch": 1.5980872976949485,
      "grad_norm": 1.8886211747787804,
      "learning_rate": 5.320412984773749e-07,
      "loss": 0.4691,
      "step": 13034
    },
    {
      "epoch": 1.5982099068170672,
      "grad_norm": 1.8753910303070234,
      "learning_rate": 5.317287615674302e-07,
      "loss": 0.4487,
      "step": 13035
    },
    {
      "epoch": 1.598332515939186,
      "grad_norm": 1.9752322113037848,
      "learning_rate": 5.314163055569188e-07,
      "loss": 0.3929,
      "step": 13036
    },
    {
      "epoch": 1.5984551250613046,
      "grad_norm": 1.9664009516160523,
      "learning_rate": 5.311039304586815e-07,
      "loss": 0.4043,
      "step": 13037
    },
    {
      "epoch": 1.598577734183423,
      "grad_norm": 1.9944017054321224,
      "learning_rate": 5.307916362855592e-07,
      "loss": 0.4058,
      "step": 13038
    },
    {
      "epoch": 1.5987003433055418,
      "grad_norm": 2.064303089319292,
      "learning_rate": 5.304794230503868e-07,
      "loss": 0.4673,
      "step": 13039
    },
    {
      "epoch": 1.5988229524276605,
      "grad_norm": 1.8300551767948507,
      "learning_rate": 5.301672907659961e-07,
      "loss": 0.4411,
      "step": 13040
    },
    {
      "epoch": 1.5989455615497792,
      "grad_norm": 1.7766573756540402,
      "learning_rate": 5.29855239445217e-07,
      "loss": 0.44,
      "step": 13041
    },
    {
      "epoch": 1.5990681706718979,
      "grad_norm": 2.056540281014436,
      "learning_rate": 5.295432691008761e-07,
      "loss": 0.4229,
      "step": 13042
    },
    {
      "epoch": 1.5991907797940166,
      "grad_norm": 1.9795044720810289,
      "learning_rate": 5.292313797457954e-07,
      "loss": 0.4577,
      "step": 13043
    },
    {
      "epoch": 1.5993133889161353,
      "grad_norm": 1.982360983268385,
      "learning_rate": 5.289195713927933e-07,
      "loss": 0.4154,
      "step": 13044
    },
    {
      "epoch": 1.599435998038254,
      "grad_norm": 2.1157728858828775,
      "learning_rate": 5.286078440546863e-07,
      "loss": 0.4121,
      "step": 13045
    },
    {
      "epoch": 1.5995586071603727,
      "grad_norm": 2.2304428635895013,
      "learning_rate": 5.282961977442882e-07,
      "loss": 0.4378,
      "step": 13046
    },
    {
      "epoch": 1.5996812162824914,
      "grad_norm": 1.951334173667805,
      "learning_rate": 5.279846324744059e-07,
      "loss": 0.4304,
      "step": 13047
    },
    {
      "epoch": 1.59980382540461,
      "grad_norm": 1.9255063036092743,
      "learning_rate": 5.276731482578462e-07,
      "loss": 0.3998,
      "step": 13048
    },
    {
      "epoch": 1.5999264345267288,
      "grad_norm": 1.9431526978750981,
      "learning_rate": 5.273617451074125e-07,
      "loss": 0.4069,
      "step": 13049
    },
    {
      "epoch": 1.6000490436488475,
      "grad_norm": 2.1734286986296847,
      "learning_rate": 5.270504230359036e-07,
      "loss": 0.4532,
      "step": 13050
    },
    {
      "epoch": 1.6001716527709662,
      "grad_norm": 2.1217308043834855,
      "learning_rate": 5.26739182056114e-07,
      "loss": 0.4218,
      "step": 13051
    },
    {
      "epoch": 1.6002942618930849,
      "grad_norm": 1.9923419834181086,
      "learning_rate": 5.264280221808379e-07,
      "loss": 0.4391,
      "step": 13052
    },
    {
      "epoch": 1.6004168710152036,
      "grad_norm": 1.820880950697465,
      "learning_rate": 5.261169434228644e-07,
      "loss": 0.4275,
      "step": 13053
    },
    {
      "epoch": 1.6005394801373223,
      "grad_norm": 1.9059273926163862,
      "learning_rate": 5.25805945794979e-07,
      "loss": 0.4062,
      "step": 13054
    },
    {
      "epoch": 1.600662089259441,
      "grad_norm": 1.9412189870695797,
      "learning_rate": 5.254950293099639e-07,
      "loss": 0.3965,
      "step": 13055
    },
    {
      "epoch": 1.6007846983815597,
      "grad_norm": 1.7685499693993711,
      "learning_rate": 5.25184193980599e-07,
      "loss": 0.4625,
      "step": 13056
    },
    {
      "epoch": 1.6009073075036784,
      "grad_norm": 1.8718279966603264,
      "learning_rate": 5.248734398196595e-07,
      "loss": 0.4152,
      "step": 13057
    },
    {
      "epoch": 1.601029916625797,
      "grad_norm": 2.0313573576094526,
      "learning_rate": 5.245627668399187e-07,
      "loss": 0.4669,
      "step": 13058
    },
    {
      "epoch": 1.6011525257479158,
      "grad_norm": 1.999899707147838,
      "learning_rate": 5.242521750541448e-07,
      "loss": 0.3946,
      "step": 13059
    },
    {
      "epoch": 1.6012751348700345,
      "grad_norm": 1.9011518556846803,
      "learning_rate": 5.239416644751052e-07,
      "loss": 0.4726,
      "step": 13060
    },
    {
      "epoch": 1.6013977439921532,
      "grad_norm": 1.8861752702321994,
      "learning_rate": 5.236312351155607e-07,
      "loss": 0.4675,
      "step": 13061
    },
    {
      "epoch": 1.6015203531142717,
      "grad_norm": 1.9244492125324983,
      "learning_rate": 5.23320886988272e-07,
      "loss": 0.412,
      "step": 13062
    },
    {
      "epoch": 1.6016429622363904,
      "grad_norm": 1.7429961134073633,
      "learning_rate": 5.230106201059942e-07,
      "loss": 0.3615,
      "step": 13063
    },
    {
      "epoch": 1.601765571358509,
      "grad_norm": 1.9231875457242518,
      "learning_rate": 5.227004344814792e-07,
      "loss": 0.3925,
      "step": 13064
    },
    {
      "epoch": 1.6018881804806278,
      "grad_norm": 2.093547809672661,
      "learning_rate": 5.223903301274769e-07,
      "loss": 0.4398,
      "step": 13065
    },
    {
      "epoch": 1.6020107896027465,
      "grad_norm": 1.9026275152380248,
      "learning_rate": 5.22080307056734e-07,
      "loss": 0.409,
      "step": 13066
    },
    {
      "epoch": 1.6021333987248652,
      "grad_norm": 2.060800319098453,
      "learning_rate": 5.21770365281992e-07,
      "loss": 0.4222,
      "step": 13067
    },
    {
      "epoch": 1.6022560078469839,
      "grad_norm": 2.0038960783912962,
      "learning_rate": 5.214605048159893e-07,
      "loss": 0.4253,
      "step": 13068
    },
    {
      "epoch": 1.6023786169691026,
      "grad_norm": 1.8733479342816504,
      "learning_rate": 5.211507256714634e-07,
      "loss": 0.4017,
      "step": 13069
    },
    {
      "epoch": 1.602501226091221,
      "grad_norm": 1.8992965101721715,
      "learning_rate": 5.208410278611453e-07,
      "loss": 0.463,
      "step": 13070
    },
    {
      "epoch": 1.6026238352133397,
      "grad_norm": 2.261168094252635,
      "learning_rate": 5.205314113977656e-07,
      "loss": 0.4627,
      "step": 13071
    },
    {
      "epoch": 1.6027464443354584,
      "grad_norm": 2.032070543854561,
      "learning_rate": 5.202218762940484e-07,
      "loss": 0.4249,
      "step": 13072
    },
    {
      "epoch": 1.6028690534575771,
      "grad_norm": 1.8122194507520375,
      "learning_rate": 5.199124225627175e-07,
      "loss": 0.4217,
      "step": 13073
    },
    {
      "epoch": 1.6029916625796958,
      "grad_norm": 1.8919991867223767,
      "learning_rate": 5.196030502164912e-07,
      "loss": 0.4071,
      "step": 13074
    },
    {
      "epoch": 1.6031142717018145,
      "grad_norm": 1.9890213201058642,
      "learning_rate": 5.192937592680861e-07,
      "loss": 0.4177,
      "step": 13075
    },
    {
      "epoch": 1.6032368808239332,
      "grad_norm": 2.0397376339735653,
      "learning_rate": 5.18984549730214e-07,
      "loss": 0.4009,
      "step": 13076
    },
    {
      "epoch": 1.603359489946052,
      "grad_norm": 2.010989907686854,
      "learning_rate": 5.186754216155834e-07,
      "loss": 0.4391,
      "step": 13077
    },
    {
      "epoch": 1.6034820990681706,
      "grad_norm": 1.8673352746873115,
      "learning_rate": 5.183663749369006e-07,
      "loss": 0.4161,
      "step": 13078
    },
    {
      "epoch": 1.6036047081902893,
      "grad_norm": 1.8986316769982332,
      "learning_rate": 5.180574097068685e-07,
      "loss": 0.4396,
      "step": 13079
    },
    {
      "epoch": 1.603727317312408,
      "grad_norm": 2.0764470737930276,
      "learning_rate": 5.17748525938186e-07,
      "loss": 0.4516,
      "step": 13080
    },
    {
      "epoch": 1.6038499264345267,
      "grad_norm": 2.044636741048591,
      "learning_rate": 5.174397236435475e-07,
      "loss": 0.4318,
      "step": 13081
    },
    {
      "epoch": 1.6039725355566454,
      "grad_norm": 2.082885620887053,
      "learning_rate": 5.171310028356466e-07,
      "loss": 0.4232,
      "step": 13082
    },
    {
      "epoch": 1.6040951446787641,
      "grad_norm": 1.9488448719708258,
      "learning_rate": 5.168223635271724e-07,
      "loss": 0.4156,
      "step": 13083
    },
    {
      "epoch": 1.6042177538008828,
      "grad_norm": 1.9885708680264067,
      "learning_rate": 5.165138057308089e-07,
      "loss": 0.412,
      "step": 13084
    },
    {
      "epoch": 1.6043403629230015,
      "grad_norm": 1.8277679042026296,
      "learning_rate": 5.162053294592398e-07,
      "loss": 0.4124,
      "step": 13085
    },
    {
      "epoch": 1.6044629720451202,
      "grad_norm": 2.0722956211117043,
      "learning_rate": 5.158969347251442e-07,
      "loss": 0.4466,
      "step": 13086
    },
    {
      "epoch": 1.604585581167239,
      "grad_norm": 2.03417515537779,
      "learning_rate": 5.155886215411965e-07,
      "loss": 0.3961,
      "step": 13087
    },
    {
      "epoch": 1.6047081902893576,
      "grad_norm": 1.8384002995873625,
      "learning_rate": 5.1528038992007e-07,
      "loss": 0.3967,
      "step": 13088
    },
    {
      "epoch": 1.6048307994114763,
      "grad_norm": 2.036868144342388,
      "learning_rate": 5.149722398744336e-07,
      "loss": 0.3994,
      "step": 13089
    },
    {
      "epoch": 1.604953408533595,
      "grad_norm": 1.9611170424003053,
      "learning_rate": 5.146641714169515e-07,
      "loss": 0.3781,
      "step": 13090
    },
    {
      "epoch": 1.6050760176557137,
      "grad_norm": 1.9395805325693514,
      "learning_rate": 5.143561845602865e-07,
      "loss": 0.4153,
      "step": 13091
    },
    {
      "epoch": 1.6051986267778324,
      "grad_norm": 2.069495028509095,
      "learning_rate": 5.140482793170986e-07,
      "loss": 0.4399,
      "step": 13092
    },
    {
      "epoch": 1.605321235899951,
      "grad_norm": 2.0103312685246717,
      "learning_rate": 5.137404557000422e-07,
      "loss": 0.4431,
      "step": 13093
    },
    {
      "epoch": 1.6054438450220696,
      "grad_norm": 1.8071707663700571,
      "learning_rate": 5.134327137217687e-07,
      "loss": 0.4605,
      "step": 13094
    },
    {
      "epoch": 1.6055664541441883,
      "grad_norm": 1.916259241895349,
      "learning_rate": 5.131250533949283e-07,
      "loss": 0.4442,
      "step": 13095
    },
    {
      "epoch": 1.605689063266307,
      "grad_norm": 1.9307562199368227,
      "learning_rate": 5.128174747321657e-07,
      "loss": 0.4111,
      "step": 13096
    },
    {
      "epoch": 1.6058116723884257,
      "grad_norm": 1.8736912670303187,
      "learning_rate": 5.125099777461224e-07,
      "loss": 0.4069,
      "step": 13097
    },
    {
      "epoch": 1.6059342815105444,
      "grad_norm": 2.075225249445796,
      "learning_rate": 5.122025624494375e-07,
      "loss": 0.4031,
      "step": 13098
    },
    {
      "epoch": 1.606056890632663,
      "grad_norm": 2.125628093745281,
      "learning_rate": 5.118952288547468e-07,
      "loss": 0.4223,
      "step": 13099
    },
    {
      "epoch": 1.6061794997547818,
      "grad_norm": 1.9204885228936786,
      "learning_rate": 5.115879769746823e-07,
      "loss": 0.4168,
      "step": 13100
    },
    {
      "epoch": 1.6063021088769003,
      "grad_norm": 1.904930869521767,
      "learning_rate": 5.112808068218711e-07,
      "loss": 0.4116,
      "step": 13101
    },
    {
      "epoch": 1.606424717999019,
      "grad_norm": 1.8695623492017317,
      "learning_rate": 5.109737184089405e-07,
      "loss": 0.4221,
      "step": 13102
    },
    {
      "epoch": 1.6065473271211377,
      "grad_norm": 2.054569081861918,
      "learning_rate": 5.106667117485103e-07,
      "loss": 0.4001,
      "step": 13103
    },
    {
      "epoch": 1.6066699362432564,
      "grad_norm": 1.927129254249864,
      "learning_rate": 5.103597868532012e-07,
      "loss": 0.4224,
      "step": 13104
    },
    {
      "epoch": 1.606792545365375,
      "grad_norm": 1.871817497324088,
      "learning_rate": 5.100529437356264e-07,
      "loss": 0.4546,
      "step": 13105
    },
    {
      "epoch": 1.6069151544874938,
      "grad_norm": 1.9189025561241397,
      "learning_rate": 5.09746182408399e-07,
      "loss": 0.421,
      "step": 13106
    },
    {
      "epoch": 1.6070377636096125,
      "grad_norm": 1.8058142195644709,
      "learning_rate": 5.094395028841265e-07,
      "loss": 0.4209,
      "step": 13107
    },
    {
      "epoch": 1.6071603727317312,
      "grad_norm": 1.8375679396722264,
      "learning_rate": 5.091329051754146e-07,
      "loss": 0.4468,
      "step": 13108
    },
    {
      "epoch": 1.6072829818538499,
      "grad_norm": 1.8374173856273877,
      "learning_rate": 5.088263892948658e-07,
      "loss": 0.4368,
      "step": 13109
    },
    {
      "epoch": 1.6074055909759686,
      "grad_norm": 1.9021876071389765,
      "learning_rate": 5.085199552550765e-07,
      "loss": 0.4014,
      "step": 13110
    },
    {
      "epoch": 1.6075282000980873,
      "grad_norm": 1.9153275633143485,
      "learning_rate": 5.082136030686425e-07,
      "loss": 0.4185,
      "step": 13111
    },
    {
      "epoch": 1.607650809220206,
      "grad_norm": 2.092397978710147,
      "learning_rate": 5.079073327481562e-07,
      "loss": 0.419,
      "step": 13112
    },
    {
      "epoch": 1.6077734183423247,
      "grad_norm": 2.1490383512090476,
      "learning_rate": 5.076011443062054e-07,
      "loss": 0.4128,
      "step": 13113
    },
    {
      "epoch": 1.6078960274644434,
      "grad_norm": 1.923587549248726,
      "learning_rate": 5.07295037755374e-07,
      "loss": 0.3639,
      "step": 13114
    },
    {
      "epoch": 1.608018636586562,
      "grad_norm": 2.0217720187665926,
      "learning_rate": 5.069890131082452e-07,
      "loss": 0.3535,
      "step": 13115
    },
    {
      "epoch": 1.6081412457086808,
      "grad_norm": 1.7179417590308017,
      "learning_rate": 5.066830703773956e-07,
      "loss": 0.4377,
      "step": 13116
    },
    {
      "epoch": 1.6082638548307995,
      "grad_norm": 1.910851367995251,
      "learning_rate": 5.063772095754013e-07,
      "loss": 0.3989,
      "step": 13117
    },
    {
      "epoch": 1.6083864639529182,
      "grad_norm": 2.062882298921677,
      "learning_rate": 5.060714307148323e-07,
      "loss": 0.4283,
      "step": 13118
    },
    {
      "epoch": 1.6085090730750369,
      "grad_norm": 2.0090724401022424,
      "learning_rate": 5.057657338082586e-07,
      "loss": 0.4285,
      "step": 13119
    },
    {
      "epoch": 1.6086316821971556,
      "grad_norm": 1.9303550676340349,
      "learning_rate": 5.054601188682426e-07,
      "loss": 0.458,
      "step": 13120
    },
    {
      "epoch": 1.6087542913192743,
      "grad_norm": 2.2061851535020995,
      "learning_rate": 5.051545859073479e-07,
      "loss": 0.4527,
      "step": 13121
    },
    {
      "epoch": 1.608876900441393,
      "grad_norm": 1.9797029718885086,
      "learning_rate": 5.04849134938131e-07,
      "loss": 0.4078,
      "step": 13122
    },
    {
      "epoch": 1.6089995095635117,
      "grad_norm": 1.9540544282898709,
      "learning_rate": 5.045437659731461e-07,
      "loss": 0.4614,
      "step": 13123
    },
    {
      "epoch": 1.6091221186856304,
      "grad_norm": 1.9778212927932584,
      "learning_rate": 5.042384790249452e-07,
      "loss": 0.411,
      "step": 13124
    },
    {
      "epoch": 1.6092447278077489,
      "grad_norm": 2.10944406775489,
      "learning_rate": 5.039332741060767e-07,
      "loss": 0.4043,
      "step": 13125
    },
    {
      "epoch": 1.6093673369298676,
      "grad_norm": 1.8756007885305457,
      "learning_rate": 5.036281512290844e-07,
      "loss": 0.3961,
      "step": 13126
    },
    {
      "epoch": 1.6094899460519863,
      "grad_norm": 1.8548133880132762,
      "learning_rate": 5.033231104065087e-07,
      "loss": 0.4489,
      "step": 13127
    },
    {
      "epoch": 1.609612555174105,
      "grad_norm": 1.6807036729011047,
      "learning_rate": 5.030181516508878e-07,
      "loss": 0.4181,
      "step": 13128
    },
    {
      "epoch": 1.6097351642962237,
      "grad_norm": 1.9244630761808168,
      "learning_rate": 5.027132749747579e-07,
      "loss": 0.3978,
      "step": 13129
    },
    {
      "epoch": 1.6098577734183424,
      "grad_norm": 1.9735649688666532,
      "learning_rate": 5.024084803906467e-07,
      "loss": 0.4318,
      "step": 13130
    },
    {
      "epoch": 1.609980382540461,
      "grad_norm": 1.969840117037585,
      "learning_rate": 5.021037679110838e-07,
      "loss": 0.4479,
      "step": 13131
    },
    {
      "epoch": 1.6101029916625795,
      "grad_norm": 1.9590845733800568,
      "learning_rate": 5.017991375485934e-07,
      "loss": 0.4297,
      "step": 13132
    },
    {
      "epoch": 1.6102256007846982,
      "grad_norm": 1.954970811927708,
      "learning_rate": 5.014945893156953e-07,
      "loss": 0.4319,
      "step": 13133
    },
    {
      "epoch": 1.610348209906817,
      "grad_norm": 1.6494856426152162,
      "learning_rate": 5.011901232249083e-07,
      "loss": 0.4194,
      "step": 13134
    },
    {
      "epoch": 1.6104708190289356,
      "grad_norm": 2.078819206936657,
      "learning_rate": 5.008857392887454e-07,
      "loss": 0.4339,
      "step": 13135
    },
    {
      "epoch": 1.6105934281510543,
      "grad_norm": 1.9251274131731897,
      "learning_rate": 5.005814375197183e-07,
      "loss": 0.4727,
      "step": 13136
    },
    {
      "epoch": 1.610716037273173,
      "grad_norm": 2.0854490087079283,
      "learning_rate": 5.002772179303334e-07,
      "loss": 0.4543,
      "step": 13137
    },
    {
      "epoch": 1.6108386463952917,
      "grad_norm": 1.9895206324587462,
      "learning_rate": 4.999730805330955e-07,
      "loss": 0.4302,
      "step": 13138
    },
    {
      "epoch": 1.6109612555174104,
      "grad_norm": 1.8789224435288636,
      "learning_rate": 4.996690253405045e-07,
      "loss": 0.4304,
      "step": 13139
    },
    {
      "epoch": 1.6110838646395291,
      "grad_norm": 1.8860409553825042,
      "learning_rate": 4.993650523650575e-07,
      "loss": 0.3835,
      "step": 13140
    },
    {
      "epoch": 1.6112064737616478,
      "grad_norm": 2.101061839696593,
      "learning_rate": 4.990611616192487e-07,
      "loss": 0.4232,
      "step": 13141
    },
    {
      "epoch": 1.6113290828837665,
      "grad_norm": 1.9615741568166731,
      "learning_rate": 4.987573531155698e-07,
      "loss": 0.4141,
      "step": 13142
    },
    {
      "epoch": 1.6114516920058852,
      "grad_norm": 1.7236838460080353,
      "learning_rate": 4.984536268665053e-07,
      "loss": 0.4189,
      "step": 13143
    },
    {
      "epoch": 1.611574301128004,
      "grad_norm": 1.9784251903522942,
      "learning_rate": 4.981499828845402e-07,
      "loss": 0.4401,
      "step": 13144
    },
    {
      "epoch": 1.6116969102501226,
      "grad_norm": 2.025428615713599,
      "learning_rate": 4.978464211821555e-07,
      "loss": 0.3992,
      "step": 13145
    },
    {
      "epoch": 1.6118195193722413,
      "grad_norm": 1.95871273465702,
      "learning_rate": 4.975429417718275e-07,
      "loss": 0.4469,
      "step": 13146
    },
    {
      "epoch": 1.61194212849436,
      "grad_norm": 1.98307363684843,
      "learning_rate": 4.972395446660288e-07,
      "loss": 0.4114,
      "step": 13147
    },
    {
      "epoch": 1.6120647376164787,
      "grad_norm": 1.9480456992659805,
      "learning_rate": 4.969362298772307e-07,
      "loss": 0.4806,
      "step": 13148
    },
    {
      "epoch": 1.6121873467385974,
      "grad_norm": 2.066220436616069,
      "learning_rate": 4.966329974178999e-07,
      "loss": 0.4005,
      "step": 13149
    },
    {
      "epoch": 1.6123099558607161,
      "grad_norm": 1.881192241114974,
      "learning_rate": 4.963298473004998e-07,
      "loss": 0.4465,
      "step": 13150
    },
    {
      "epoch": 1.6124325649828348,
      "grad_norm": 1.959616452791138,
      "learning_rate": 4.960267795374899e-07,
      "loss": 0.3925,
      "step": 13151
    },
    {
      "epoch": 1.6125551741049535,
      "grad_norm": 1.9696533127529028,
      "learning_rate": 4.957237941413273e-07,
      "loss": 0.4064,
      "step": 13152
    },
    {
      "epoch": 1.6126777832270722,
      "grad_norm": 2.0520370188134374,
      "learning_rate": 4.954208911244645e-07,
      "loss": 0.4648,
      "step": 13153
    },
    {
      "epoch": 1.612800392349191,
      "grad_norm": 1.8584694216386481,
      "learning_rate": 4.951180704993528e-07,
      "loss": 0.4205,
      "step": 13154
    },
    {
      "epoch": 1.6129230014713096,
      "grad_norm": 1.9203687735025767,
      "learning_rate": 4.948153322784368e-07,
      "loss": 0.4133,
      "step": 13155
    },
    {
      "epoch": 1.613045610593428,
      "grad_norm": 1.72289991325009,
      "learning_rate": 4.945126764741614e-07,
      "loss": 0.395,
      "step": 13156
    },
    {
      "epoch": 1.6131682197155468,
      "grad_norm": 1.9880768548183658,
      "learning_rate": 4.942101030989649e-07,
      "loss": 0.4066,
      "step": 13157
    },
    {
      "epoch": 1.6132908288376655,
      "grad_norm": 1.9896057072752376,
      "learning_rate": 4.939076121652844e-07,
      "loss": 0.4244,
      "step": 13158
    },
    {
      "epoch": 1.6134134379597842,
      "grad_norm": 1.96656992445333,
      "learning_rate": 4.936052036855529e-07,
      "loss": 0.424,
      "step": 13159
    },
    {
      "epoch": 1.613536047081903,
      "grad_norm": 1.9293861434512192,
      "learning_rate": 4.933028776721987e-07,
      "loss": 0.418,
      "step": 13160
    },
    {
      "epoch": 1.6136586562040216,
      "grad_norm": 1.8157539359154518,
      "learning_rate": 4.930006341376489e-07,
      "loss": 0.4356,
      "step": 13161
    },
    {
      "epoch": 1.6137812653261403,
      "grad_norm": 1.8991831761895932,
      "learning_rate": 4.926984730943268e-07,
      "loss": 0.3975,
      "step": 13162
    },
    {
      "epoch": 1.613903874448259,
      "grad_norm": 1.7118236870678414,
      "learning_rate": 4.923963945546511e-07,
      "loss": 0.3858,
      "step": 13163
    },
    {
      "epoch": 1.6140264835703775,
      "grad_norm": 2.002701698071343,
      "learning_rate": 4.92094398531037e-07,
      "loss": 0.4294,
      "step": 13164
    },
    {
      "epoch": 1.6141490926924962,
      "grad_norm": 1.887086055096301,
      "learning_rate": 4.917924850358988e-07,
      "loss": 0.4108,
      "step": 13165
    },
    {
      "epoch": 1.6142717018146149,
      "grad_norm": 1.8488857690381366,
      "learning_rate": 4.914906540816436e-07,
      "loss": 0.3867,
      "step": 13166
    },
    {
      "epoch": 1.6143943109367336,
      "grad_norm": 2.1315155768072414,
      "learning_rate": 4.911889056806793e-07,
      "loss": 0.4201,
      "step": 13167
    },
    {
      "epoch": 1.6145169200588523,
      "grad_norm": 1.88094917358807,
      "learning_rate": 4.908872398454064e-07,
      "loss": 0.392,
      "step": 13168
    },
    {
      "epoch": 1.614639529180971,
      "grad_norm": 2.2342636398161826,
      "learning_rate": 4.905856565882258e-07,
      "loss": 0.3982,
      "step": 13169
    },
    {
      "epoch": 1.6147621383030897,
      "grad_norm": 2.028694825003288,
      "learning_rate": 4.902841559215313e-07,
      "loss": 0.4217,
      "step": 13170
    },
    {
      "epoch": 1.6148847474252084,
      "grad_norm": 2.2038090763364546,
      "learning_rate": 4.899827378577166e-07,
      "loss": 0.4407,
      "step": 13171
    },
    {
      "epoch": 1.615007356547327,
      "grad_norm": 1.9915620965880876,
      "learning_rate": 4.896814024091695e-07,
      "loss": 0.4013,
      "step": 13172
    },
    {
      "epoch": 1.6151299656694458,
      "grad_norm": 2.0543325093428035,
      "learning_rate": 4.893801495882755e-07,
      "loss": 0.4342,
      "step": 13173
    },
    {
      "epoch": 1.6152525747915645,
      "grad_norm": 2.087099651746899,
      "learning_rate": 4.890789794074166e-07,
      "loss": 0.4428,
      "step": 13174
    },
    {
      "epoch": 1.6153751839136832,
      "grad_norm": 2.1901960516766144,
      "learning_rate": 4.887778918789726e-07,
      "loss": 0.4294,
      "step": 13175
    },
    {
      "epoch": 1.6154977930358019,
      "grad_norm": 1.9383732767108015,
      "learning_rate": 4.884768870153175e-07,
      "loss": 0.4304,
      "step": 13176
    },
    {
      "epoch": 1.6156204021579206,
      "grad_norm": 2.1700042945310045,
      "learning_rate": 4.881759648288231e-07,
      "loss": 0.4452,
      "step": 13177
    },
    {
      "epoch": 1.6157430112800393,
      "grad_norm": 1.8415621106871352,
      "learning_rate": 4.878751253318589e-07,
      "loss": 0.4697,
      "step": 13178
    },
    {
      "epoch": 1.615865620402158,
      "grad_norm": 2.0405282952583668,
      "learning_rate": 4.87574368536789e-07,
      "loss": 0.4484,
      "step": 13179
    },
    {
      "epoch": 1.6159882295242767,
      "grad_norm": 1.8361477598259295,
      "learning_rate": 4.872736944559747e-07,
      "loss": 0.4011,
      "step": 13180
    },
    {
      "epoch": 1.6161108386463954,
      "grad_norm": 2.055958789858551,
      "learning_rate": 4.869731031017749e-07,
      "loss": 0.4468,
      "step": 13181
    },
    {
      "epoch": 1.616233447768514,
      "grad_norm": 2.074733880021995,
      "learning_rate": 4.86672594486545e-07,
      "loss": 0.4609,
      "step": 13182
    },
    {
      "epoch": 1.6163560568906328,
      "grad_norm": 1.8167211578945048,
      "learning_rate": 4.86372168622635e-07,
      "loss": 0.4157,
      "step": 13183
    },
    {
      "epoch": 1.6164786660127515,
      "grad_norm": 2.119888090233401,
      "learning_rate": 4.860718255223945e-07,
      "loss": 0.4554,
      "step": 13184
    },
    {
      "epoch": 1.6166012751348702,
      "grad_norm": 1.9403154778767842,
      "learning_rate": 4.857715651981673e-07,
      "loss": 0.4412,
      "step": 13185
    },
    {
      "epoch": 1.6167238842569889,
      "grad_norm": 2.0229625628470016,
      "learning_rate": 4.85471387662294e-07,
      "loss": 0.3968,
      "step": 13186
    },
    {
      "epoch": 1.6168464933791074,
      "grad_norm": 1.9456277910504358,
      "learning_rate": 4.851712929271132e-07,
      "loss": 0.4534,
      "step": 13187
    },
    {
      "epoch": 1.616969102501226,
      "grad_norm": 1.788893673860472,
      "learning_rate": 4.848712810049597e-07,
      "loss": 0.4411,
      "step": 13188
    },
    {
      "epoch": 1.6170917116233448,
      "grad_norm": 1.9810200353326026,
      "learning_rate": 4.845713519081641e-07,
      "loss": 0.4673,
      "step": 13189
    },
    {
      "epoch": 1.6172143207454635,
      "grad_norm": 1.9389355718204206,
      "learning_rate": 4.842715056490535e-07,
      "loss": 0.4423,
      "step": 13190
    },
    {
      "epoch": 1.6173369298675822,
      "grad_norm": 2.101023473134663,
      "learning_rate": 4.839717422399526e-07,
      "loss": 0.4259,
      "step": 13191
    },
    {
      "epoch": 1.6174595389897009,
      "grad_norm": 1.867738940020163,
      "learning_rate": 4.836720616931831e-07,
      "loss": 0.4465,
      "step": 13192
    },
    {
      "epoch": 1.6175821481118196,
      "grad_norm": 1.982739273890274,
      "learning_rate": 4.833724640210605e-07,
      "loss": 0.4411,
      "step": 13193
    },
    {
      "epoch": 1.6177047572339383,
      "grad_norm": 2.0226753183324617,
      "learning_rate": 4.830729492358998e-07,
      "loss": 0.4053,
      "step": 13194
    },
    {
      "epoch": 1.6178273663560567,
      "grad_norm": 2.008704751694151,
      "learning_rate": 4.827735173500123e-07,
      "loss": 0.3926,
      "step": 13195
    },
    {
      "epoch": 1.6179499754781754,
      "grad_norm": 2.0583301243908187,
      "learning_rate": 4.824741683757043e-07,
      "loss": 0.4397,
      "step": 13196
    },
    {
      "epoch": 1.6180725846002941,
      "grad_norm": 1.9800739680876047,
      "learning_rate": 4.821749023252789e-07,
      "loss": 0.3936,
      "step": 13197
    },
    {
      "epoch": 1.6181951937224128,
      "grad_norm": 1.7762565509015487,
      "learning_rate": 4.818757192110382e-07,
      "loss": 0.4224,
      "step": 13198
    },
    {
      "epoch": 1.6183178028445315,
      "grad_norm": 1.8047984159323962,
      "learning_rate": 4.815766190452775e-07,
      "loss": 0.3762,
      "step": 13199
    },
    {
      "epoch": 1.6184404119666502,
      "grad_norm": 1.998351727532097,
      "learning_rate": 4.812776018402921e-07,
      "loss": 0.4421,
      "step": 13200
    },
    {
      "epoch": 1.618563021088769,
      "grad_norm": 2.101109894787219,
      "learning_rate": 4.8097866760837e-07,
      "loss": 0.4389,
      "step": 13201
    },
    {
      "epoch": 1.6186856302108876,
      "grad_norm": 2.00320775848001,
      "learning_rate": 4.806798163617998e-07,
      "loss": 0.4187,
      "step": 13202
    },
    {
      "epoch": 1.6188082393330063,
      "grad_norm": 1.885400405229087,
      "learning_rate": 4.803810481128635e-07,
      "loss": 0.3991,
      "step": 13203
    },
    {
      "epoch": 1.618930848455125,
      "grad_norm": 1.9214249347904435,
      "learning_rate": 4.800823628738416e-07,
      "loss": 0.3793,
      "step": 13204
    },
    {
      "epoch": 1.6190534575772437,
      "grad_norm": 2.027085438855625,
      "learning_rate": 4.797837606570119e-07,
      "loss": 0.4772,
      "step": 13205
    },
    {
      "epoch": 1.6191760666993624,
      "grad_norm": 2.0265657304907267,
      "learning_rate": 4.794852414746448e-07,
      "loss": 0.4443,
      "step": 13206
    },
    {
      "epoch": 1.6192986758214811,
      "grad_norm": 1.947636344971341,
      "learning_rate": 4.791868053390114e-07,
      "loss": 0.4525,
      "step": 13207
    },
    {
      "epoch": 1.6194212849435998,
      "grad_norm": 1.8291206136797078,
      "learning_rate": 4.788884522623782e-07,
      "loss": 0.4297,
      "step": 13208
    },
    {
      "epoch": 1.6195438940657185,
      "grad_norm": 1.8472465622668166,
      "learning_rate": 4.78590182257008e-07,
      "loss": 0.4216,
      "step": 13209
    },
    {
      "epoch": 1.6196665031878372,
      "grad_norm": 1.9294485578872,
      "learning_rate": 4.782919953351592e-07,
      "loss": 0.3991,
      "step": 13210
    },
    {
      "epoch": 1.619789112309956,
      "grad_norm": 1.9935888156862367,
      "learning_rate": 4.779938915090887e-07,
      "loss": 0.4328,
      "step": 13211
    },
    {
      "epoch": 1.6199117214320746,
      "grad_norm": 1.9623212848760339,
      "learning_rate": 4.776958707910495e-07,
      "loss": 0.3933,
      "step": 13212
    },
    {
      "epoch": 1.6200343305541933,
      "grad_norm": 1.8615904042355569,
      "learning_rate": 4.773979331932902e-07,
      "loss": 0.4042,
      "step": 13213
    },
    {
      "epoch": 1.620156939676312,
      "grad_norm": 1.8863016573424252,
      "learning_rate": 4.771000787280558e-07,
      "loss": 0.4047,
      "step": 13214
    },
    {
      "epoch": 1.6202795487984307,
      "grad_norm": 2.0674398550571693,
      "learning_rate": 4.7680230740759033e-07,
      "loss": 0.4547,
      "step": 13215
    },
    {
      "epoch": 1.6204021579205494,
      "grad_norm": 2.0686088323658858,
      "learning_rate": 4.765046192441308e-07,
      "loss": 0.4586,
      "step": 13216
    },
    {
      "epoch": 1.6205247670426681,
      "grad_norm": 1.9049787847569395,
      "learning_rate": 4.7620701424991476e-07,
      "loss": 0.4331,
      "step": 13217
    },
    {
      "epoch": 1.6206473761647868,
      "grad_norm": 1.890938983608629,
      "learning_rate": 4.7590949243717323e-07,
      "loss": 0.4085,
      "step": 13218
    },
    {
      "epoch": 1.6207699852869053,
      "grad_norm": 2.0665806735344434,
      "learning_rate": 4.7561205381813413e-07,
      "loss": 0.4567,
      "step": 13219
    },
    {
      "epoch": 1.620892594409024,
      "grad_norm": 1.9621877971829593,
      "learning_rate": 4.753146984050236e-07,
      "loss": 0.4446,
      "step": 13220
    },
    {
      "epoch": 1.6210152035311427,
      "grad_norm": 2.1202664468352608,
      "learning_rate": 4.7501742621006376e-07,
      "loss": 0.4376,
      "step": 13221
    },
    {
      "epoch": 1.6211378126532614,
      "grad_norm": 1.9286826188562995,
      "learning_rate": 4.747202372454729e-07,
      "loss": 0.4538,
      "step": 13222
    },
    {
      "epoch": 1.62126042177538,
      "grad_norm": 1.8581607924366756,
      "learning_rate": 4.7442313152346515e-07,
      "loss": 0.4438,
      "step": 13223
    },
    {
      "epoch": 1.6213830308974988,
      "grad_norm": 1.9654149806683725,
      "learning_rate": 4.7412610905625267e-07,
      "loss": 0.4133,
      "step": 13224
    },
    {
      "epoch": 1.6215056400196175,
      "grad_norm": 1.9112300729213054,
      "learning_rate": 4.7382916985604485e-07,
      "loss": 0.4146,
      "step": 13225
    },
    {
      "epoch": 1.6216282491417362,
      "grad_norm": 2.2987839423834644,
      "learning_rate": 4.7353231393504396e-07,
      "loss": 0.4495,
      "step": 13226
    },
    {
      "epoch": 1.6217508582638547,
      "grad_norm": 2.0124596975138127,
      "learning_rate": 4.732355413054526e-07,
      "loss": 0.4792,
      "step": 13227
    },
    {
      "epoch": 1.6218734673859734,
      "grad_norm": 2.1263784324163955,
      "learning_rate": 4.729388519794692e-07,
      "loss": 0.402,
      "step": 13228
    },
    {
      "epoch": 1.621996076508092,
      "grad_norm": 1.8039192032996874,
      "learning_rate": 4.72642245969287e-07,
      "loss": 0.3868,
      "step": 13229
    },
    {
      "epoch": 1.6221186856302108,
      "grad_norm": 1.8861258707492277,
      "learning_rate": 4.7234572328709844e-07,
      "loss": 0.4108,
      "step": 13230
    },
    {
      "epoch": 1.6222412947523295,
      "grad_norm": 1.9809898539181516,
      "learning_rate": 4.720492839450894e-07,
      "loss": 0.4386,
      "step": 13231
    },
    {
      "epoch": 1.6223639038744482,
      "grad_norm": 1.8835725609490048,
      "learning_rate": 4.7175292795544586e-07,
      "loss": 0.4409,
      "step": 13232
    },
    {
      "epoch": 1.6224865129965669,
      "grad_norm": 2.0819619329374524,
      "learning_rate": 4.7145665533034677e-07,
      "loss": 0.4377,
      "step": 13233
    },
    {
      "epoch": 1.6226091221186856,
      "grad_norm": 1.9511651692552503,
      "learning_rate": 4.711604660819713e-07,
      "loss": 0.4348,
      "step": 13234
    },
    {
      "epoch": 1.6227317312408043,
      "grad_norm": 1.9148031365561518,
      "learning_rate": 4.7086436022249243e-07,
      "loss": 0.4027,
      "step": 13235
    },
    {
      "epoch": 1.622854340362923,
      "grad_norm": 1.9239260062930406,
      "learning_rate": 4.705683377640799e-07,
      "loss": 0.46,
      "step": 13236
    },
    {
      "epoch": 1.6229769494850417,
      "grad_norm": 1.956327375737486,
      "learning_rate": 4.7027239871890143e-07,
      "loss": 0.447,
      "step": 13237
    },
    {
      "epoch": 1.6230995586071604,
      "grad_norm": 1.8475666221528357,
      "learning_rate": 4.6997654309912237e-07,
      "loss": 0.4425,
      "step": 13238
    },
    {
      "epoch": 1.623222167729279,
      "grad_norm": 1.864250823584717,
      "learning_rate": 4.696807709168996e-07,
      "loss": 0.4475,
      "step": 13239
    },
    {
      "epoch": 1.6233447768513978,
      "grad_norm": 2.142400096267339,
      "learning_rate": 4.6938508218439176e-07,
      "loss": 0.4271,
      "step": 13240
    },
    {
      "epoch": 1.6234673859735165,
      "grad_norm": 1.943740419061855,
      "learning_rate": 4.6908947691375244e-07,
      "loss": 0.4644,
      "step": 13241
    },
    {
      "epoch": 1.6235899950956352,
      "grad_norm": 1.9642029470012379,
      "learning_rate": 4.6879395511713137e-07,
      "loss": 0.4868,
      "step": 13242
    },
    {
      "epoch": 1.6237126042177539,
      "grad_norm": 2.000790171484791,
      "learning_rate": 4.6849851680667385e-07,
      "loss": 0.4503,
      "step": 13243
    },
    {
      "epoch": 1.6238352133398726,
      "grad_norm": 2.041120506429111,
      "learning_rate": 4.682031619945238e-07,
      "loss": 0.4328,
      "step": 13244
    },
    {
      "epoch": 1.6239578224619913,
      "grad_norm": 2.0486780889359153,
      "learning_rate": 4.6790789069282137e-07,
      "loss": 0.3914,
      "step": 13245
    },
    {
      "epoch": 1.62408043158411,
      "grad_norm": 1.9955638410688679,
      "learning_rate": 4.676127029137023e-07,
      "loss": 0.4427,
      "step": 13246
    },
    {
      "epoch": 1.6242030407062287,
      "grad_norm": 2.0134820118933376,
      "learning_rate": 4.673175986692985e-07,
      "loss": 0.4584,
      "step": 13247
    },
    {
      "epoch": 1.6243256498283474,
      "grad_norm": 1.706230094387035,
      "learning_rate": 4.670225779717408e-07,
      "loss": 0.4311,
      "step": 13248
    },
    {
      "epoch": 1.624448258950466,
      "grad_norm": 1.9940370253408104,
      "learning_rate": 4.6672764083315337e-07,
      "loss": 0.4412,
      "step": 13249
    },
    {
      "epoch": 1.6245708680725846,
      "grad_norm": 1.88645035406385,
      "learning_rate": 4.6643278726566016e-07,
      "loss": 0.4411,
      "step": 13250
    },
    {
      "epoch": 1.6246934771947033,
      "grad_norm": 1.9629303127317879,
      "learning_rate": 4.6613801728137897e-07,
      "loss": 0.4549,
      "step": 13251
    },
    {
      "epoch": 1.624816086316822,
      "grad_norm": 2.0124118444180303,
      "learning_rate": 4.658433308924265e-07,
      "loss": 0.4434,
      "step": 13252
    },
    {
      "epoch": 1.6249386954389407,
      "grad_norm": 1.9641192980831015,
      "learning_rate": 4.655487281109139e-07,
      "loss": 0.4633,
      "step": 13253
    },
    {
      "epoch": 1.6250613045610593,
      "grad_norm": 2.2022949923152533,
      "learning_rate": 4.6525420894895063e-07,
      "loss": 0.4018,
      "step": 13254
    },
    {
      "epoch": 1.625183913683178,
      "grad_norm": 1.7634945948049063,
      "learning_rate": 4.649597734186417e-07,
      "loss": 0.4418,
      "step": 13255
    },
    {
      "epoch": 1.6253065228052967,
      "grad_norm": 1.9657270359739916,
      "learning_rate": 4.6466542153208837e-07,
      "loss": 0.4599,
      "step": 13256
    },
    {
      "epoch": 1.6254291319274154,
      "grad_norm": 1.908252326513937,
      "learning_rate": 4.643711533013895e-07,
      "loss": 0.4529,
      "step": 13257
    },
    {
      "epoch": 1.625551741049534,
      "grad_norm": 1.952318141423833,
      "learning_rate": 4.640769687386404e-07,
      "loss": 0.4513,
      "step": 13258
    },
    {
      "epoch": 1.6256743501716526,
      "grad_norm": 1.995255489096908,
      "learning_rate": 4.637828678559325e-07,
      "loss": 0.429,
      "step": 13259
    },
    {
      "epoch": 1.6257969592937713,
      "grad_norm": 1.8806825219761985,
      "learning_rate": 4.6348885066535257e-07,
      "loss": 0.4169,
      "step": 13260
    },
    {
      "epoch": 1.62591956841589,
      "grad_norm": 1.9909530677669482,
      "learning_rate": 4.6319491717898733e-07,
      "loss": 0.4298,
      "step": 13261
    },
    {
      "epoch": 1.6260421775380087,
      "grad_norm": 1.7642334149535999,
      "learning_rate": 4.62901067408916e-07,
      "loss": 0.3936,
      "step": 13262
    },
    {
      "epoch": 1.6261647866601274,
      "grad_norm": 2.015228184358742,
      "learning_rate": 4.626073013672178e-07,
      "loss": 0.4387,
      "step": 13263
    },
    {
      "epoch": 1.6262873957822461,
      "grad_norm": 1.8718857301284995,
      "learning_rate": 4.623136190659658e-07,
      "loss": 0.4024,
      "step": 13264
    },
    {
      "epoch": 1.6264100049043648,
      "grad_norm": 1.9902991355878283,
      "learning_rate": 4.6202002051723206e-07,
      "loss": 0.4357,
      "step": 13265
    },
    {
      "epoch": 1.6265326140264835,
      "grad_norm": 1.9162427826489217,
      "learning_rate": 4.6172650573308305e-07,
      "loss": 0.4061,
      "step": 13266
    },
    {
      "epoch": 1.6266552231486022,
      "grad_norm": 1.7491751291420077,
      "learning_rate": 4.614330747255835e-07,
      "loss": 0.441,
      "step": 13267
    },
    {
      "epoch": 1.626777832270721,
      "grad_norm": 2.0555959710923073,
      "learning_rate": 4.6113972750679387e-07,
      "loss": 0.4243,
      "step": 13268
    },
    {
      "epoch": 1.6269004413928396,
      "grad_norm": 1.9249908921051144,
      "learning_rate": 4.6084646408877e-07,
      "loss": 0.4878,
      "step": 13269
    },
    {
      "epoch": 1.6270230505149583,
      "grad_norm": 1.9693879085699495,
      "learning_rate": 4.605532844835667e-07,
      "loss": 0.4324,
      "step": 13270
    },
    {
      "epoch": 1.627145659637077,
      "grad_norm": 1.9675807482732302,
      "learning_rate": 4.6026018870323456e-07,
      "loss": 0.4164,
      "step": 13271
    },
    {
      "epoch": 1.6272682687591957,
      "grad_norm": 1.9426968567059988,
      "learning_rate": 4.599671767598196e-07,
      "loss": 0.4893,
      "step": 13272
    },
    {
      "epoch": 1.6273908778813144,
      "grad_norm": 1.867663958099173,
      "learning_rate": 4.596742486653649e-07,
      "loss": 0.4185,
      "step": 13273
    },
    {
      "epoch": 1.6275134870034331,
      "grad_norm": 1.9567439903228199,
      "learning_rate": 4.5938140443191114e-07,
      "loss": 0.4469,
      "step": 13274
    },
    {
      "epoch": 1.6276360961255518,
      "grad_norm": 1.7660546939598312,
      "learning_rate": 4.590886440714937e-07,
      "loss": 0.4118,
      "step": 13275
    },
    {
      "epoch": 1.6277587052476705,
      "grad_norm": 2.027378795642174,
      "learning_rate": 4.5879596759614675e-07,
      "loss": 0.4288,
      "step": 13276
    },
    {
      "epoch": 1.6278813143697892,
      "grad_norm": 2.000920607220474,
      "learning_rate": 4.5850337501789854e-07,
      "loss": 0.4066,
      "step": 13277
    },
    {
      "epoch": 1.628003923491908,
      "grad_norm": 2.0470484071197643,
      "learning_rate": 4.5821086634877666e-07,
      "loss": 0.4614,
      "step": 13278
    },
    {
      "epoch": 1.6281265326140266,
      "grad_norm": 2.075402907258279,
      "learning_rate": 4.5791844160080203e-07,
      "loss": 0.4209,
      "step": 13279
    },
    {
      "epoch": 1.6282491417361453,
      "grad_norm": 1.861515610286339,
      "learning_rate": 4.5762610078599527e-07,
      "loss": 0.4825,
      "step": 13280
    },
    {
      "epoch": 1.6283717508582638,
      "grad_norm": 1.8385874411882164,
      "learning_rate": 4.573338439163716e-07,
      "loss": 0.4193,
      "step": 13281
    },
    {
      "epoch": 1.6284943599803825,
      "grad_norm": 1.8200566182396014,
      "learning_rate": 4.570416710039427e-07,
      "loss": 0.4146,
      "step": 13282
    },
    {
      "epoch": 1.6286169691025012,
      "grad_norm": 1.966335614956888,
      "learning_rate": 4.5674958206071763e-07,
      "loss": 0.4416,
      "step": 13283
    },
    {
      "epoch": 1.62873957822462,
      "grad_norm": 1.8690129525254053,
      "learning_rate": 4.5645757709870284e-07,
      "loss": 0.449,
      "step": 13284
    },
    {
      "epoch": 1.6288621873467386,
      "grad_norm": 1.9486212465317683,
      "learning_rate": 4.561656561298994e-07,
      "loss": 0.4077,
      "step": 13285
    },
    {
      "epoch": 1.6289847964688573,
      "grad_norm": 2.017856776218401,
      "learning_rate": 4.558738191663051e-07,
      "loss": 0.445,
      "step": 13286
    },
    {
      "epoch": 1.629107405590976,
      "grad_norm": 1.9904305513166527,
      "learning_rate": 4.555820662199156e-07,
      "loss": 0.419,
      "step": 13287
    },
    {
      "epoch": 1.6292300147130947,
      "grad_norm": 1.9163612596097277,
      "learning_rate": 4.5529039730272397e-07,
      "loss": 0.4526,
      "step": 13288
    },
    {
      "epoch": 1.6293526238352132,
      "grad_norm": 2.218775293891012,
      "learning_rate": 4.5499881242671573e-07,
      "loss": 0.467,
      "step": 13289
    },
    {
      "epoch": 1.6294752329573319,
      "grad_norm": 2.0116452258488833,
      "learning_rate": 4.547073116038764e-07,
      "loss": 0.4378,
      "step": 13290
    },
    {
      "epoch": 1.6295978420794506,
      "grad_norm": 1.9207401250708247,
      "learning_rate": 4.5441589484618824e-07,
      "loss": 0.452,
      "step": 13291
    },
    {
      "epoch": 1.6297204512015693,
      "grad_norm": 1.9674660839547362,
      "learning_rate": 4.5412456216562816e-07,
      "loss": 0.3931,
      "step": 13292
    },
    {
      "epoch": 1.629843060323688,
      "grad_norm": 1.9666747039375836,
      "learning_rate": 4.5383331357417016e-07,
      "loss": 0.4386,
      "step": 13293
    },
    {
      "epoch": 1.6299656694458067,
      "grad_norm": 1.9428637052168658,
      "learning_rate": 4.5354214908378573e-07,
      "loss": 0.4052,
      "step": 13294
    },
    {
      "epoch": 1.6300882785679254,
      "grad_norm": 1.9503440739831275,
      "learning_rate": 4.532510687064415e-07,
      "loss": 0.4119,
      "step": 13295
    },
    {
      "epoch": 1.630210887690044,
      "grad_norm": 1.9150779550339856,
      "learning_rate": 4.5296007245410225e-07,
      "loss": 0.4416,
      "step": 13296
    },
    {
      "epoch": 1.6303334968121628,
      "grad_norm": 1.7796743699997044,
      "learning_rate": 4.526691603387276e-07,
      "loss": 0.3878,
      "step": 13297
    },
    {
      "epoch": 1.6304561059342815,
      "grad_norm": 1.8424845870550044,
      "learning_rate": 4.523783323722755e-07,
      "loss": 0.4074,
      "step": 13298
    },
    {
      "epoch": 1.6305787150564002,
      "grad_norm": 1.9786126384197664,
      "learning_rate": 4.520875885666984e-07,
      "loss": 0.4348,
      "step": 13299
    },
    {
      "epoch": 1.6307013241785189,
      "grad_norm": 1.8373310870786028,
      "learning_rate": 4.517969289339469e-07,
      "loss": 0.4004,
      "step": 13300
    },
    {
      "epoch": 1.6308239333006376,
      "grad_norm": 1.93119045444217,
      "learning_rate": 4.51506353485969e-07,
      "loss": 0.4025,
      "step": 13301
    },
    {
      "epoch": 1.6309465424227563,
      "grad_norm": 2.0171466829551505,
      "learning_rate": 4.5121586223470516e-07,
      "loss": 0.4359,
      "step": 13302
    },
    {
      "epoch": 1.631069151544875,
      "grad_norm": 2.038711225330225,
      "learning_rate": 4.5092545519209665e-07,
      "loss": 0.3759,
      "step": 13303
    },
    {
      "epoch": 1.6311917606669937,
      "grad_norm": 2.0765951664608298,
      "learning_rate": 4.5063513237008e-07,
      "loss": 0.4168,
      "step": 13304
    },
    {
      "epoch": 1.6313143697891124,
      "grad_norm": 1.9129440925592585,
      "learning_rate": 4.5034489378058767e-07,
      "loss": 0.4332,
      "step": 13305
    },
    {
      "epoch": 1.631436978911231,
      "grad_norm": 1.8367941457295338,
      "learning_rate": 4.500547394355481e-07,
      "loss": 0.4185,
      "step": 13306
    },
    {
      "epoch": 1.6315595880333498,
      "grad_norm": 1.644636221293131,
      "learning_rate": 4.4976466934688797e-07,
      "loss": 0.4525,
      "step": 13307
    },
    {
      "epoch": 1.6316821971554685,
      "grad_norm": 1.9052859419400288,
      "learning_rate": 4.4947468352653014e-07,
      "loss": 0.3659,
      "step": 13308
    },
    {
      "epoch": 1.6318048062775872,
      "grad_norm": 1.9940304995200955,
      "learning_rate": 4.491847819863929e-07,
      "loss": 0.4246,
      "step": 13309
    },
    {
      "epoch": 1.6319274153997059,
      "grad_norm": 2.0311671167321395,
      "learning_rate": 4.488949647383914e-07,
      "loss": 0.4347,
      "step": 13310
    },
    {
      "epoch": 1.6320500245218246,
      "grad_norm": 1.997760099383205,
      "learning_rate": 4.4860523179443866e-07,
      "loss": 0.3876,
      "step": 13311
    },
    {
      "epoch": 1.6321726336439433,
      "grad_norm": 1.862945696278082,
      "learning_rate": 4.483155831664418e-07,
      "loss": 0.4223,
      "step": 13312
    },
    {
      "epoch": 1.6322952427660617,
      "grad_norm": 1.8823304060843327,
      "learning_rate": 4.480260188663074e-07,
      "loss": 0.4466,
      "step": 13313
    },
    {
      "epoch": 1.6324178518881804,
      "grad_norm": 1.8838858445378535,
      "learning_rate": 4.4773653890593625e-07,
      "loss": 0.3714,
      "step": 13314
    },
    {
      "epoch": 1.6325404610102991,
      "grad_norm": 1.915049640885823,
      "learning_rate": 4.4744714329722604e-07,
      "loss": 0.4434,
      "step": 13315
    },
    {
      "epoch": 1.6326630701324178,
      "grad_norm": 1.9637221306226411,
      "learning_rate": 4.47157832052072e-07,
      "loss": 0.4247,
      "step": 13316
    },
    {
      "epoch": 1.6327856792545365,
      "grad_norm": 2.230411832914322,
      "learning_rate": 4.4686860518236593e-07,
      "loss": 0.4676,
      "step": 13317
    },
    {
      "epoch": 1.6329082883766552,
      "grad_norm": 1.8694122221217424,
      "learning_rate": 4.4657946269999504e-07,
      "loss": 0.4549,
      "step": 13318
    },
    {
      "epoch": 1.633030897498774,
      "grad_norm": 2.0039851990860122,
      "learning_rate": 4.4629040461684284e-07,
      "loss": 0.438,
      "step": 13319
    },
    {
      "epoch": 1.6331535066208926,
      "grad_norm": 1.8249873056009853,
      "learning_rate": 4.4600143094479103e-07,
      "loss": 0.4201,
      "step": 13320
    },
    {
      "epoch": 1.6332761157430111,
      "grad_norm": 1.999188534213056,
      "learning_rate": 4.457125416957178e-07,
      "loss": 0.359,
      "step": 13321
    },
    {
      "epoch": 1.6333987248651298,
      "grad_norm": 2.0600722079616025,
      "learning_rate": 4.4542373688149483e-07,
      "loss": 0.4413,
      "step": 13322
    },
    {
      "epoch": 1.6335213339872485,
      "grad_norm": 1.8552122714405395,
      "learning_rate": 4.4513501651399336e-07,
      "loss": 0.3687,
      "step": 13323
    },
    {
      "epoch": 1.6336439431093672,
      "grad_norm": 2.1552390025281363,
      "learning_rate": 4.4484638060508123e-07,
      "loss": 0.3839,
      "step": 13324
    },
    {
      "epoch": 1.633766552231486,
      "grad_norm": 2.1049269285295114,
      "learning_rate": 4.445578291666208e-07,
      "loss": 0.4465,
      "step": 13325
    },
    {
      "epoch": 1.6338891613536046,
      "grad_norm": 1.8231762997602299,
      "learning_rate": 4.44269362210473e-07,
      "loss": 0.4391,
      "step": 13326
    },
    {
      "epoch": 1.6340117704757233,
      "grad_norm": 2.1239228273417856,
      "learning_rate": 4.4398097974849314e-07,
      "loss": 0.4472,
      "step": 13327
    },
    {
      "epoch": 1.634134379597842,
      "grad_norm": 2.006084775884176,
      "learning_rate": 4.436926817925355e-07,
      "loss": 0.4123,
      "step": 13328
    },
    {
      "epoch": 1.6342569887199607,
      "grad_norm": 1.8017755140192877,
      "learning_rate": 4.434044683544483e-07,
      "loss": 0.4454,
      "step": 13329
    },
    {
      "epoch": 1.6343795978420794,
      "grad_norm": 1.9616009932024137,
      "learning_rate": 4.431163394460794e-07,
      "loss": 0.3879,
      "step": 13330
    },
    {
      "epoch": 1.6345022069641981,
      "grad_norm": 1.9342645808686338,
      "learning_rate": 4.4282829507927007e-07,
      "loss": 0.3948,
      "step": 13331
    },
    {
      "epoch": 1.6346248160863168,
      "grad_norm": 1.9845638534821761,
      "learning_rate": 4.4254033526585917e-07,
      "loss": 0.471,
      "step": 13332
    },
    {
      "epoch": 1.6347474252084355,
      "grad_norm": 1.91902355309021,
      "learning_rate": 4.422524600176828e-07,
      "loss": 0.3818,
      "step": 13333
    },
    {
      "epoch": 1.6348700343305542,
      "grad_norm": 1.866633256057597,
      "learning_rate": 4.419646693465748e-07,
      "loss": 0.4168,
      "step": 13334
    },
    {
      "epoch": 1.634992643452673,
      "grad_norm": 1.7986624095287485,
      "learning_rate": 4.416769632643608e-07,
      "loss": 0.4031,
      "step": 13335
    },
    {
      "epoch": 1.6351152525747916,
      "grad_norm": 1.9390933210225934,
      "learning_rate": 4.41389341782868e-07,
      "loss": 0.3469,
      "step": 13336
    },
    {
      "epoch": 1.6352378616969103,
      "grad_norm": 1.8894731268732219,
      "learning_rate": 4.4110180491391793e-07,
      "loss": 0.429,
      "step": 13337
    },
    {
      "epoch": 1.635360470819029,
      "grad_norm": 2.0679852017522222,
      "learning_rate": 4.40814352669329e-07,
      "loss": 0.4293,
      "step": 13338
    },
    {
      "epoch": 1.6354830799411477,
      "grad_norm": 2.0936075628100843,
      "learning_rate": 4.405269850609148e-07,
      "loss": 0.4502,
      "step": 13339
    },
    {
      "epoch": 1.6356056890632664,
      "grad_norm": 2.2030274180149387,
      "learning_rate": 4.4023970210048755e-07,
      "loss": 0.4404,
      "step": 13340
    },
    {
      "epoch": 1.6357282981853851,
      "grad_norm": 2.123964997370056,
      "learning_rate": 4.399525037998559e-07,
      "loss": 0.4179,
      "step": 13341
    },
    {
      "epoch": 1.6358509073075038,
      "grad_norm": 2.0011263572542246,
      "learning_rate": 4.396653901708234e-07,
      "loss": 0.4209,
      "step": 13342
    },
    {
      "epoch": 1.6359735164296225,
      "grad_norm": 2.0982351990134926,
      "learning_rate": 4.3937836122519005e-07,
      "loss": 0.4356,
      "step": 13343
    },
    {
      "epoch": 1.636096125551741,
      "grad_norm": 1.896495291920732,
      "learning_rate": 4.390914169747548e-07,
      "loss": 0.4193,
      "step": 13344
    },
    {
      "epoch": 1.6362187346738597,
      "grad_norm": 1.9222334566465813,
      "learning_rate": 4.388045574313102e-07,
      "loss": 0.3853,
      "step": 13345
    },
    {
      "epoch": 1.6363413437959784,
      "grad_norm": 2.006085153678577,
      "learning_rate": 4.3851778260664805e-07,
      "loss": 0.4058,
      "step": 13346
    },
    {
      "epoch": 1.636463952918097,
      "grad_norm": 1.970267149105459,
      "learning_rate": 4.382310925125538e-07,
      "loss": 0.4247,
      "step": 13347
    },
    {
      "epoch": 1.6365865620402158,
      "grad_norm": 1.9609788892900377,
      "learning_rate": 4.3794448716081237e-07,
      "loss": 0.4235,
      "step": 13348
    },
    {
      "epoch": 1.6367091711623345,
      "grad_norm": 2.005139456611124,
      "learning_rate": 4.3765796656320274e-07,
      "loss": 0.429,
      "step": 13349
    },
    {
      "epoch": 1.6368317802844532,
      "grad_norm": 1.8504994026894994,
      "learning_rate": 4.3737153073150215e-07,
      "loss": 0.3864,
      "step": 13350
    },
    {
      "epoch": 1.636954389406572,
      "grad_norm": 1.7971435519413552,
      "learning_rate": 4.370851796774833e-07,
      "loss": 0.4604,
      "step": 13351
    },
    {
      "epoch": 1.6370769985286904,
      "grad_norm": 2.020294380242217,
      "learning_rate": 4.3679891341291524e-07,
      "loss": 0.3861,
      "step": 13352
    },
    {
      "epoch": 1.637199607650809,
      "grad_norm": 2.0346263631947012,
      "learning_rate": 4.365127319495643e-07,
      "loss": 0.4068,
      "step": 13353
    },
    {
      "epoch": 1.6373222167729278,
      "grad_norm": 2.044924498344309,
      "learning_rate": 4.362266352991937e-07,
      "loss": 0.4193,
      "step": 13354
    },
    {
      "epoch": 1.6374448258950465,
      "grad_norm": 2.1394741600549128,
      "learning_rate": 4.359406234735622e-07,
      "loss": 0.4544,
      "step": 13355
    },
    {
      "epoch": 1.6375674350171652,
      "grad_norm": 1.9674212002782643,
      "learning_rate": 4.356546964844244e-07,
      "loss": 0.4106,
      "step": 13356
    },
    {
      "epoch": 1.6376900441392839,
      "grad_norm": 1.7512010829408329,
      "learning_rate": 4.3536885434353416e-07,
      "loss": 0.4189,
      "step": 13357
    },
    {
      "epoch": 1.6378126532614026,
      "grad_norm": 1.9822409065645328,
      "learning_rate": 4.3508309706263826e-07,
      "loss": 0.4743,
      "step": 13358
    },
    {
      "epoch": 1.6379352623835213,
      "grad_norm": 2.1085609045777556,
      "learning_rate": 4.3479742465348335e-07,
      "loss": 0.4633,
      "step": 13359
    },
    {
      "epoch": 1.63805787150564,
      "grad_norm": 1.886909029704905,
      "learning_rate": 4.345118371278098e-07,
      "loss": 0.4303,
      "step": 13360
    },
    {
      "epoch": 1.6381804806277587,
      "grad_norm": 2.0567154430821266,
      "learning_rate": 4.3422633449735713e-07,
      "loss": 0.4015,
      "step": 13361
    },
    {
      "epoch": 1.6383030897498774,
      "grad_norm": 2.0172773070073626,
      "learning_rate": 4.3394091677385843e-07,
      "loss": 0.4178,
      "step": 13362
    },
    {
      "epoch": 1.638425698871996,
      "grad_norm": 2.009967076585611,
      "learning_rate": 4.336555839690465e-07,
      "loss": 0.409,
      "step": 13363
    },
    {
      "epoch": 1.6385483079941148,
      "grad_norm": 2.0231163004402037,
      "learning_rate": 4.333703360946484e-07,
      "loss": 0.4196,
      "step": 13364
    },
    {
      "epoch": 1.6386709171162335,
      "grad_norm": 1.8365811499997655,
      "learning_rate": 4.3308517316238753e-07,
      "loss": 0.4003,
      "step": 13365
    },
    {
      "epoch": 1.6387935262383522,
      "grad_norm": 1.7983956964161896,
      "learning_rate": 4.32800095183985e-07,
      "loss": 0.4232,
      "step": 13366
    },
    {
      "epoch": 1.6389161353604709,
      "grad_norm": 1.839857277735832,
      "learning_rate": 4.32515102171159e-07,
      "loss": 0.4274,
      "step": 13367
    },
    {
      "epoch": 1.6390387444825896,
      "grad_norm": 2.0724107459292798,
      "learning_rate": 4.3223019413562243e-07,
      "loss": 0.4249,
      "step": 13368
    },
    {
      "epoch": 1.6391613536047083,
      "grad_norm": 1.9590283060973257,
      "learning_rate": 4.3194537108908504e-07,
      "loss": 0.4296,
      "step": 13369
    },
    {
      "epoch": 1.639283962726827,
      "grad_norm": 1.8647005549581583,
      "learning_rate": 4.316606330432546e-07,
      "loss": 0.3892,
      "step": 13370
    },
    {
      "epoch": 1.6394065718489457,
      "grad_norm": 2.137356160274452,
      "learning_rate": 4.313759800098333e-07,
      "loss": 0.433,
      "step": 13371
    },
    {
      "epoch": 1.6395291809710644,
      "grad_norm": 1.9560983370796425,
      "learning_rate": 4.3109141200052216e-07,
      "loss": 0.4034,
      "step": 13372
    },
    {
      "epoch": 1.639651790093183,
      "grad_norm": 2.130465195963815,
      "learning_rate": 4.3080692902701586e-07,
      "loss": 0.4527,
      "step": 13373
    },
    {
      "epoch": 1.6397743992153018,
      "grad_norm": 1.8413832433876334,
      "learning_rate": 4.305225311010089e-07,
      "loss": 0.4134,
      "step": 13374
    },
    {
      "epoch": 1.6398970083374205,
      "grad_norm": 1.8272451089827502,
      "learning_rate": 4.302382182341888e-07,
      "loss": 0.4259,
      "step": 13375
    },
    {
      "epoch": 1.640019617459539,
      "grad_norm": 1.8056078957867985,
      "learning_rate": 4.299539904382427e-07,
      "loss": 0.368,
      "step": 13376
    },
    {
      "epoch": 1.6401422265816576,
      "grad_norm": 2.2847457078308233,
      "learning_rate": 4.296698477248526e-07,
      "loss": 0.4346,
      "step": 13377
    },
    {
      "epoch": 1.6402648357037763,
      "grad_norm": 1.8217784735817963,
      "learning_rate": 4.293857901056961e-07,
      "loss": 0.4234,
      "step": 13378
    },
    {
      "epoch": 1.640387444825895,
      "grad_norm": 1.9573132203000094,
      "learning_rate": 4.291018175924494e-07,
      "loss": 0.4713,
      "step": 13379
    },
    {
      "epoch": 1.6405100539480137,
      "grad_norm": 1.9945947736380047,
      "learning_rate": 4.2881793019678487e-07,
      "loss": 0.4236,
      "step": 13380
    },
    {
      "epoch": 1.6406326630701324,
      "grad_norm": 2.0319020212552217,
      "learning_rate": 4.285341279303704e-07,
      "loss": 0.4615,
      "step": 13381
    },
    {
      "epoch": 1.6407552721922511,
      "grad_norm": 1.9669648357489067,
      "learning_rate": 4.282504108048696e-07,
      "loss": 0.4484,
      "step": 13382
    },
    {
      "epoch": 1.6408778813143696,
      "grad_norm": 2.027551680716718,
      "learning_rate": 4.279667788319447e-07,
      "loss": 0.4802,
      "step": 13383
    },
    {
      "epoch": 1.6410004904364883,
      "grad_norm": 2.0422623671951117,
      "learning_rate": 4.276832320232549e-07,
      "loss": 0.4399,
      "step": 13384
    },
    {
      "epoch": 1.641123099558607,
      "grad_norm": 2.0457383732879664,
      "learning_rate": 4.273997703904514e-07,
      "loss": 0.3942,
      "step": 13385
    },
    {
      "epoch": 1.6412457086807257,
      "grad_norm": 1.9915463262264546,
      "learning_rate": 4.2711639394518693e-07,
      "loss": 0.4133,
      "step": 13386
    },
    {
      "epoch": 1.6413683178028444,
      "grad_norm": 2.0490957821201294,
      "learning_rate": 4.2683310269910907e-07,
      "loss": 0.4012,
      "step": 13387
    },
    {
      "epoch": 1.6414909269249631,
      "grad_norm": 2.093865019898418,
      "learning_rate": 4.2654989666386065e-07,
      "loss": 0.454,
      "step": 13388
    },
    {
      "epoch": 1.6416135360470818,
      "grad_norm": 2.052580753178878,
      "learning_rate": 4.262667758510816e-07,
      "loss": 0.4347,
      "step": 13389
    },
    {
      "epoch": 1.6417361451692005,
      "grad_norm": 1.8563193731448844,
      "learning_rate": 4.259837402724101e-07,
      "loss": 0.3847,
      "step": 13390
    },
    {
      "epoch": 1.6418587542913192,
      "grad_norm": 1.9639517923464127,
      "learning_rate": 4.25700789939478e-07,
      "loss": 0.3997,
      "step": 13391
    },
    {
      "epoch": 1.641981363413438,
      "grad_norm": 2.0008995815345347,
      "learning_rate": 4.254179248639165e-07,
      "loss": 0.4261,
      "step": 13392
    },
    {
      "epoch": 1.6421039725355566,
      "grad_norm": 1.8912769712798165,
      "learning_rate": 4.2513514505735014e-07,
      "loss": 0.4298,
      "step": 13393
    },
    {
      "epoch": 1.6422265816576753,
      "grad_norm": 1.9826436515359929,
      "learning_rate": 4.248524505314033e-07,
      "loss": 0.3904,
      "step": 13394
    },
    {
      "epoch": 1.642349190779794,
      "grad_norm": 2.041448015282898,
      "learning_rate": 4.2456984129769397e-07,
      "loss": 0.4427,
      "step": 13395
    },
    {
      "epoch": 1.6424717999019127,
      "grad_norm": 2.03627887902935,
      "learning_rate": 4.242873173678383e-07,
      "loss": 0.4622,
      "step": 13396
    },
    {
      "epoch": 1.6425944090240314,
      "grad_norm": 2.0094632708039377,
      "learning_rate": 4.2400487875344983e-07,
      "loss": 0.4404,
      "step": 13397
    },
    {
      "epoch": 1.6427170181461501,
      "grad_norm": 1.9987487858509598,
      "learning_rate": 4.237225254661348e-07,
      "loss": 0.4232,
      "step": 13398
    },
    {
      "epoch": 1.6428396272682688,
      "grad_norm": 1.9916413876660708,
      "learning_rate": 4.2344025751750004e-07,
      "loss": 0.4104,
      "step": 13399
    },
    {
      "epoch": 1.6429622363903875,
      "grad_norm": 1.8945243512154273,
      "learning_rate": 4.2315807491914747e-07,
      "loss": 0.3843,
      "step": 13400
    },
    {
      "epoch": 1.6430848455125062,
      "grad_norm": 1.9509104671930302,
      "learning_rate": 4.228759776826746e-07,
      "loss": 0.4427,
      "step": 13401
    },
    {
      "epoch": 1.643207454634625,
      "grad_norm": 1.7422824698028354,
      "learning_rate": 4.225939658196759e-07,
      "loss": 0.3962,
      "step": 13402
    },
    {
      "epoch": 1.6433300637567436,
      "grad_norm": 2.0684523440889824,
      "learning_rate": 4.223120393417429e-07,
      "loss": 0.4402,
      "step": 13403
    },
    {
      "epoch": 1.6434526728788623,
      "grad_norm": 1.9294404428331795,
      "learning_rate": 4.220301982604638e-07,
      "loss": 0.4379,
      "step": 13404
    },
    {
      "epoch": 1.643575282000981,
      "grad_norm": 1.9529275490210771,
      "learning_rate": 4.217484425874224e-07,
      "loss": 0.4451,
      "step": 13405
    },
    {
      "epoch": 1.6436978911230997,
      "grad_norm": 1.7343621918017442,
      "learning_rate": 4.2146677233419893e-07,
      "loss": 0.3953,
      "step": 13406
    },
    {
      "epoch": 1.6438205002452182,
      "grad_norm": 1.8148121633296417,
      "learning_rate": 4.2118518751237096e-07,
      "loss": 0.4237,
      "step": 13407
    },
    {
      "epoch": 1.643943109367337,
      "grad_norm": 2.041764452618663,
      "learning_rate": 4.209036881335118e-07,
      "loss": 0.4458,
      "step": 13408
    },
    {
      "epoch": 1.6440657184894556,
      "grad_norm": 1.6565237183716428,
      "learning_rate": 4.2062227420919226e-07,
      "loss": 0.4439,
      "step": 13409
    },
    {
      "epoch": 1.6441883276115743,
      "grad_norm": 2.0666813354346107,
      "learning_rate": 4.2034094575097857e-07,
      "loss": 0.4632,
      "step": 13410
    },
    {
      "epoch": 1.644310936733693,
      "grad_norm": 1.9080796874257187,
      "learning_rate": 4.200597027704331e-07,
      "loss": 0.4244,
      "step": 13411
    },
    {
      "epoch": 1.6444335458558117,
      "grad_norm": 1.8108270116450103,
      "learning_rate": 4.1977854527911613e-07,
      "loss": 0.4057,
      "step": 13412
    },
    {
      "epoch": 1.6445561549779304,
      "grad_norm": 1.9749631849762448,
      "learning_rate": 4.1949747328858456e-07,
      "loss": 0.4292,
      "step": 13413
    },
    {
      "epoch": 1.644678764100049,
      "grad_norm": 2.177357712179502,
      "learning_rate": 4.1921648681038985e-07,
      "loss": 0.4794,
      "step": 13414
    },
    {
      "epoch": 1.6448013732221676,
      "grad_norm": 2.12867832170537,
      "learning_rate": 4.189355858560806e-07,
      "loss": 0.4291,
      "step": 13415
    },
    {
      "epoch": 1.6449239823442863,
      "grad_norm": 1.8573237928967326,
      "learning_rate": 4.1865477043720294e-07,
      "loss": 0.4041,
      "step": 13416
    },
    {
      "epoch": 1.645046591466405,
      "grad_norm": 1.8937046600575222,
      "learning_rate": 4.183740405653003e-07,
      "loss": 0.3953,
      "step": 13417
    },
    {
      "epoch": 1.6451692005885237,
      "grad_norm": 2.1144113474928754,
      "learning_rate": 4.180933962519085e-07,
      "loss": 0.4399,
      "step": 13418
    },
    {
      "epoch": 1.6452918097106424,
      "grad_norm": 1.9787859075564591,
      "learning_rate": 4.1781283750856385e-07,
      "loss": 0.437,
      "step": 13419
    },
    {
      "epoch": 1.645414418832761,
      "grad_norm": 2.162573314830301,
      "learning_rate": 4.175323643467985e-07,
      "loss": 0.4234,
      "step": 13420
    },
    {
      "epoch": 1.6455370279548798,
      "grad_norm": 1.9424139005516763,
      "learning_rate": 4.1725197677813913e-07,
      "loss": 0.4373,
      "step": 13421
    },
    {
      "epoch": 1.6456596370769985,
      "grad_norm": 1.9029945559097885,
      "learning_rate": 4.1697167481411115e-07,
      "loss": 0.4049,
      "step": 13422
    },
    {
      "epoch": 1.6457822461991172,
      "grad_norm": 1.8605298043914682,
      "learning_rate": 4.166914584662346e-07,
      "loss": 0.4183,
      "step": 13423
    },
    {
      "epoch": 1.6459048553212359,
      "grad_norm": 2.251411808160672,
      "learning_rate": 4.16411327746028e-07,
      "loss": 0.4166,
      "step": 13424
    },
    {
      "epoch": 1.6460274644433546,
      "grad_norm": 1.920224519161788,
      "learning_rate": 4.161312826650038e-07,
      "loss": 0.4445,
      "step": 13425
    },
    {
      "epoch": 1.6461500735654733,
      "grad_norm": 2.108486985314741,
      "learning_rate": 4.1585132323467364e-07,
      "loss": 0.4416,
      "step": 13426
    },
    {
      "epoch": 1.646272682687592,
      "grad_norm": 1.782900623541433,
      "learning_rate": 4.1557144946654386e-07,
      "loss": 0.4212,
      "step": 13427
    },
    {
      "epoch": 1.6463952918097107,
      "grad_norm": 1.8841572194723608,
      "learning_rate": 4.15291661372117e-07,
      "loss": 0.3788,
      "step": 13428
    },
    {
      "epoch": 1.6465179009318294,
      "grad_norm": 1.9389371181883532,
      "learning_rate": 4.150119589628937e-07,
      "loss": 0.3863,
      "step": 13429
    },
    {
      "epoch": 1.646640510053948,
      "grad_norm": 1.917859500861251,
      "learning_rate": 4.14732342250371e-07,
      "loss": 0.4567,
      "step": 13430
    },
    {
      "epoch": 1.6467631191760668,
      "grad_norm": 1.8164213470639958,
      "learning_rate": 4.144528112460397e-07,
      "loss": 0.4326,
      "step": 13431
    },
    {
      "epoch": 1.6468857282981855,
      "grad_norm": 1.750845668918648,
      "learning_rate": 4.1417336596139e-07,
      "loss": 0.4135,
      "step": 13432
    },
    {
      "epoch": 1.6470083374203042,
      "grad_norm": 1.8500383994608292,
      "learning_rate": 4.1389400640790834e-07,
      "loss": 0.3828,
      "step": 13433
    },
    {
      "epoch": 1.6471309465424229,
      "grad_norm": 1.9513099415799027,
      "learning_rate": 4.1361473259707603e-07,
      "loss": 0.4522,
      "step": 13434
    },
    {
      "epoch": 1.6472535556645416,
      "grad_norm": 2.0105196696998604,
      "learning_rate": 4.133355445403714e-07,
      "loss": 0.4125,
      "step": 13435
    },
    {
      "epoch": 1.6473761647866603,
      "grad_norm": 1.865803759895425,
      "learning_rate": 4.1305644224926997e-07,
      "loss": 0.4538,
      "step": 13436
    },
    {
      "epoch": 1.647498773908779,
      "grad_norm": 2.0010111529688555,
      "learning_rate": 4.12777425735244e-07,
      "loss": 0.4246,
      "step": 13437
    },
    {
      "epoch": 1.6476213830308974,
      "grad_norm": 1.8935324108287244,
      "learning_rate": 4.12498495009761e-07,
      "loss": 0.3919,
      "step": 13438
    },
    {
      "epoch": 1.6477439921530161,
      "grad_norm": 2.0955520522983337,
      "learning_rate": 4.1221965008428483e-07,
      "loss": 0.4716,
      "step": 13439
    },
    {
      "epoch": 1.6478666012751348,
      "grad_norm": 1.7989866931071927,
      "learning_rate": 4.119408909702777e-07,
      "loss": 0.3862,
      "step": 13440
    },
    {
      "epoch": 1.6479892103972535,
      "grad_norm": 1.8948389414538473,
      "learning_rate": 4.1166221767919624e-07,
      "loss": 0.4377,
      "step": 13441
    },
    {
      "epoch": 1.6481118195193722,
      "grad_norm": 2.0217130409887054,
      "learning_rate": 4.1138363022249497e-07,
      "loss": 0.4097,
      "step": 13442
    },
    {
      "epoch": 1.648234428641491,
      "grad_norm": 1.963741752340861,
      "learning_rate": 4.1110512861162385e-07,
      "loss": 0.4561,
      "step": 13443
    },
    {
      "epoch": 1.6483570377636096,
      "grad_norm": 1.9699299416452292,
      "learning_rate": 4.1082671285803036e-07,
      "loss": 0.4273,
      "step": 13444
    },
    {
      "epoch": 1.6484796468857283,
      "grad_norm": 1.9365821427039804,
      "learning_rate": 4.1054838297315715e-07,
      "loss": 0.4322,
      "step": 13445
    },
    {
      "epoch": 1.6486022560078468,
      "grad_norm": 1.7402130159409577,
      "learning_rate": 4.102701389684449e-07,
      "loss": 0.4278,
      "step": 13446
    },
    {
      "epoch": 1.6487248651299655,
      "grad_norm": 1.970417322144226,
      "learning_rate": 4.099919808553296e-07,
      "loss": 0.4121,
      "step": 13447
    },
    {
      "epoch": 1.6488474742520842,
      "grad_norm": 2.042249432083708,
      "learning_rate": 4.097139086452434e-07,
      "loss": 0.4317,
      "step": 13448
    },
    {
      "epoch": 1.648970083374203,
      "grad_norm": 1.879287486583651,
      "learning_rate": 4.094359223496161e-07,
      "loss": 0.4405,
      "step": 13449
    },
    {
      "epoch": 1.6490926924963216,
      "grad_norm": 2.0607546510942267,
      "learning_rate": 4.091580219798741e-07,
      "loss": 0.4107,
      "step": 13450
    },
    {
      "epoch": 1.6492153016184403,
      "grad_norm": 2.02956527015039,
      "learning_rate": 4.0888020754743877e-07,
      "loss": 0.3976,
      "step": 13451
    },
    {
      "epoch": 1.649337910740559,
      "grad_norm": 2.004731131301374,
      "learning_rate": 4.086024790637286e-07,
      "loss": 0.3867,
      "step": 13452
    },
    {
      "epoch": 1.6494605198626777,
      "grad_norm": 1.9442766292408422,
      "learning_rate": 4.083248365401596e-07,
      "loss": 0.4197,
      "step": 13453
    },
    {
      "epoch": 1.6495831289847964,
      "grad_norm": 1.9100949766115651,
      "learning_rate": 4.0804727998814216e-07,
      "loss": 0.4486,
      "step": 13454
    },
    {
      "epoch": 1.6497057381069151,
      "grad_norm": 2.073616250527107,
      "learning_rate": 4.0776980941908575e-07,
      "loss": 0.4378,
      "step": 13455
    },
    {
      "epoch": 1.6498283472290338,
      "grad_norm": 1.8806433328696361,
      "learning_rate": 4.074924248443932e-07,
      "loss": 0.4239,
      "step": 13456
    },
    {
      "epoch": 1.6499509563511525,
      "grad_norm": 1.7495056624368746,
      "learning_rate": 4.0721512627546757e-07,
      "loss": 0.4244,
      "step": 13457
    },
    {
      "epoch": 1.6500735654732712,
      "grad_norm": 2.0241518596151744,
      "learning_rate": 4.069379137237042e-07,
      "loss": 0.4233,
      "step": 13458
    },
    {
      "epoch": 1.65019617459539,
      "grad_norm": 2.039246663577585,
      "learning_rate": 4.0666078720049906e-07,
      "loss": 0.4185,
      "step": 13459
    },
    {
      "epoch": 1.6503187837175086,
      "grad_norm": 1.7554269139739025,
      "learning_rate": 4.063837467172413e-07,
      "loss": 0.4179,
      "step": 13460
    },
    {
      "epoch": 1.6504413928396273,
      "grad_norm": 1.9206987988829787,
      "learning_rate": 4.0610679228531737e-07,
      "loss": 0.4189,
      "step": 13461
    },
    {
      "epoch": 1.650564001961746,
      "grad_norm": 1.8530061663665842,
      "learning_rate": 4.058299239161112e-07,
      "loss": 0.4475,
      "step": 13462
    },
    {
      "epoch": 1.6506866110838647,
      "grad_norm": 2.2770275384819834,
      "learning_rate": 4.0555314162100346e-07,
      "loss": 0.4613,
      "step": 13463
    },
    {
      "epoch": 1.6508092202059834,
      "grad_norm": 1.819730862416694,
      "learning_rate": 4.052764454113692e-07,
      "loss": 0.4428,
      "step": 13464
    },
    {
      "epoch": 1.6509318293281021,
      "grad_norm": 2.1043836349754548,
      "learning_rate": 4.0499983529858094e-07,
      "loss": 0.4121,
      "step": 13465
    },
    {
      "epoch": 1.6510544384502208,
      "grad_norm": 1.8460863019681126,
      "learning_rate": 4.047233112940091e-07,
      "loss": 0.4769,
      "step": 13466
    },
    {
      "epoch": 1.6511770475723395,
      "grad_norm": 1.6354691169111586,
      "learning_rate": 4.044468734090179e-07,
      "loss": 0.3932,
      "step": 13467
    },
    {
      "epoch": 1.6512996566944582,
      "grad_norm": 2.086152090717495,
      "learning_rate": 4.0417052165497075e-07,
      "loss": 0.4396,
      "step": 13468
    },
    {
      "epoch": 1.651422265816577,
      "grad_norm": 1.9127746054126262,
      "learning_rate": 4.0389425604322465e-07,
      "loss": 0.4027,
      "step": 13469
    },
    {
      "epoch": 1.6515448749386954,
      "grad_norm": 1.9397472316884896,
      "learning_rate": 4.0361807658513636e-07,
      "loss": 0.4018,
      "step": 13470
    },
    {
      "epoch": 1.651667484060814,
      "grad_norm": 1.7468922355874583,
      "learning_rate": 4.033419832920557e-07,
      "loss": 0.4441,
      "step": 13471
    },
    {
      "epoch": 1.6517900931829328,
      "grad_norm": 2.130713566703868,
      "learning_rate": 4.030659761753322e-07,
      "loss": 0.4361,
      "step": 13472
    },
    {
      "epoch": 1.6519127023050515,
      "grad_norm": 2.047077308113884,
      "learning_rate": 4.027900552463093e-07,
      "loss": 0.4245,
      "step": 13473
    },
    {
      "epoch": 1.6520353114271702,
      "grad_norm": 1.8684046234394962,
      "learning_rate": 4.025142205163274e-07,
      "loss": 0.3787,
      "step": 13474
    },
    {
      "epoch": 1.652157920549289,
      "grad_norm": 2.0594283663722046,
      "learning_rate": 4.0223847199672426e-07,
      "loss": 0.4594,
      "step": 13475
    },
    {
      "epoch": 1.6522805296714076,
      "grad_norm": 1.832848555528738,
      "learning_rate": 4.0196280969883427e-07,
      "loss": 0.4744,
      "step": 13476
    },
    {
      "epoch": 1.6524031387935263,
      "grad_norm": 1.97364244700117,
      "learning_rate": 4.016872336339872e-07,
      "loss": 0.4193,
      "step": 13477
    },
    {
      "epoch": 1.6525257479156448,
      "grad_norm": 1.8105367290567236,
      "learning_rate": 4.014117438135087e-07,
      "loss": 0.3955,
      "step": 13478
    },
    {
      "epoch": 1.6526483570377635,
      "grad_norm": 1.9070623635112816,
      "learning_rate": 4.011363402487231e-07,
      "loss": 0.4426,
      "step": 13479
    },
    {
      "epoch": 1.6527709661598822,
      "grad_norm": 1.9896570770488557,
      "learning_rate": 4.0086102295095077e-07,
      "loss": 0.4509,
      "step": 13480
    },
    {
      "epoch": 1.6528935752820009,
      "grad_norm": 1.972589862369837,
      "learning_rate": 4.0058579193150537e-07,
      "loss": 0.4191,
      "step": 13481
    },
    {
      "epoch": 1.6530161844041196,
      "grad_norm": 1.9601020044503055,
      "learning_rate": 4.0031064720170097e-07,
      "loss": 0.4894,
      "step": 13482
    },
    {
      "epoch": 1.6531387935262383,
      "grad_norm": 2.00918036019863,
      "learning_rate": 4.000355887728463e-07,
      "loss": 0.4097,
      "step": 13483
    },
    {
      "epoch": 1.653261402648357,
      "grad_norm": 2.0741601599757766,
      "learning_rate": 3.9976061665624707e-07,
      "loss": 0.4559,
      "step": 13484
    },
    {
      "epoch": 1.6533840117704757,
      "grad_norm": 2.1296118137714113,
      "learning_rate": 3.9948573086320387e-07,
      "loss": 0.394,
      "step": 13485
    },
    {
      "epoch": 1.6535066208925944,
      "grad_norm": 1.8086013423162353,
      "learning_rate": 3.992109314050163e-07,
      "loss": 0.406,
      "step": 13486
    },
    {
      "epoch": 1.653629230014713,
      "grad_norm": 1.9396885871990321,
      "learning_rate": 3.989362182929782e-07,
      "loss": 0.4064,
      "step": 13487
    },
    {
      "epoch": 1.6537518391368318,
      "grad_norm": 1.7845769295246015,
      "learning_rate": 3.986615915383815e-07,
      "loss": 0.3826,
      "step": 13488
    },
    {
      "epoch": 1.6538744482589505,
      "grad_norm": 2.0397796876333807,
      "learning_rate": 3.9838705115251316e-07,
      "loss": 0.4289,
      "step": 13489
    },
    {
      "epoch": 1.6539970573810692,
      "grad_norm": 1.9444320491518272,
      "learning_rate": 3.9811259714665815e-07,
      "loss": 0.4224,
      "step": 13490
    },
    {
      "epoch": 1.6541196665031879,
      "grad_norm": 1.963293569597422,
      "learning_rate": 3.9783822953209594e-07,
      "loss": 0.3868,
      "step": 13491
    },
    {
      "epoch": 1.6542422756253066,
      "grad_norm": 1.8826823087491527,
      "learning_rate": 3.975639483201038e-07,
      "loss": 0.3796,
      "step": 13492
    },
    {
      "epoch": 1.6543648847474253,
      "grad_norm": 1.9650510414388223,
      "learning_rate": 3.9728975352195693e-07,
      "loss": 0.4846,
      "step": 13493
    },
    {
      "epoch": 1.654487493869544,
      "grad_norm": 1.9257341593348196,
      "learning_rate": 3.9701564514892266e-07,
      "loss": 0.443,
      "step": 13494
    },
    {
      "epoch": 1.6546101029916627,
      "grad_norm": 2.1067854809281163,
      "learning_rate": 3.967416232122681e-07,
      "loss": 0.4688,
      "step": 13495
    },
    {
      "epoch": 1.6547327121137814,
      "grad_norm": 1.9424856882207522,
      "learning_rate": 3.964676877232573e-07,
      "loss": 0.4379,
      "step": 13496
    },
    {
      "epoch": 1.6548553212359,
      "grad_norm": 1.9071684671343916,
      "learning_rate": 3.9619383869314824e-07,
      "loss": 0.4964,
      "step": 13497
    },
    {
      "epoch": 1.6549779303580188,
      "grad_norm": 1.8555813033693944,
      "learning_rate": 3.9592007613319633e-07,
      "loss": 0.4274,
      "step": 13498
    },
    {
      "epoch": 1.6551005394801375,
      "grad_norm": 1.7980119442085398,
      "learning_rate": 3.9564640005465447e-07,
      "loss": 0.4312,
      "step": 13499
    },
    {
      "epoch": 1.6552231486022562,
      "grad_norm": 1.8302731266106316,
      "learning_rate": 3.9537281046877154e-07,
      "loss": 0.3777,
      "step": 13500
    },
    {
      "epoch": 1.6553457577243746,
      "grad_norm": 1.879454061559099,
      "learning_rate": 3.9509930738679205e-07,
      "loss": 0.3973,
      "step": 13501
    },
    {
      "epoch": 1.6554683668464933,
      "grad_norm": 1.9124927205410236,
      "learning_rate": 3.9482589081995676e-07,
      "loss": 0.4089,
      "step": 13502
    },
    {
      "epoch": 1.655590975968612,
      "grad_norm": 2.0177302956501544,
      "learning_rate": 3.9455256077950506e-07,
      "loss": 0.4143,
      "step": 13503
    },
    {
      "epoch": 1.6557135850907307,
      "grad_norm": 1.9554428207941088,
      "learning_rate": 3.9427931727666983e-07,
      "loss": 0.3769,
      "step": 13504
    },
    {
      "epoch": 1.6558361942128494,
      "grad_norm": 1.8805956375803083,
      "learning_rate": 3.9400616032268304e-07,
      "loss": 0.4049,
      "step": 13505
    },
    {
      "epoch": 1.6559588033349681,
      "grad_norm": 1.9523202638263784,
      "learning_rate": 3.937330899287717e-07,
      "loss": 0.4336,
      "step": 13506
    },
    {
      "epoch": 1.6560814124570868,
      "grad_norm": 2.0908613587375844,
      "learning_rate": 3.9346010610615826e-07,
      "loss": 0.4254,
      "step": 13507
    },
    {
      "epoch": 1.6562040215792055,
      "grad_norm": 1.8577424605604238,
      "learning_rate": 3.9318720886606406e-07,
      "loss": 0.4245,
      "step": 13508
    },
    {
      "epoch": 1.656326630701324,
      "grad_norm": 1.8066459436591595,
      "learning_rate": 3.929143982197059e-07,
      "loss": 0.4485,
      "step": 13509
    },
    {
      "epoch": 1.6564492398234427,
      "grad_norm": 1.9616019646167357,
      "learning_rate": 3.9264167417829624e-07,
      "loss": 0.4064,
      "step": 13510
    },
    {
      "epoch": 1.6565718489455614,
      "grad_norm": 1.9991259594816213,
      "learning_rate": 3.9236903675304417e-07,
      "loss": 0.4023,
      "step": 13511
    },
    {
      "epoch": 1.6566944580676801,
      "grad_norm": 1.9990211790203107,
      "learning_rate": 3.9209648595515573e-07,
      "loss": 0.4445,
      "step": 13512
    },
    {
      "epoch": 1.6568170671897988,
      "grad_norm": 1.8986854589707334,
      "learning_rate": 3.918240217958347e-07,
      "loss": 0.4298,
      "step": 13513
    },
    {
      "epoch": 1.6569396763119175,
      "grad_norm": 1.9299293929537087,
      "learning_rate": 3.915516442862774e-07,
      "loss": 0.3967,
      "step": 13514
    },
    {
      "epoch": 1.6570622854340362,
      "grad_norm": 2.0137221286371285,
      "learning_rate": 3.912793534376802e-07,
      "loss": 0.4293,
      "step": 13515
    },
    {
      "epoch": 1.657184894556155,
      "grad_norm": 2.0886021689033503,
      "learning_rate": 3.910071492612355e-07,
      "loss": 0.4481,
      "step": 13516
    },
    {
      "epoch": 1.6573075036782736,
      "grad_norm": 1.8657296616526324,
      "learning_rate": 3.907350317681299e-07,
      "loss": 0.4515,
      "step": 13517
    },
    {
      "epoch": 1.6574301128003923,
      "grad_norm": 1.8671883418333488,
      "learning_rate": 3.9046300096954927e-07,
      "loss": 0.4143,
      "step": 13518
    },
    {
      "epoch": 1.657552721922511,
      "grad_norm": 2.11202435014121,
      "learning_rate": 3.9019105687667343e-07,
      "loss": 0.4268,
      "step": 13519
    },
    {
      "epoch": 1.6576753310446297,
      "grad_norm": 1.993242061387284,
      "learning_rate": 3.8991919950068096e-07,
      "loss": 0.4325,
      "step": 13520
    },
    {
      "epoch": 1.6577979401667484,
      "grad_norm": 2.1763886513635438,
      "learning_rate": 3.8964742885274437e-07,
      "loss": 0.4066,
      "step": 13521
    },
    {
      "epoch": 1.6579205492888671,
      "grad_norm": 1.8261973625816295,
      "learning_rate": 3.89375744944035e-07,
      "loss": 0.3796,
      "step": 13522
    },
    {
      "epoch": 1.6580431584109858,
      "grad_norm": 2.0028178286590173,
      "learning_rate": 3.891041477857191e-07,
      "loss": 0.3714,
      "step": 13523
    },
    {
      "epoch": 1.6581657675331045,
      "grad_norm": 1.942683266348833,
      "learning_rate": 3.8883263738895923e-07,
      "loss": 0.4336,
      "step": 13524
    },
    {
      "epoch": 1.6582883766552232,
      "grad_norm": 1.9981026861688036,
      "learning_rate": 3.885612137649153e-07,
      "loss": 0.427,
      "step": 13525
    },
    {
      "epoch": 1.658410985777342,
      "grad_norm": 2.1449182964969364,
      "learning_rate": 3.8828987692474506e-07,
      "loss": 0.4342,
      "step": 13526
    },
    {
      "epoch": 1.6585335948994606,
      "grad_norm": 2.0762627100040185,
      "learning_rate": 3.8801862687959797e-07,
      "loss": 0.4327,
      "step": 13527
    },
    {
      "epoch": 1.6586562040215793,
      "grad_norm": 1.9921780200273118,
      "learning_rate": 3.877474636406242e-07,
      "loss": 0.4261,
      "step": 13528
    },
    {
      "epoch": 1.658778813143698,
      "grad_norm": 1.901629817791228,
      "learning_rate": 3.874763872189699e-07,
      "loss": 0.3973,
      "step": 13529
    },
    {
      "epoch": 1.6589014222658167,
      "grad_norm": 2.0238940880230865,
      "learning_rate": 3.8720539762577583e-07,
      "loss": 0.4244,
      "step": 13530
    },
    {
      "epoch": 1.6590240313879354,
      "grad_norm": 1.967766550432807,
      "learning_rate": 3.869344948721798e-07,
      "loss": 0.3964,
      "step": 13531
    },
    {
      "epoch": 1.659146640510054,
      "grad_norm": 2.1567754489144986,
      "learning_rate": 3.8666367896931676e-07,
      "loss": 0.4199,
      "step": 13532
    },
    {
      "epoch": 1.6592692496321726,
      "grad_norm": 2.0271271802752957,
      "learning_rate": 3.8639294992831895e-07,
      "loss": 0.4284,
      "step": 13533
    },
    {
      "epoch": 1.6593918587542913,
      "grad_norm": 1.6904144983073008,
      "learning_rate": 3.861223077603124e-07,
      "loss": 0.4127,
      "step": 13534
    },
    {
      "epoch": 1.65951446787641,
      "grad_norm": 1.9702934196392305,
      "learning_rate": 3.8585175247642076e-07,
      "loss": 0.4467,
      "step": 13535
    },
    {
      "epoch": 1.6596370769985287,
      "grad_norm": 2.181136704604064,
      "learning_rate": 3.855812840877654e-07,
      "loss": 0.4192,
      "step": 13536
    },
    {
      "epoch": 1.6597596861206474,
      "grad_norm": 1.9793620878365425,
      "learning_rate": 3.8531090260546216e-07,
      "loss": 0.3876,
      "step": 13537
    },
    {
      "epoch": 1.659882295242766,
      "grad_norm": 1.9480901933806898,
      "learning_rate": 3.850406080406249e-07,
      "loss": 0.4281,
      "step": 13538
    },
    {
      "epoch": 1.6600049043648848,
      "grad_norm": 1.8945089975413212,
      "learning_rate": 3.8477040040436256e-07,
      "loss": 0.3874,
      "step": 13539
    },
    {
      "epoch": 1.6601275134870033,
      "grad_norm": 1.9945403918974902,
      "learning_rate": 3.845002797077821e-07,
      "loss": 0.4865,
      "step": 13540
    },
    {
      "epoch": 1.660250122609122,
      "grad_norm": 2.0843474937256237,
      "learning_rate": 3.8423024596198456e-07,
      "loss": 0.4412,
      "step": 13541
    },
    {
      "epoch": 1.6603727317312407,
      "grad_norm": 1.931611060997303,
      "learning_rate": 3.839602991780703e-07,
      "loss": 0.4497,
      "step": 13542
    },
    {
      "epoch": 1.6604953408533594,
      "grad_norm": 1.6621591726977998,
      "learning_rate": 3.8369043936713396e-07,
      "loss": 0.3947,
      "step": 13543
    },
    {
      "epoch": 1.660617949975478,
      "grad_norm": 1.8843029511766651,
      "learning_rate": 3.834206665402665e-07,
      "loss": 0.3783,
      "step": 13544
    },
    {
      "epoch": 1.6607405590975968,
      "grad_norm": 1.9375649603661163,
      "learning_rate": 3.8315098070855677e-07,
      "loss": 0.4124,
      "step": 13545
    },
    {
      "epoch": 1.6608631682197155,
      "grad_norm": 2.0410673407807614,
      "learning_rate": 3.828813818830898e-07,
      "loss": 0.4653,
      "step": 13546
    },
    {
      "epoch": 1.6609857773418342,
      "grad_norm": 1.7844001989232128,
      "learning_rate": 3.8261187007494623e-07,
      "loss": 0.4308,
      "step": 13547
    },
    {
      "epoch": 1.6611083864639529,
      "grad_norm": 1.9673480027468782,
      "learning_rate": 3.823424452952026e-07,
      "loss": 0.4019,
      "step": 13548
    },
    {
      "epoch": 1.6612309955860716,
      "grad_norm": 1.8010383966101011,
      "learning_rate": 3.8207310755493446e-07,
      "loss": 0.4539,
      "step": 13549
    },
    {
      "epoch": 1.6613536047081903,
      "grad_norm": 2.12763529080127,
      "learning_rate": 3.818038568652102e-07,
      "loss": 0.4364,
      "step": 13550
    },
    {
      "epoch": 1.661476213830309,
      "grad_norm": 1.9075030992917472,
      "learning_rate": 3.815346932370978e-07,
      "loss": 0.4215,
      "step": 13551
    },
    {
      "epoch": 1.6615988229524277,
      "grad_norm": 1.9083746854000436,
      "learning_rate": 3.812656166816597e-07,
      "loss": 0.4057,
      "step": 13552
    },
    {
      "epoch": 1.6617214320745464,
      "grad_norm": 1.861679912767333,
      "learning_rate": 3.8099662720995605e-07,
      "loss": 0.4098,
      "step": 13553
    },
    {
      "epoch": 1.661844041196665,
      "grad_norm": 2.0235306483402566,
      "learning_rate": 3.807277248330421e-07,
      "loss": 0.4446,
      "step": 13554
    },
    {
      "epoch": 1.6619666503187838,
      "grad_norm": 2.01931742738779,
      "learning_rate": 3.8045890956197096e-07,
      "loss": 0.4479,
      "step": 13555
    },
    {
      "epoch": 1.6620892594409025,
      "grad_norm": 2.1498470615026593,
      "learning_rate": 3.8019018140779103e-07,
      "loss": 0.3901,
      "step": 13556
    },
    {
      "epoch": 1.6622118685630212,
      "grad_norm": 2.032707727903874,
      "learning_rate": 3.799215403815468e-07,
      "loss": 0.4294,
      "step": 13557
    },
    {
      "epoch": 1.6623344776851399,
      "grad_norm": 1.9872611811655225,
      "learning_rate": 3.796529864942805e-07,
      "loss": 0.4209,
      "step": 13558
    },
    {
      "epoch": 1.6624570868072586,
      "grad_norm": 1.9455562267910183,
      "learning_rate": 3.793845197570312e-07,
      "loss": 0.4215,
      "step": 13559
    },
    {
      "epoch": 1.6625796959293773,
      "grad_norm": 1.854764157036996,
      "learning_rate": 3.791161401808319e-07,
      "loss": 0.401,
      "step": 13560
    },
    {
      "epoch": 1.662702305051496,
      "grad_norm": 1.8462417859666471,
      "learning_rate": 3.788478477767138e-07,
      "loss": 0.3981,
      "step": 13561
    },
    {
      "epoch": 1.6628249141736147,
      "grad_norm": 1.9865901176488094,
      "learning_rate": 3.785796425557045e-07,
      "loss": 0.4136,
      "step": 13562
    },
    {
      "epoch": 1.6629475232957334,
      "grad_norm": 2.1811591858842276,
      "learning_rate": 3.783115245288274e-07,
      "loss": 0.3988,
      "step": 13563
    },
    {
      "epoch": 1.6630701324178518,
      "grad_norm": 1.982910202701339,
      "learning_rate": 3.7804349370710314e-07,
      "loss": 0.4191,
      "step": 13564
    },
    {
      "epoch": 1.6631927415399705,
      "grad_norm": 2.1168411680290364,
      "learning_rate": 3.777755501015476e-07,
      "loss": 0.3899,
      "step": 13565
    },
    {
      "epoch": 1.6633153506620892,
      "grad_norm": 2.0617953662738886,
      "learning_rate": 3.7750769372317417e-07,
      "loss": 0.4784,
      "step": 13566
    },
    {
      "epoch": 1.663437959784208,
      "grad_norm": 2.0564169096655722,
      "learning_rate": 3.772399245829919e-07,
      "loss": 0.4447,
      "step": 13567
    },
    {
      "epoch": 1.6635605689063266,
      "grad_norm": 1.9027068874244455,
      "learning_rate": 3.7697224269200723e-07,
      "loss": 0.4408,
      "step": 13568
    },
    {
      "epoch": 1.6636831780284453,
      "grad_norm": 2.0398622348381923,
      "learning_rate": 3.7670464806122217e-07,
      "loss": 0.4126,
      "step": 13569
    },
    {
      "epoch": 1.663805787150564,
      "grad_norm": 2.080419313969932,
      "learning_rate": 3.764371407016343e-07,
      "loss": 0.3767,
      "step": 13570
    },
    {
      "epoch": 1.6639283962726827,
      "grad_norm": 1.976647929026384,
      "learning_rate": 3.761697206242393e-07,
      "loss": 0.4322,
      "step": 13571
    },
    {
      "epoch": 1.6640510053948012,
      "grad_norm": 1.9997052613821737,
      "learning_rate": 3.7590238784002947e-07,
      "loss": 0.4522,
      "step": 13572
    },
    {
      "epoch": 1.66417361451692,
      "grad_norm": 2.0404200423232166,
      "learning_rate": 3.756351423599916e-07,
      "loss": 0.3799,
      "step": 13573
    },
    {
      "epoch": 1.6642962236390386,
      "grad_norm": 1.919743502258275,
      "learning_rate": 3.7536798419510996e-07,
      "loss": 0.4312,
      "step": 13574
    },
    {
      "epoch": 1.6644188327611573,
      "grad_norm": 1.92340658499277,
      "learning_rate": 3.751009133563657e-07,
      "loss": 0.3982,
      "step": 13575
    },
    {
      "epoch": 1.664541441883276,
      "grad_norm": 1.874723331352556,
      "learning_rate": 3.7483392985473654e-07,
      "loss": 0.4189,
      "step": 13576
    },
    {
      "epoch": 1.6646640510053947,
      "grad_norm": 1.8046569990461268,
      "learning_rate": 3.745670337011942e-07,
      "loss": 0.439,
      "step": 13577
    },
    {
      "epoch": 1.6647866601275134,
      "grad_norm": 2.003769303581133,
      "learning_rate": 3.7430022490670957e-07,
      "loss": 0.441,
      "step": 13578
    },
    {
      "epoch": 1.6649092692496321,
      "grad_norm": 2.009206524729988,
      "learning_rate": 3.7403350348224955e-07,
      "loss": 0.4852,
      "step": 13579
    },
    {
      "epoch": 1.6650318783717508,
      "grad_norm": 1.8441246024702942,
      "learning_rate": 3.7376686943877637e-07,
      "loss": 0.4207,
      "step": 13580
    },
    {
      "epoch": 1.6651544874938695,
      "grad_norm": 1.8132247702024413,
      "learning_rate": 3.735003227872483e-07,
      "loss": 0.4421,
      "step": 13581
    },
    {
      "epoch": 1.6652770966159882,
      "grad_norm": 1.800447541869716,
      "learning_rate": 3.7323386353862206e-07,
      "loss": 0.3933,
      "step": 13582
    },
    {
      "epoch": 1.665399705738107,
      "grad_norm": 2.2200380521556458,
      "learning_rate": 3.7296749170384895e-07,
      "loss": 0.4628,
      "step": 13583
    },
    {
      "epoch": 1.6655223148602256,
      "grad_norm": 1.8749790234905446,
      "learning_rate": 3.7270120729387796e-07,
      "loss": 0.4171,
      "step": 13584
    },
    {
      "epoch": 1.6656449239823443,
      "grad_norm": 1.7327181813072225,
      "learning_rate": 3.724350103196528e-07,
      "loss": 0.4444,
      "step": 13585
    },
    {
      "epoch": 1.665767533104463,
      "grad_norm": 2.0748000958971606,
      "learning_rate": 3.721689007921159e-07,
      "loss": 0.4206,
      "step": 13586
    },
    {
      "epoch": 1.6658901422265817,
      "grad_norm": 1.889800904987957,
      "learning_rate": 3.719028787222037e-07,
      "loss": 0.4352,
      "step": 13587
    },
    {
      "epoch": 1.6660127513487004,
      "grad_norm": 1.8691672609422334,
      "learning_rate": 3.7163694412085063e-07,
      "loss": 0.4297,
      "step": 13588
    },
    {
      "epoch": 1.6661353604708191,
      "grad_norm": 1.8154747318962203,
      "learning_rate": 3.713710969989881e-07,
      "loss": 0.4078,
      "step": 13589
    },
    {
      "epoch": 1.6662579695929378,
      "grad_norm": 2.036490576412266,
      "learning_rate": 3.7110533736754106e-07,
      "loss": 0.4384,
      "step": 13590
    },
    {
      "epoch": 1.6663805787150565,
      "grad_norm": 2.18558188945125,
      "learning_rate": 3.7083966523743324e-07,
      "loss": 0.4725,
      "step": 13591
    },
    {
      "epoch": 1.6665031878371752,
      "grad_norm": 1.9524846885066365,
      "learning_rate": 3.705740806195854e-07,
      "loss": 0.4348,
      "step": 13592
    },
    {
      "epoch": 1.666625796959294,
      "grad_norm": 2.0016424829686024,
      "learning_rate": 3.703085835249126e-07,
      "loss": 0.4236,
      "step": 13593
    },
    {
      "epoch": 1.6667484060814126,
      "grad_norm": 1.9890810664525058,
      "learning_rate": 3.7004317396432675e-07,
      "loss": 0.4459,
      "step": 13594
    },
    {
      "epoch": 1.666871015203531,
      "grad_norm": 1.919223302265869,
      "learning_rate": 3.697778519487374e-07,
      "loss": 0.4249,
      "step": 13595
    },
    {
      "epoch": 1.6669936243256498,
      "grad_norm": 2.0691538375711778,
      "learning_rate": 3.695126174890501e-07,
      "loss": 0.3966,
      "step": 13596
    },
    {
      "epoch": 1.6671162334477685,
      "grad_norm": 1.8032737816725772,
      "learning_rate": 3.69247470596166e-07,
      "loss": 0.4116,
      "step": 13597
    },
    {
      "epoch": 1.6672388425698872,
      "grad_norm": 1.912984881717113,
      "learning_rate": 3.689824112809823e-07,
      "loss": 0.4226,
      "step": 13598
    },
    {
      "epoch": 1.667361451692006,
      "grad_norm": 2.050775506228449,
      "learning_rate": 3.6871743955439493e-07,
      "loss": 0.4422,
      "step": 13599
    },
    {
      "epoch": 1.6674840608141246,
      "grad_norm": 1.8871038239159708,
      "learning_rate": 3.684525554272933e-07,
      "loss": 0.382,
      "step": 13600
    },
    {
      "epoch": 1.6676066699362433,
      "grad_norm": 2.0661378974068114,
      "learning_rate": 3.6818775891056564e-07,
      "loss": 0.4277,
      "step": 13601
    },
    {
      "epoch": 1.667729279058362,
      "grad_norm": 1.9130927225825591,
      "learning_rate": 3.679230500150954e-07,
      "loss": 0.4362,
      "step": 13602
    },
    {
      "epoch": 1.6678518881804805,
      "grad_norm": 1.8996625414065327,
      "learning_rate": 3.6765842875176176e-07,
      "loss": 0.4158,
      "step": 13603
    },
    {
      "epoch": 1.6679744973025992,
      "grad_norm": 1.9778851949194347,
      "learning_rate": 3.673938951314415e-07,
      "loss": 0.4338,
      "step": 13604
    },
    {
      "epoch": 1.6680971064247179,
      "grad_norm": 1.8500560506210548,
      "learning_rate": 3.6712944916500847e-07,
      "loss": 0.3876,
      "step": 13605
    },
    {
      "epoch": 1.6682197155468366,
      "grad_norm": 2.042931847634576,
      "learning_rate": 3.668650908633309e-07,
      "loss": 0.4193,
      "step": 13606
    },
    {
      "epoch": 1.6683423246689553,
      "grad_norm": 1.9604649918041073,
      "learning_rate": 3.6660082023727396e-07,
      "loss": 0.3972,
      "step": 13607
    },
    {
      "epoch": 1.668464933791074,
      "grad_norm": 2.1025137084213665,
      "learning_rate": 3.6633663729770007e-07,
      "loss": 0.42,
      "step": 13608
    },
    {
      "epoch": 1.6685875429131927,
      "grad_norm": 2.1353975787904647,
      "learning_rate": 3.6607254205546923e-07,
      "loss": 0.4538,
      "step": 13609
    },
    {
      "epoch": 1.6687101520353114,
      "grad_norm": 2.0358754098341745,
      "learning_rate": 3.658085345214332e-07,
      "loss": 0.4103,
      "step": 13610
    },
    {
      "epoch": 1.66883276115743,
      "grad_norm": 2.0676601681473072,
      "learning_rate": 3.655446147064448e-07,
      "loss": 0.4133,
      "step": 13611
    },
    {
      "epoch": 1.6689553702795488,
      "grad_norm": 1.8853000120444832,
      "learning_rate": 3.6528078262135227e-07,
      "loss": 0.3719,
      "step": 13612
    },
    {
      "epoch": 1.6690779794016675,
      "grad_norm": 1.9792328925803437,
      "learning_rate": 3.6501703827699796e-07,
      "loss": 0.3908,
      "step": 13613
    },
    {
      "epoch": 1.6692005885237862,
      "grad_norm": 1.965591159277678,
      "learning_rate": 3.6475338168422386e-07,
      "loss": 0.4571,
      "step": 13614
    },
    {
      "epoch": 1.6693231976459049,
      "grad_norm": 2.074679879266292,
      "learning_rate": 3.644898128538651e-07,
      "loss": 0.439,
      "step": 13615
    },
    {
      "epoch": 1.6694458067680236,
      "grad_norm": 1.8471301291339637,
      "learning_rate": 3.6422633179675646e-07,
      "loss": 0.3984,
      "step": 13616
    },
    {
      "epoch": 1.6695684158901423,
      "grad_norm": 1.8688957347394344,
      "learning_rate": 3.6396293852372614e-07,
      "loss": 0.4286,
      "step": 13617
    },
    {
      "epoch": 1.669691025012261,
      "grad_norm": 1.9325474858313658,
      "learning_rate": 3.636996330456011e-07,
      "loss": 0.4154,
      "step": 13618
    },
    {
      "epoch": 1.6698136341343797,
      "grad_norm": 1.92887329549086,
      "learning_rate": 3.634364153732028e-07,
      "loss": 0.4191,
      "step": 13619
    },
    {
      "epoch": 1.6699362432564984,
      "grad_norm": 2.1981875043014014,
      "learning_rate": 3.631732855173503e-07,
      "loss": 0.4117,
      "step": 13620
    },
    {
      "epoch": 1.670058852378617,
      "grad_norm": 2.0890724609803493,
      "learning_rate": 3.6291024348885844e-07,
      "loss": 0.4192,
      "step": 13621
    },
    {
      "epoch": 1.6701814615007358,
      "grad_norm": 1.8762671549773013,
      "learning_rate": 3.6264728929854025e-07,
      "loss": 0.4431,
      "step": 13622
    },
    {
      "epoch": 1.6703040706228545,
      "grad_norm": 2.1481947958151837,
      "learning_rate": 3.62384422957201e-07,
      "loss": 0.5082,
      "step": 13623
    },
    {
      "epoch": 1.6704266797449732,
      "grad_norm": 1.9903444810059738,
      "learning_rate": 3.621216444756465e-07,
      "loss": 0.4156,
      "step": 13624
    },
    {
      "epoch": 1.6705492888670919,
      "grad_norm": 1.6683463443364202,
      "learning_rate": 3.6185895386467775e-07,
      "loss": 0.4089,
      "step": 13625
    },
    {
      "epoch": 1.6706718979892106,
      "grad_norm": 1.7036516948911433,
      "learning_rate": 3.615963511350912e-07,
      "loss": 0.4235,
      "step": 13626
    },
    {
      "epoch": 1.670794507111329,
      "grad_norm": 1.8537615237947458,
      "learning_rate": 3.613338362976801e-07,
      "loss": 0.4121,
      "step": 13627
    },
    {
      "epoch": 1.6709171162334477,
      "grad_norm": 1.8539378974525431,
      "learning_rate": 3.610714093632342e-07,
      "loss": 0.3689,
      "step": 13628
    },
    {
      "epoch": 1.6710397253555664,
      "grad_norm": 2.0019090815363283,
      "learning_rate": 3.608090703425407e-07,
      "loss": 0.4286,
      "step": 13629
    },
    {
      "epoch": 1.6711623344776851,
      "grad_norm": 2.002377702281607,
      "learning_rate": 3.6054681924638154e-07,
      "loss": 0.4782,
      "step": 13630
    },
    {
      "epoch": 1.6712849435998038,
      "grad_norm": 1.8778688004440092,
      "learning_rate": 3.6028465608553524e-07,
      "loss": 0.4285,
      "step": 13631
    },
    {
      "epoch": 1.6714075527219225,
      "grad_norm": 2.0174193624754557,
      "learning_rate": 3.600225808707783e-07,
      "loss": 0.3715,
      "step": 13632
    },
    {
      "epoch": 1.6715301618440412,
      "grad_norm": 1.7654477282729908,
      "learning_rate": 3.5976059361288093e-07,
      "loss": 0.4176,
      "step": 13633
    },
    {
      "epoch": 1.6716527709661597,
      "grad_norm": 1.6290412044030966,
      "learning_rate": 3.5949869432261255e-07,
      "loss": 0.3823,
      "step": 13634
    },
    {
      "epoch": 1.6717753800882784,
      "grad_norm": 2.1587554063381287,
      "learning_rate": 3.5923688301073763e-07,
      "loss": 0.4575,
      "step": 13635
    },
    {
      "epoch": 1.6718979892103971,
      "grad_norm": 1.9627610120905898,
      "learning_rate": 3.589751596880167e-07,
      "loss": 0.423,
      "step": 13636
    },
    {
      "epoch": 1.6720205983325158,
      "grad_norm": 1.9109763480539224,
      "learning_rate": 3.587135243652065e-07,
      "loss": 0.4568,
      "step": 13637
    },
    {
      "epoch": 1.6721432074546345,
      "grad_norm": 2.0536845278220683,
      "learning_rate": 3.58451977053062e-07,
      "loss": 0.4481,
      "step": 13638
    },
    {
      "epoch": 1.6722658165767532,
      "grad_norm": 1.994913234501027,
      "learning_rate": 3.581905177623324e-07,
      "loss": 0.4221,
      "step": 13639
    },
    {
      "epoch": 1.672388425698872,
      "grad_norm": 2.0832081927506216,
      "learning_rate": 3.5792914650376406e-07,
      "loss": 0.4118,
      "step": 13640
    },
    {
      "epoch": 1.6725110348209906,
      "grad_norm": 1.926039680535536,
      "learning_rate": 3.5766786328809976e-07,
      "loss": 0.4542,
      "step": 13641
    },
    {
      "epoch": 1.6726336439431093,
      "grad_norm": 2.0062198326941454,
      "learning_rate": 3.574066681260796e-07,
      "loss": 0.4093,
      "step": 13642
    },
    {
      "epoch": 1.672756253065228,
      "grad_norm": 2.0667640607445765,
      "learning_rate": 3.571455610284386e-07,
      "loss": 0.4114,
      "step": 13643
    },
    {
      "epoch": 1.6728788621873467,
      "grad_norm": 1.797885414323724,
      "learning_rate": 3.5688454200590807e-07,
      "loss": 0.4199,
      "step": 13644
    },
    {
      "epoch": 1.6730014713094654,
      "grad_norm": 2.0869227685522316,
      "learning_rate": 3.566236110692173e-07,
      "loss": 0.4306,
      "step": 13645
    },
    {
      "epoch": 1.6731240804315841,
      "grad_norm": 1.7910517133966521,
      "learning_rate": 3.563627682290904e-07,
      "loss": 0.3966,
      "step": 13646
    },
    {
      "epoch": 1.6732466895537028,
      "grad_norm": 1.8206959966343865,
      "learning_rate": 3.5610201349624925e-07,
      "loss": 0.4297,
      "step": 13647
    },
    {
      "epoch": 1.6733692986758215,
      "grad_norm": 1.9169795790858777,
      "learning_rate": 3.558413468814101e-07,
      "loss": 0.4407,
      "step": 13648
    },
    {
      "epoch": 1.6734919077979402,
      "grad_norm": 2.009665819392312,
      "learning_rate": 3.5558076839528803e-07,
      "loss": 0.415,
      "step": 13649
    },
    {
      "epoch": 1.673614516920059,
      "grad_norm": 2.1262744424651885,
      "learning_rate": 3.5532027804859257e-07,
      "loss": 0.4192,
      "step": 13650
    },
    {
      "epoch": 1.6737371260421776,
      "grad_norm": 1.8601961963141287,
      "learning_rate": 3.5505987585203073e-07,
      "loss": 0.3788,
      "step": 13651
    },
    {
      "epoch": 1.6738597351642963,
      "grad_norm": 1.9531521122814233,
      "learning_rate": 3.547995618163055e-07,
      "loss": 0.4213,
      "step": 13652
    },
    {
      "epoch": 1.673982344286415,
      "grad_norm": 2.129484383053002,
      "learning_rate": 3.545393359521157e-07,
      "loss": 0.3887,
      "step": 13653
    },
    {
      "epoch": 1.6741049534085337,
      "grad_norm": 1.9921798035607707,
      "learning_rate": 3.542791982701571e-07,
      "loss": 0.4791,
      "step": 13654
    },
    {
      "epoch": 1.6742275625306524,
      "grad_norm": 2.0056636637142633,
      "learning_rate": 3.5401914878112285e-07,
      "loss": 0.3886,
      "step": 13655
    },
    {
      "epoch": 1.6743501716527711,
      "grad_norm": 2.010196737269607,
      "learning_rate": 3.537591874957008e-07,
      "loss": 0.3996,
      "step": 13656
    },
    {
      "epoch": 1.6744727807748898,
      "grad_norm": 2.0858800983455588,
      "learning_rate": 3.5349931442457493e-07,
      "loss": 0.4574,
      "step": 13657
    },
    {
      "epoch": 1.6745953898970083,
      "grad_norm": 2.071714875442181,
      "learning_rate": 3.5323952957842826e-07,
      "loss": 0.4157,
      "step": 13658
    },
    {
      "epoch": 1.674717999019127,
      "grad_norm": 1.8542502880285003,
      "learning_rate": 3.529798329679365e-07,
      "loss": 0.3604,
      "step": 13659
    },
    {
      "epoch": 1.6748406081412457,
      "grad_norm": 1.8840934157856994,
      "learning_rate": 3.527202246037756e-07,
      "loss": 0.4575,
      "step": 13660
    },
    {
      "epoch": 1.6749632172633644,
      "grad_norm": 2.1035636233168735,
      "learning_rate": 3.5246070449661413e-07,
      "loss": 0.4858,
      "step": 13661
    },
    {
      "epoch": 1.675085826385483,
      "grad_norm": 1.889676670128586,
      "learning_rate": 3.5220127265712e-07,
      "loss": 0.4071,
      "step": 13662
    },
    {
      "epoch": 1.6752084355076018,
      "grad_norm": 1.8767037569340541,
      "learning_rate": 3.5194192909595583e-07,
      "loss": 0.4337,
      "step": 13663
    },
    {
      "epoch": 1.6753310446297205,
      "grad_norm": 1.928960344534362,
      "learning_rate": 3.5168267382378136e-07,
      "loss": 0.4347,
      "step": 13664
    },
    {
      "epoch": 1.6754536537518392,
      "grad_norm": 1.793608359647084,
      "learning_rate": 3.514235068512525e-07,
      "loss": 0.4193,
      "step": 13665
    },
    {
      "epoch": 1.6755762628739577,
      "grad_norm": 2.208635922538531,
      "learning_rate": 3.511644281890206e-07,
      "loss": 0.4607,
      "step": 13666
    },
    {
      "epoch": 1.6756988719960764,
      "grad_norm": 1.7874921393052356,
      "learning_rate": 3.50905437847735e-07,
      "loss": 0.4121,
      "step": 13667
    },
    {
      "epoch": 1.675821481118195,
      "grad_norm": 1.8404314326624662,
      "learning_rate": 3.506465358380412e-07,
      "loss": 0.4262,
      "step": 13668
    },
    {
      "epoch": 1.6759440902403138,
      "grad_norm": 2.1460513492507554,
      "learning_rate": 3.5038772217057983e-07,
      "loss": 0.4429,
      "step": 13669
    },
    {
      "epoch": 1.6760666993624325,
      "grad_norm": 1.9560228080907407,
      "learning_rate": 3.501289968559882e-07,
      "loss": 0.4166,
      "step": 13670
    },
    {
      "epoch": 1.6761893084845512,
      "grad_norm": 2.0996040405649206,
      "learning_rate": 3.498703599049005e-07,
      "loss": 0.4436,
      "step": 13671
    },
    {
      "epoch": 1.6763119176066699,
      "grad_norm": 2.004531689536003,
      "learning_rate": 3.4961181132794895e-07,
      "loss": 0.4475,
      "step": 13672
    },
    {
      "epoch": 1.6764345267287886,
      "grad_norm": 1.8884597520527109,
      "learning_rate": 3.4935335113575775e-07,
      "loss": 0.3673,
      "step": 13673
    },
    {
      "epoch": 1.6765571358509073,
      "grad_norm": 2.0675189470692974,
      "learning_rate": 3.4909497933895116e-07,
      "loss": 0.4254,
      "step": 13674
    },
    {
      "epoch": 1.676679744973026,
      "grad_norm": 1.7293856635512508,
      "learning_rate": 3.4883669594814954e-07,
      "loss": 0.4105,
      "step": 13675
    },
    {
      "epoch": 1.6768023540951447,
      "grad_norm": 2.1740366970464566,
      "learning_rate": 3.4857850097396785e-07,
      "loss": 0.3968,
      "step": 13676
    },
    {
      "epoch": 1.6769249632172634,
      "grad_norm": 1.9796113638116224,
      "learning_rate": 3.4832039442701804e-07,
      "loss": 0.4489,
      "step": 13677
    },
    {
      "epoch": 1.677047572339382,
      "grad_norm": 1.8925360023399231,
      "learning_rate": 3.4806237631791e-07,
      "loss": 0.4529,
      "step": 13678
    },
    {
      "epoch": 1.6771701814615008,
      "grad_norm": 2.1038836884373833,
      "learning_rate": 3.4780444665724723e-07,
      "loss": 0.393,
      "step": 13679
    },
    {
      "epoch": 1.6772927905836195,
      "grad_norm": 1.9629885570710028,
      "learning_rate": 3.4754660545563255e-07,
      "loss": 0.4016,
      "step": 13680
    },
    {
      "epoch": 1.6774153997057382,
      "grad_norm": 1.8547429781934661,
      "learning_rate": 3.472888527236623e-07,
      "loss": 0.4065,
      "step": 13681
    },
    {
      "epoch": 1.6775380088278569,
      "grad_norm": 2.0021090703433977,
      "learning_rate": 3.470311884719321e-07,
      "loss": 0.4561,
      "step": 13682
    },
    {
      "epoch": 1.6776606179499756,
      "grad_norm": 1.9442852179407697,
      "learning_rate": 3.467736127110308e-07,
      "loss": 0.4047,
      "step": 13683
    },
    {
      "epoch": 1.6777832270720943,
      "grad_norm": 1.929122555245681,
      "learning_rate": 3.4651612545154615e-07,
      "loss": 0.4582,
      "step": 13684
    },
    {
      "epoch": 1.677905836194213,
      "grad_norm": 1.9841199711578923,
      "learning_rate": 3.4625872670406206e-07,
      "loss": 0.4563,
      "step": 13685
    },
    {
      "epoch": 1.6780284453163317,
      "grad_norm": 1.910785890287298,
      "learning_rate": 3.4600141647915643e-07,
      "loss": 0.423,
      "step": 13686
    },
    {
      "epoch": 1.6781510544384504,
      "grad_norm": 2.124617583210436,
      "learning_rate": 3.4574419478740574e-07,
      "loss": 0.4325,
      "step": 13687
    },
    {
      "epoch": 1.678273663560569,
      "grad_norm": 1.9700117347887371,
      "learning_rate": 3.4548706163938296e-07,
      "loss": 0.4469,
      "step": 13688
    },
    {
      "epoch": 1.6783962726826875,
      "grad_norm": 1.934713808473606,
      "learning_rate": 3.452300170456563e-07,
      "loss": 0.4642,
      "step": 13689
    },
    {
      "epoch": 1.6785188818048062,
      "grad_norm": 2.0926045920669223,
      "learning_rate": 3.449730610167901e-07,
      "loss": 0.432,
      "step": 13690
    },
    {
      "epoch": 1.678641490926925,
      "grad_norm": 2.070325730253106,
      "learning_rate": 3.4471619356334614e-07,
      "loss": 0.4421,
      "step": 13691
    },
    {
      "epoch": 1.6787641000490436,
      "grad_norm": 1.9399468531449473,
      "learning_rate": 3.4445941469588293e-07,
      "loss": 0.4391,
      "step": 13692
    },
    {
      "epoch": 1.6788867091711623,
      "grad_norm": 2.0486225895108072,
      "learning_rate": 3.442027244249538e-07,
      "loss": 0.4134,
      "step": 13693
    },
    {
      "epoch": 1.679009318293281,
      "grad_norm": 1.7870329304940107,
      "learning_rate": 3.439461227611088e-07,
      "loss": 0.4271,
      "step": 13694
    },
    {
      "epoch": 1.6791319274153997,
      "grad_norm": 2.169469395553243,
      "learning_rate": 3.4368960971489564e-07,
      "loss": 0.4707,
      "step": 13695
    },
    {
      "epoch": 1.6792545365375184,
      "grad_norm": 1.877451936722493,
      "learning_rate": 3.434331852968564e-07,
      "loss": 0.423,
      "step": 13696
    },
    {
      "epoch": 1.679377145659637,
      "grad_norm": 2.0375697471897745,
      "learning_rate": 3.4317684951753166e-07,
      "loss": 0.429,
      "step": 13697
    },
    {
      "epoch": 1.6794997547817556,
      "grad_norm": 2.083991480157463,
      "learning_rate": 3.4292060238745623e-07,
      "loss": 0.4183,
      "step": 13698
    },
    {
      "epoch": 1.6796223639038743,
      "grad_norm": 1.9193162138132367,
      "learning_rate": 3.426644439171636e-07,
      "loss": 0.4006,
      "step": 13699
    },
    {
      "epoch": 1.679744973025993,
      "grad_norm": 1.8803844259566855,
      "learning_rate": 3.4240837411718075e-07,
      "loss": 0.4465,
      "step": 13700
    },
    {
      "epoch": 1.6798675821481117,
      "grad_norm": 2.0750468045660035,
      "learning_rate": 3.4215239299803417e-07,
      "loss": 0.4367,
      "step": 13701
    },
    {
      "epoch": 1.6799901912702304,
      "grad_norm": 2.0614093588631297,
      "learning_rate": 3.418965005702443e-07,
      "loss": 0.4351,
      "step": 13702
    },
    {
      "epoch": 1.6801128003923491,
      "grad_norm": 2.0320807006765156,
      "learning_rate": 3.416406968443284e-07,
      "loss": 0.433,
      "step": 13703
    },
    {
      "epoch": 1.6802354095144678,
      "grad_norm": 1.9161971279093923,
      "learning_rate": 3.413849818308007e-07,
      "loss": 0.4262,
      "step": 13704
    },
    {
      "epoch": 1.6803580186365865,
      "grad_norm": 2.1649097627938194,
      "learning_rate": 3.4112935554017316e-07,
      "loss": 0.4245,
      "step": 13705
    },
    {
      "epoch": 1.6804806277587052,
      "grad_norm": 1.8026360501661864,
      "learning_rate": 3.408738179829499e-07,
      "loss": 0.4227,
      "step": 13706
    },
    {
      "epoch": 1.680603236880824,
      "grad_norm": 2.064110719368079,
      "learning_rate": 3.406183691696352e-07,
      "loss": 0.4492,
      "step": 13707
    },
    {
      "epoch": 1.6807258460029426,
      "grad_norm": 2.075503298787509,
      "learning_rate": 3.4036300911072904e-07,
      "loss": 0.4243,
      "step": 13708
    },
    {
      "epoch": 1.6808484551250613,
      "grad_norm": 1.876381592774867,
      "learning_rate": 3.4010773781672583e-07,
      "loss": 0.4282,
      "step": 13709
    },
    {
      "epoch": 1.68097106424718,
      "grad_norm": 1.9965668845678988,
      "learning_rate": 3.398525552981188e-07,
      "loss": 0.4223,
      "step": 13710
    },
    {
      "epoch": 1.6810936733692987,
      "grad_norm": 1.9177488361028792,
      "learning_rate": 3.3959746156539576e-07,
      "loss": 0.4057,
      "step": 13711
    },
    {
      "epoch": 1.6812162824914174,
      "grad_norm": 1.9516803523045305,
      "learning_rate": 3.3934245662904207e-07,
      "loss": 0.4464,
      "step": 13712
    },
    {
      "epoch": 1.6813388916135361,
      "grad_norm": 2.106581977740792,
      "learning_rate": 3.390875404995381e-07,
      "loss": 0.4067,
      "step": 13713
    },
    {
      "epoch": 1.6814615007356548,
      "grad_norm": 1.9902628020602255,
      "learning_rate": 3.388327131873623e-07,
      "loss": 0.4081,
      "step": 13714
    },
    {
      "epoch": 1.6815841098577735,
      "grad_norm": 1.8793297505663822,
      "learning_rate": 3.3857797470298823e-07,
      "loss": 0.4346,
      "step": 13715
    },
    {
      "epoch": 1.6817067189798922,
      "grad_norm": 1.90661038790343,
      "learning_rate": 3.3832332505688516e-07,
      "loss": 0.4616,
      "step": 13716
    },
    {
      "epoch": 1.681829328102011,
      "grad_norm": 1.921807067812623,
      "learning_rate": 3.380687642595204e-07,
      "loss": 0.4027,
      "step": 13717
    },
    {
      "epoch": 1.6819519372241296,
      "grad_norm": 2.0333482573826904,
      "learning_rate": 3.378142923213573e-07,
      "loss": 0.4297,
      "step": 13718
    },
    {
      "epoch": 1.6820745463462483,
      "grad_norm": 1.944261722175067,
      "learning_rate": 3.3755990925285474e-07,
      "loss": 0.3872,
      "step": 13719
    },
    {
      "epoch": 1.682197155468367,
      "grad_norm": 1.9801337516525899,
      "learning_rate": 3.373056150644677e-07,
      "loss": 0.3948,
      "step": 13720
    },
    {
      "epoch": 1.6823197645904855,
      "grad_norm": 1.8452471210972006,
      "learning_rate": 3.3705140976664914e-07,
      "loss": 0.436,
      "step": 13721
    },
    {
      "epoch": 1.6824423737126042,
      "grad_norm": 1.9645752856038992,
      "learning_rate": 3.3679729336984685e-07,
      "loss": 0.4033,
      "step": 13722
    },
    {
      "epoch": 1.6825649828347229,
      "grad_norm": 1.9182879358202973,
      "learning_rate": 3.365432658845047e-07,
      "loss": 0.4385,
      "step": 13723
    },
    {
      "epoch": 1.6826875919568416,
      "grad_norm": 2.0026502246743485,
      "learning_rate": 3.362893273210646e-07,
      "loss": 0.4466,
      "step": 13724
    },
    {
      "epoch": 1.6828102010789603,
      "grad_norm": 1.912322127686128,
      "learning_rate": 3.360354776899644e-07,
      "loss": 0.4351,
      "step": 13725
    },
    {
      "epoch": 1.682932810201079,
      "grad_norm": 1.9315885490059947,
      "learning_rate": 3.357817170016367e-07,
      "loss": 0.4049,
      "step": 13726
    },
    {
      "epoch": 1.6830554193231977,
      "grad_norm": 2.0832235923693014,
      "learning_rate": 3.355280452665116e-07,
      "loss": 0.4497,
      "step": 13727
    },
    {
      "epoch": 1.6831780284453162,
      "grad_norm": 2.0099865947572533,
      "learning_rate": 3.352744624950163e-07,
      "loss": 0.3665,
      "step": 13728
    },
    {
      "epoch": 1.6833006375674349,
      "grad_norm": 2.011123810544392,
      "learning_rate": 3.3502096869757225e-07,
      "loss": 0.4137,
      "step": 13729
    },
    {
      "epoch": 1.6834232466895536,
      "grad_norm": 1.9326529627721387,
      "learning_rate": 3.347675638845993e-07,
      "loss": 0.4163,
      "step": 13730
    },
    {
      "epoch": 1.6835458558116723,
      "grad_norm": 2.179623776561181,
      "learning_rate": 3.345142480665131e-07,
      "loss": 0.4248,
      "step": 13731
    },
    {
      "epoch": 1.683668464933791,
      "grad_norm": 1.9326267206789665,
      "learning_rate": 3.3426102125372504e-07,
      "loss": 0.4032,
      "step": 13732
    },
    {
      "epoch": 1.6837910740559097,
      "grad_norm": 2.139881551646523,
      "learning_rate": 3.3400788345664267e-07,
      "loss": 0.4779,
      "step": 13733
    },
    {
      "epoch": 1.6839136831780284,
      "grad_norm": 1.8804907282821983,
      "learning_rate": 3.337548346856714e-07,
      "loss": 0.4511,
      "step": 13734
    },
    {
      "epoch": 1.684036292300147,
      "grad_norm": 1.8527014429849902,
      "learning_rate": 3.3350187495121147e-07,
      "loss": 0.4023,
      "step": 13735
    },
    {
      "epoch": 1.6841589014222658,
      "grad_norm": 1.9787997124388605,
      "learning_rate": 3.3324900426365937e-07,
      "loss": 0.407,
      "step": 13736
    },
    {
      "epoch": 1.6842815105443845,
      "grad_norm": 1.87601898288585,
      "learning_rate": 3.3299622263340937e-07,
      "loss": 0.3661,
      "step": 13737
    },
    {
      "epoch": 1.6844041196665032,
      "grad_norm": 1.991193785926227,
      "learning_rate": 3.327435300708512e-07,
      "loss": 0.3972,
      "step": 13738
    },
    {
      "epoch": 1.6845267287886219,
      "grad_norm": 2.01257910427258,
      "learning_rate": 3.32490926586371e-07,
      "loss": 0.437,
      "step": 13739
    },
    {
      "epoch": 1.6846493379107406,
      "grad_norm": 2.0090647141111484,
      "learning_rate": 3.322384121903502e-07,
      "loss": 0.4518,
      "step": 13740
    },
    {
      "epoch": 1.6847719470328593,
      "grad_norm": 1.878531224010542,
      "learning_rate": 3.319859868931691e-07,
      "loss": 0.4618,
      "step": 13741
    },
    {
      "epoch": 1.684894556154978,
      "grad_norm": 2.0959649531712388,
      "learning_rate": 3.317336507052016e-07,
      "loss": 0.4661,
      "step": 13742
    },
    {
      "epoch": 1.6850171652770967,
      "grad_norm": 1.8946202367023617,
      "learning_rate": 3.3148140363682024e-07,
      "loss": 0.4247,
      "step": 13743
    },
    {
      "epoch": 1.6851397743992154,
      "grad_norm": 1.8855378953966229,
      "learning_rate": 3.3122924569839147e-07,
      "loss": 0.4246,
      "step": 13744
    },
    {
      "epoch": 1.685262383521334,
      "grad_norm": 2.0419374267369084,
      "learning_rate": 3.309771769002809e-07,
      "loss": 0.4634,
      "step": 13745
    },
    {
      "epoch": 1.6853849926434528,
      "grad_norm": 2.2287293796484065,
      "learning_rate": 3.3072519725284774e-07,
      "loss": 0.4329,
      "step": 13746
    },
    {
      "epoch": 1.6855076017655715,
      "grad_norm": 1.8159770829335604,
      "learning_rate": 3.3047330676645004e-07,
      "loss": 0.4174,
      "step": 13747
    },
    {
      "epoch": 1.6856302108876902,
      "grad_norm": 1.9858277771157602,
      "learning_rate": 3.3022150545144014e-07,
      "loss": 0.4352,
      "step": 13748
    },
    {
      "epoch": 1.6857528200098089,
      "grad_norm": 1.925344355720666,
      "learning_rate": 3.2996979331816725e-07,
      "loss": 0.4147,
      "step": 13749
    },
    {
      "epoch": 1.6858754291319276,
      "grad_norm": 1.8565323513341025,
      "learning_rate": 3.297181703769775e-07,
      "loss": 0.4716,
      "step": 13750
    },
    {
      "epoch": 1.6859980382540463,
      "grad_norm": 2.0630228629477,
      "learning_rate": 3.294666366382138e-07,
      "loss": 0.4267,
      "step": 13751
    },
    {
      "epoch": 1.6861206473761647,
      "grad_norm": 1.9212714564254536,
      "learning_rate": 3.292151921122136e-07,
      "loss": 0.3933,
      "step": 13752
    },
    {
      "epoch": 1.6862432564982834,
      "grad_norm": 1.9288417679303955,
      "learning_rate": 3.2896383680931176e-07,
      "loss": 0.4428,
      "step": 13753
    },
    {
      "epoch": 1.6863658656204021,
      "grad_norm": 1.9902632731395626,
      "learning_rate": 3.2871257073984033e-07,
      "loss": 0.4055,
      "step": 13754
    },
    {
      "epoch": 1.6864884747425208,
      "grad_norm": 2.096901344590223,
      "learning_rate": 3.284613939141257e-07,
      "loss": 0.3779,
      "step": 13755
    },
    {
      "epoch": 1.6866110838646395,
      "grad_norm": 1.984024763834007,
      "learning_rate": 3.282103063424927e-07,
      "loss": 0.447,
      "step": 13756
    },
    {
      "epoch": 1.6867336929867582,
      "grad_norm": 1.9737714168223621,
      "learning_rate": 3.2795930803526025e-07,
      "loss": 0.4373,
      "step": 13757
    },
    {
      "epoch": 1.686856302108877,
      "grad_norm": 1.9802229703935397,
      "learning_rate": 3.277083990027463e-07,
      "loss": 0.4279,
      "step": 13758
    },
    {
      "epoch": 1.6869789112309956,
      "grad_norm": 1.862517037716418,
      "learning_rate": 3.274575792552623e-07,
      "loss": 0.3818,
      "step": 13759
    },
    {
      "epoch": 1.6871015203531141,
      "grad_norm": 2.1920077709445103,
      "learning_rate": 3.2720684880311824e-07,
      "loss": 0.4064,
      "step": 13760
    },
    {
      "epoch": 1.6872241294752328,
      "grad_norm": 1.9365521069016645,
      "learning_rate": 3.2695620765661964e-07,
      "loss": 0.4626,
      "step": 13761
    },
    {
      "epoch": 1.6873467385973515,
      "grad_norm": 1.8839043579183101,
      "learning_rate": 3.267056558260673e-07,
      "loss": 0.4014,
      "step": 13762
    },
    {
      "epoch": 1.6874693477194702,
      "grad_norm": 1.9176325488023804,
      "learning_rate": 3.2645519332176024e-07,
      "loss": 0.4149,
      "step": 13763
    },
    {
      "epoch": 1.687591956841589,
      "grad_norm": 1.9454086743627732,
      "learning_rate": 3.2620482015399304e-07,
      "loss": 0.4347,
      "step": 13764
    },
    {
      "epoch": 1.6877145659637076,
      "grad_norm": 1.9126493827336817,
      "learning_rate": 3.25954536333056e-07,
      "loss": 0.4252,
      "step": 13765
    },
    {
      "epoch": 1.6878371750858263,
      "grad_norm": 1.7927748887386001,
      "learning_rate": 3.25704341869236e-07,
      "loss": 0.4304,
      "step": 13766
    },
    {
      "epoch": 1.687959784207945,
      "grad_norm": 1.8977293191567601,
      "learning_rate": 3.2545423677281693e-07,
      "loss": 0.4231,
      "step": 13767
    },
    {
      "epoch": 1.6880823933300637,
      "grad_norm": 2.0328900477948206,
      "learning_rate": 3.252042210540793e-07,
      "loss": 0.4259,
      "step": 13768
    },
    {
      "epoch": 1.6882050024521824,
      "grad_norm": 2.345621285870818,
      "learning_rate": 3.2495429472329755e-07,
      "loss": 0.4606,
      "step": 13769
    },
    {
      "epoch": 1.6883276115743011,
      "grad_norm": 2.074531771654757,
      "learning_rate": 3.247044577907446e-07,
      "loss": 0.4107,
      "step": 13770
    },
    {
      "epoch": 1.6884502206964198,
      "grad_norm": 1.8549814024871951,
      "learning_rate": 3.2445471026669034e-07,
      "loss": 0.4378,
      "step": 13771
    },
    {
      "epoch": 1.6885728298185385,
      "grad_norm": 1.7942191369566323,
      "learning_rate": 3.2420505216139875e-07,
      "loss": 0.4121,
      "step": 13772
    },
    {
      "epoch": 1.6886954389406572,
      "grad_norm": 1.7585014959871876,
      "learning_rate": 3.2395548348513105e-07,
      "loss": 0.4395,
      "step": 13773
    },
    {
      "epoch": 1.688818048062776,
      "grad_norm": 2.0510311342124212,
      "learning_rate": 3.2370600424814517e-07,
      "loss": 0.4424,
      "step": 13774
    },
    {
      "epoch": 1.6889406571848946,
      "grad_norm": 2.035974566798459,
      "learning_rate": 3.2345661446069626e-07,
      "loss": 0.4328,
      "step": 13775
    },
    {
      "epoch": 1.6890632663070133,
      "grad_norm": 2.0187786311783364,
      "learning_rate": 3.232073141330333e-07,
      "loss": 0.4034,
      "step": 13776
    },
    {
      "epoch": 1.689185875429132,
      "grad_norm": 1.9981411869809291,
      "learning_rate": 3.2295810327540337e-07,
      "loss": 0.4212,
      "step": 13777
    },
    {
      "epoch": 1.6893084845512507,
      "grad_norm": 2.034384549522991,
      "learning_rate": 3.227089818980497e-07,
      "loss": 0.4444,
      "step": 13778
    },
    {
      "epoch": 1.6894310936733694,
      "grad_norm": 1.937612734540059,
      "learning_rate": 3.2245995001121103e-07,
      "loss": 0.3996,
      "step": 13779
    },
    {
      "epoch": 1.689553702795488,
      "grad_norm": 2.1593543443379666,
      "learning_rate": 3.2221100762512363e-07,
      "loss": 0.4344,
      "step": 13780
    },
    {
      "epoch": 1.6896763119176068,
      "grad_norm": 1.9818439586538714,
      "learning_rate": 3.2196215475002013e-07,
      "loss": 0.4434,
      "step": 13781
    },
    {
      "epoch": 1.6897989210397255,
      "grad_norm": 2.01711650909699,
      "learning_rate": 3.217133913961271e-07,
      "loss": 0.4131,
      "step": 13782
    },
    {
      "epoch": 1.689921530161844,
      "grad_norm": 1.8780955149683554,
      "learning_rate": 3.214647175736699e-07,
      "loss": 0.4069,
      "step": 13783
    },
    {
      "epoch": 1.6900441392839627,
      "grad_norm": 1.9233177793515646,
      "learning_rate": 3.2121613329287013e-07,
      "loss": 0.4151,
      "step": 13784
    },
    {
      "epoch": 1.6901667484060814,
      "grad_norm": 1.840396801605336,
      "learning_rate": 3.209676385639446e-07,
      "loss": 0.4283,
      "step": 13785
    },
    {
      "epoch": 1.6902893575282,
      "grad_norm": 2.0035375992052646,
      "learning_rate": 3.2071923339710625e-07,
      "loss": 0.3932,
      "step": 13786
    },
    {
      "epoch": 1.6904119666503188,
      "grad_norm": 1.9235635074636275,
      "learning_rate": 3.204709178025653e-07,
      "loss": 0.4536,
      "step": 13787
    },
    {
      "epoch": 1.6905345757724375,
      "grad_norm": 1.892745045499676,
      "learning_rate": 3.202226917905291e-07,
      "loss": 0.461,
      "step": 13788
    },
    {
      "epoch": 1.6906571848945562,
      "grad_norm": 1.8829513428337594,
      "learning_rate": 3.199745553711989e-07,
      "loss": 0.4378,
      "step": 13789
    },
    {
      "epoch": 1.6907797940166749,
      "grad_norm": 1.982606310467832,
      "learning_rate": 3.197265085547735e-07,
      "loss": 0.3935,
      "step": 13790
    },
    {
      "epoch": 1.6909024031387934,
      "grad_norm": 2.034347545307146,
      "learning_rate": 3.1947855135144925e-07,
      "loss": 0.4077,
      "step": 13791
    },
    {
      "epoch": 1.691025012260912,
      "grad_norm": 2.025984022807149,
      "learning_rate": 3.1923068377141596e-07,
      "loss": 0.4535,
      "step": 13792
    },
    {
      "epoch": 1.6911476213830308,
      "grad_norm": 2.058637056724859,
      "learning_rate": 3.189829058248631e-07,
      "loss": 0.4562,
      "step": 13793
    },
    {
      "epoch": 1.6912702305051495,
      "grad_norm": 1.8466127000462547,
      "learning_rate": 3.187352175219732e-07,
      "loss": 0.4039,
      "step": 13794
    },
    {
      "epoch": 1.6913928396272682,
      "grad_norm": 1.946762760332756,
      "learning_rate": 3.184876188729283e-07,
      "loss": 0.409,
      "step": 13795
    },
    {
      "epoch": 1.6915154487493869,
      "grad_norm": 1.904952650110163,
      "learning_rate": 3.182401098879037e-07,
      "loss": 0.4182,
      "step": 13796
    },
    {
      "epoch": 1.6916380578715056,
      "grad_norm": 2.0286948619672978,
      "learning_rate": 3.179926905770736e-07,
      "loss": 0.4165,
      "step": 13797
    },
    {
      "epoch": 1.6917606669936243,
      "grad_norm": 1.8870955707576444,
      "learning_rate": 3.1774536095060676e-07,
      "loss": 0.3996,
      "step": 13798
    },
    {
      "epoch": 1.691883276115743,
      "grad_norm": 2.0150790114000197,
      "learning_rate": 3.174981210186684e-07,
      "loss": 0.4525,
      "step": 13799
    },
    {
      "epoch": 1.6920058852378617,
      "grad_norm": 1.9470072276089816,
      "learning_rate": 3.172509707914212e-07,
      "loss": 0.4195,
      "step": 13800
    },
    {
      "epoch": 1.6921284943599804,
      "grad_norm": 1.9190354891814463,
      "learning_rate": 3.1700391027902455e-07,
      "loss": 0.3816,
      "step": 13801
    },
    {
      "epoch": 1.692251103482099,
      "grad_norm": 1.9073843861811146,
      "learning_rate": 3.1675693949163063e-07,
      "loss": 0.4623,
      "step": 13802
    },
    {
      "epoch": 1.6923737126042178,
      "grad_norm": 2.0873890110365836,
      "learning_rate": 3.165100584393918e-07,
      "loss": 0.4007,
      "step": 13803
    },
    {
      "epoch": 1.6924963217263365,
      "grad_norm": 1.9402071566494683,
      "learning_rate": 3.1626326713245556e-07,
      "loss": 0.4179,
      "step": 13804
    },
    {
      "epoch": 1.6926189308484552,
      "grad_norm": 1.8947676187802938,
      "learning_rate": 3.1601656558096436e-07,
      "loss": 0.4312,
      "step": 13805
    },
    {
      "epoch": 1.6927415399705739,
      "grad_norm": 2.2021862146369315,
      "learning_rate": 3.1576995379505955e-07,
      "loss": 0.448,
      "step": 13806
    },
    {
      "epoch": 1.6928641490926926,
      "grad_norm": 1.8006323520413714,
      "learning_rate": 3.155234317848757e-07,
      "loss": 0.4312,
      "step": 13807
    },
    {
      "epoch": 1.6929867582148113,
      "grad_norm": 1.8806083556425708,
      "learning_rate": 3.1527699956054673e-07,
      "loss": 0.4553,
      "step": 13808
    },
    {
      "epoch": 1.69310936733693,
      "grad_norm": 1.8898429585719838,
      "learning_rate": 3.150306571322004e-07,
      "loss": 0.4115,
      "step": 13809
    },
    {
      "epoch": 1.6932319764590487,
      "grad_norm": 1.8809088660935442,
      "learning_rate": 3.1478440450996297e-07,
      "loss": 0.3779,
      "step": 13810
    },
    {
      "epoch": 1.6933545855811674,
      "grad_norm": 1.8724123244663728,
      "learning_rate": 3.145382417039547e-07,
      "loss": 0.4469,
      "step": 13811
    },
    {
      "epoch": 1.693477194703286,
      "grad_norm": 2.0782951213781113,
      "learning_rate": 3.1429216872429334e-07,
      "loss": 0.4308,
      "step": 13812
    },
    {
      "epoch": 1.6935998038254048,
      "grad_norm": 1.869237213490781,
      "learning_rate": 3.1404618558109363e-07,
      "loss": 0.3878,
      "step": 13813
    },
    {
      "epoch": 1.6937224129475235,
      "grad_norm": 2.119612651311498,
      "learning_rate": 3.138002922844657e-07,
      "loss": 0.4015,
      "step": 13814
    },
    {
      "epoch": 1.693845022069642,
      "grad_norm": 1.935368966134001,
      "learning_rate": 3.135544888445166e-07,
      "loss": 0.408,
      "step": 13815
    },
    {
      "epoch": 1.6939676311917606,
      "grad_norm": 1.9794266499123179,
      "learning_rate": 3.133087752713479e-07,
      "loss": 0.4563,
      "step": 13816
    },
    {
      "epoch": 1.6940902403138793,
      "grad_norm": 1.8125864636392293,
      "learning_rate": 3.130631515750604e-07,
      "loss": 0.4061,
      "step": 13817
    },
    {
      "epoch": 1.694212849435998,
      "grad_norm": 2.061894969523375,
      "learning_rate": 3.128176177657491e-07,
      "loss": 0.4918,
      "step": 13818
    },
    {
      "epoch": 1.6943354585581167,
      "grad_norm": 2.0030258612734153,
      "learning_rate": 3.1257217385350533e-07,
      "loss": 0.4888,
      "step": 13819
    },
    {
      "epoch": 1.6944580676802354,
      "grad_norm": 1.9397615546084426,
      "learning_rate": 3.123268198484178e-07,
      "loss": 0.4469,
      "step": 13820
    },
    {
      "epoch": 1.6945806768023541,
      "grad_norm": 1.9486244564532937,
      "learning_rate": 3.1208155576057137e-07,
      "loss": 0.4181,
      "step": 13821
    },
    {
      "epoch": 1.6947032859244728,
      "grad_norm": 1.8265170791892664,
      "learning_rate": 3.1183638160004636e-07,
      "loss": 0.412,
      "step": 13822
    },
    {
      "epoch": 1.6948258950465913,
      "grad_norm": 1.885437580524198,
      "learning_rate": 3.1159129737691917e-07,
      "loss": 0.4032,
      "step": 13823
    },
    {
      "epoch": 1.69494850416871,
      "grad_norm": 1.9642394573080826,
      "learning_rate": 3.113463031012648e-07,
      "loss": 0.4083,
      "step": 13824
    },
    {
      "epoch": 1.6950711132908287,
      "grad_norm": 1.9925220554946128,
      "learning_rate": 3.1110139878315154e-07,
      "loss": 0.4344,
      "step": 13825
    },
    {
      "epoch": 1.6951937224129474,
      "grad_norm": 2.026669105128644,
      "learning_rate": 3.108565844326458e-07,
      "loss": 0.5174,
      "step": 13826
    },
    {
      "epoch": 1.6953163315350661,
      "grad_norm": 1.9452213744685163,
      "learning_rate": 3.106118600598104e-07,
      "loss": 0.419,
      "step": 13827
    },
    {
      "epoch": 1.6954389406571848,
      "grad_norm": 1.9538324427290732,
      "learning_rate": 3.1036722567470397e-07,
      "loss": 0.4118,
      "step": 13828
    },
    {
      "epoch": 1.6955615497793035,
      "grad_norm": 1.9064602522713567,
      "learning_rate": 3.1012268128738e-07,
      "loss": 0.4243,
      "step": 13829
    },
    {
      "epoch": 1.6956841589014222,
      "grad_norm": 1.8956087966876327,
      "learning_rate": 3.0987822690789093e-07,
      "loss": 0.4385,
      "step": 13830
    },
    {
      "epoch": 1.695806768023541,
      "grad_norm": 2.12905729715689,
      "learning_rate": 3.0963386254628525e-07,
      "loss": 0.4684,
      "step": 13831
    },
    {
      "epoch": 1.6959293771456596,
      "grad_norm": 1.8762973533453398,
      "learning_rate": 3.093895882126044e-07,
      "loss": 0.4148,
      "step": 13832
    },
    {
      "epoch": 1.6960519862677783,
      "grad_norm": 1.9293001626147883,
      "learning_rate": 3.0914540391688954e-07,
      "loss": 0.4033,
      "step": 13833
    },
    {
      "epoch": 1.696174595389897,
      "grad_norm": 2.1233461052081615,
      "learning_rate": 3.0890130966917784e-07,
      "loss": 0.4376,
      "step": 13834
    },
    {
      "epoch": 1.6962972045120157,
      "grad_norm": 2.139152831090873,
      "learning_rate": 3.0865730547950115e-07,
      "loss": 0.4043,
      "step": 13835
    },
    {
      "epoch": 1.6964198136341344,
      "grad_norm": 1.8607606456569936,
      "learning_rate": 3.084133913578882e-07,
      "loss": 0.4302,
      "step": 13836
    },
    {
      "epoch": 1.696542422756253,
      "grad_norm": 1.99730336240975,
      "learning_rate": 3.081695673143656e-07,
      "loss": 0.4332,
      "step": 13837
    },
    {
      "epoch": 1.6966650318783718,
      "grad_norm": 1.967258128541941,
      "learning_rate": 3.0792583335895324e-07,
      "loss": 0.4144,
      "step": 13838
    },
    {
      "epoch": 1.6967876410004905,
      "grad_norm": 2.074646764410438,
      "learning_rate": 3.0768218950167047e-07,
      "loss": 0.4624,
      "step": 13839
    },
    {
      "epoch": 1.6969102501226092,
      "grad_norm": 1.9833038933669307,
      "learning_rate": 3.074386357525305e-07,
      "loss": 0.3744,
      "step": 13840
    },
    {
      "epoch": 1.697032859244728,
      "grad_norm": 1.9222618215196172,
      "learning_rate": 3.0719517212154463e-07,
      "loss": 0.3962,
      "step": 13841
    },
    {
      "epoch": 1.6971554683668466,
      "grad_norm": 1.8908594968733865,
      "learning_rate": 3.069517986187187e-07,
      "loss": 0.4427,
      "step": 13842
    },
    {
      "epoch": 1.6972780774889653,
      "grad_norm": 1.7071465326802273,
      "learning_rate": 3.067085152540572e-07,
      "loss": 0.3747,
      "step": 13843
    },
    {
      "epoch": 1.697400686611084,
      "grad_norm": 2.114830603294854,
      "learning_rate": 3.064653220375582e-07,
      "loss": 0.4747,
      "step": 13844
    },
    {
      "epoch": 1.6975232957332027,
      "grad_norm": 2.0812165027382634,
      "learning_rate": 3.0622221897921747e-07,
      "loss": 0.4384,
      "step": 13845
    },
    {
      "epoch": 1.6976459048553212,
      "grad_norm": 1.9076912874928247,
      "learning_rate": 3.0597920608902737e-07,
      "loss": 0.3727,
      "step": 13846
    },
    {
      "epoch": 1.6977685139774399,
      "grad_norm": 1.8770205961827682,
      "learning_rate": 3.0573628337697667e-07,
      "loss": 0.4244,
      "step": 13847
    },
    {
      "epoch": 1.6978911230995586,
      "grad_norm": 1.7210188173507637,
      "learning_rate": 3.054934508530491e-07,
      "loss": 0.3914,
      "step": 13848
    },
    {
      "epoch": 1.6980137322216773,
      "grad_norm": 1.9555583438694502,
      "learning_rate": 3.052507085272255e-07,
      "loss": 0.4151,
      "step": 13849
    },
    {
      "epoch": 1.698136341343796,
      "grad_norm": 1.9884628516772929,
      "learning_rate": 3.0500805640948314e-07,
      "loss": 0.4074,
      "step": 13850
    },
    {
      "epoch": 1.6982589504659147,
      "grad_norm": 1.940447352465439,
      "learning_rate": 3.047654945097961e-07,
      "loss": 0.3745,
      "step": 13851
    },
    {
      "epoch": 1.6983815595880334,
      "grad_norm": 1.9198201469166813,
      "learning_rate": 3.045230228381338e-07,
      "loss": 0.4051,
      "step": 13852
    },
    {
      "epoch": 1.698504168710152,
      "grad_norm": 1.9473954202000139,
      "learning_rate": 3.042806414044613e-07,
      "loss": 0.4302,
      "step": 13853
    },
    {
      "epoch": 1.6986267778322706,
      "grad_norm": 1.972144357300162,
      "learning_rate": 3.040383502187422e-07,
      "loss": 0.439,
      "step": 13854
    },
    {
      "epoch": 1.6987493869543893,
      "grad_norm": 1.9045184953017993,
      "learning_rate": 3.037961492909341e-07,
      "loss": 0.4548,
      "step": 13855
    },
    {
      "epoch": 1.698871996076508,
      "grad_norm": 2.0212702152817483,
      "learning_rate": 3.0355403863099293e-07,
      "loss": 0.408,
      "step": 13856
    },
    {
      "epoch": 1.6989946051986267,
      "grad_norm": 1.8445977546533034,
      "learning_rate": 3.0331201824886913e-07,
      "loss": 0.4153,
      "step": 13857
    },
    {
      "epoch": 1.6991172143207454,
      "grad_norm": 1.7829375956181133,
      "learning_rate": 3.0307008815450985e-07,
      "loss": 0.4333,
      "step": 13858
    },
    {
      "epoch": 1.699239823442864,
      "grad_norm": 1.9154231120970784,
      "learning_rate": 3.028282483578593e-07,
      "loss": 0.417,
      "step": 13859
    },
    {
      "epoch": 1.6993624325649828,
      "grad_norm": 1.9202089153233628,
      "learning_rate": 3.0258649886885814e-07,
      "loss": 0.4328,
      "step": 13860
    },
    {
      "epoch": 1.6994850416871015,
      "grad_norm": 2.011403573252394,
      "learning_rate": 3.023448396974418e-07,
      "loss": 0.48,
      "step": 13861
    },
    {
      "epoch": 1.6996076508092202,
      "grad_norm": 1.8597479680980697,
      "learning_rate": 3.021032708535429e-07,
      "loss": 0.435,
      "step": 13862
    },
    {
      "epoch": 1.6997302599313389,
      "grad_norm": 1.9551986685199498,
      "learning_rate": 3.0186179234709067e-07,
      "loss": 0.4305,
      "step": 13863
    },
    {
      "epoch": 1.6998528690534576,
      "grad_norm": 1.987667672535134,
      "learning_rate": 3.0162040418801136e-07,
      "loss": 0.435,
      "step": 13864
    },
    {
      "epoch": 1.6999754781755763,
      "grad_norm": 1.9183082721580558,
      "learning_rate": 3.0137910638622426e-07,
      "loss": 0.4274,
      "step": 13865
    },
    {
      "epoch": 1.700098087297695,
      "grad_norm": 1.855853889571186,
      "learning_rate": 3.011378989516481e-07,
      "loss": 0.4018,
      "step": 13866
    },
    {
      "epoch": 1.7002206964198137,
      "grad_norm": 1.9639591539621974,
      "learning_rate": 3.0089678189419767e-07,
      "loss": 0.3641,
      "step": 13867
    },
    {
      "epoch": 1.7003433055419324,
      "grad_norm": 2.0648689134378757,
      "learning_rate": 3.0065575522378266e-07,
      "loss": 0.4301,
      "step": 13868
    },
    {
      "epoch": 1.700465914664051,
      "grad_norm": 1.9408312296521417,
      "learning_rate": 3.004148189503092e-07,
      "loss": 0.4117,
      "step": 13869
    },
    {
      "epoch": 1.7005885237861698,
      "grad_norm": 2.1762460639391956,
      "learning_rate": 3.0017397308368085e-07,
      "loss": 0.4908,
      "step": 13870
    },
    {
      "epoch": 1.7007111329082885,
      "grad_norm": 2.100658740504224,
      "learning_rate": 2.999332176337974e-07,
      "loss": 0.4413,
      "step": 13871
    },
    {
      "epoch": 1.7008337420304072,
      "grad_norm": 1.9073143258570746,
      "learning_rate": 2.9969255261055347e-07,
      "loss": 0.3829,
      "step": 13872
    },
    {
      "epoch": 1.7009563511525259,
      "grad_norm": 1.8073327606476322,
      "learning_rate": 2.994519780238403e-07,
      "loss": 0.4515,
      "step": 13873
    },
    {
      "epoch": 1.7010789602746446,
      "grad_norm": 1.9403702139617913,
      "learning_rate": 2.992114938835472e-07,
      "loss": 0.4461,
      "step": 13874
    },
    {
      "epoch": 1.7012015693967633,
      "grad_norm": 1.9605978452787385,
      "learning_rate": 2.9897110019955737e-07,
      "loss": 0.4186,
      "step": 13875
    },
    {
      "epoch": 1.701324178518882,
      "grad_norm": 1.9955253422591037,
      "learning_rate": 2.9873079698175185e-07,
      "loss": 0.3966,
      "step": 13876
    },
    {
      "epoch": 1.7014467876410004,
      "grad_norm": 2.186311018325354,
      "learning_rate": 2.9849058424000877e-07,
      "loss": 0.446,
      "step": 13877
    },
    {
      "epoch": 1.7015693967631191,
      "grad_norm": 1.8602947436401265,
      "learning_rate": 2.982504619841992e-07,
      "loss": 0.4117,
      "step": 13878
    },
    {
      "epoch": 1.7016920058852378,
      "grad_norm": 1.9345391604026874,
      "learning_rate": 2.9801043022419345e-07,
      "loss": 0.4094,
      "step": 13879
    },
    {
      "epoch": 1.7018146150073565,
      "grad_norm": 2.0341051340723686,
      "learning_rate": 2.9777048896985767e-07,
      "loss": 0.4047,
      "step": 13880
    },
    {
      "epoch": 1.7019372241294752,
      "grad_norm": 2.0507634699210793,
      "learning_rate": 2.975306382310536e-07,
      "loss": 0.4445,
      "step": 13881
    },
    {
      "epoch": 1.702059833251594,
      "grad_norm": 1.9124039584316797,
      "learning_rate": 2.9729087801763924e-07,
      "loss": 0.4248,
      "step": 13882
    },
    {
      "epoch": 1.7021824423737126,
      "grad_norm": 1.8542065184805925,
      "learning_rate": 2.9705120833946917e-07,
      "loss": 0.4502,
      "step": 13883
    },
    {
      "epoch": 1.7023050514958313,
      "grad_norm": 1.9456499694732612,
      "learning_rate": 2.9681162920639493e-07,
      "loss": 0.4102,
      "step": 13884
    },
    {
      "epoch": 1.7024276606179498,
      "grad_norm": 2.287917033974492,
      "learning_rate": 2.965721406282632e-07,
      "loss": 0.4463,
      "step": 13885
    },
    {
      "epoch": 1.7025502697400685,
      "grad_norm": 2.041254288466701,
      "learning_rate": 2.963327426149168e-07,
      "loss": 0.4365,
      "step": 13886
    },
    {
      "epoch": 1.7026728788621872,
      "grad_norm": 1.8802333041872084,
      "learning_rate": 2.9609343517619653e-07,
      "loss": 0.4145,
      "step": 13887
    },
    {
      "epoch": 1.702795487984306,
      "grad_norm": 1.8809384227695414,
      "learning_rate": 2.958542183219373e-07,
      "loss": 0.3981,
      "step": 13888
    },
    {
      "epoch": 1.7029180971064246,
      "grad_norm": 1.7720546690312826,
      "learning_rate": 2.9561509206197214e-07,
      "loss": 0.4329,
      "step": 13889
    },
    {
      "epoch": 1.7030407062285433,
      "grad_norm": 2.192303751082395,
      "learning_rate": 2.9537605640612893e-07,
      "loss": 0.43,
      "step": 13890
    },
    {
      "epoch": 1.703163315350662,
      "grad_norm": 1.8635759805841747,
      "learning_rate": 2.9513711136423347e-07,
      "loss": 0.4406,
      "step": 13891
    },
    {
      "epoch": 1.7032859244727807,
      "grad_norm": 1.9931662334292983,
      "learning_rate": 2.9489825694610536e-07,
      "loss": 0.4015,
      "step": 13892
    },
    {
      "epoch": 1.7034085335948994,
      "grad_norm": 1.9163549881770525,
      "learning_rate": 2.9465949316156315e-07,
      "loss": 0.4239,
      "step": 13893
    },
    {
      "epoch": 1.703531142717018,
      "grad_norm": 1.9972840014932836,
      "learning_rate": 2.944208200204202e-07,
      "loss": 0.414,
      "step": 13894
    },
    {
      "epoch": 1.7036537518391368,
      "grad_norm": 1.9879694291093541,
      "learning_rate": 2.941822375324857e-07,
      "loss": 0.4288,
      "step": 13895
    },
    {
      "epoch": 1.7037763609612555,
      "grad_norm": 1.910041887285101,
      "learning_rate": 2.939437457075664e-07,
      "loss": 0.3951,
      "step": 13896
    },
    {
      "epoch": 1.7038989700833742,
      "grad_norm": 2.051707854178219,
      "learning_rate": 2.9370534455546494e-07,
      "loss": 0.4009,
      "step": 13897
    },
    {
      "epoch": 1.704021579205493,
      "grad_norm": 2.0240625787022393,
      "learning_rate": 2.934670340859799e-07,
      "loss": 0.436,
      "step": 13898
    },
    {
      "epoch": 1.7041441883276116,
      "grad_norm": 1.9201539422995964,
      "learning_rate": 2.932288143089057e-07,
      "loss": 0.4399,
      "step": 13899
    },
    {
      "epoch": 1.7042667974497303,
      "grad_norm": 2.1209747712514178,
      "learning_rate": 2.9299068523403444e-07,
      "loss": 0.4586,
      "step": 13900
    },
    {
      "epoch": 1.704389406571849,
      "grad_norm": 1.9172983644804413,
      "learning_rate": 2.927526468711525e-07,
      "loss": 0.4328,
      "step": 13901
    },
    {
      "epoch": 1.7045120156939677,
      "grad_norm": 1.8980275848614372,
      "learning_rate": 2.925146992300451e-07,
      "loss": 0.4368,
      "step": 13902
    },
    {
      "epoch": 1.7046346248160864,
      "grad_norm": 1.973523220865143,
      "learning_rate": 2.9227684232049106e-07,
      "loss": 0.4304,
      "step": 13903
    },
    {
      "epoch": 1.704757233938205,
      "grad_norm": 2.100152670497682,
      "learning_rate": 2.920390761522679e-07,
      "loss": 0.4117,
      "step": 13904
    },
    {
      "epoch": 1.7048798430603238,
      "grad_norm": 1.9768442743784693,
      "learning_rate": 2.918014007351472e-07,
      "loss": 0.3813,
      "step": 13905
    },
    {
      "epoch": 1.7050024521824425,
      "grad_norm": 2.111284611786192,
      "learning_rate": 2.9156381607889834e-07,
      "loss": 0.4096,
      "step": 13906
    },
    {
      "epoch": 1.7051250613045612,
      "grad_norm": 2.002769189163485,
      "learning_rate": 2.913263221932866e-07,
      "loss": 0.4533,
      "step": 13907
    },
    {
      "epoch": 1.70524767042668,
      "grad_norm": 1.8754329541929502,
      "learning_rate": 2.910889190880728e-07,
      "loss": 0.4156,
      "step": 13908
    },
    {
      "epoch": 1.7053702795487984,
      "grad_norm": 1.9180744718819218,
      "learning_rate": 2.9085160677301465e-07,
      "loss": 0.409,
      "step": 13909
    },
    {
      "epoch": 1.705492888670917,
      "grad_norm": 1.9513246718534576,
      "learning_rate": 2.9061438525786717e-07,
      "loss": 0.4181,
      "step": 13910
    },
    {
      "epoch": 1.7056154977930358,
      "grad_norm": 1.8734371989458172,
      "learning_rate": 2.9037725455237975e-07,
      "loss": 0.4278,
      "step": 13911
    },
    {
      "epoch": 1.7057381069151545,
      "grad_norm": 1.9043358196278022,
      "learning_rate": 2.901402146662988e-07,
      "loss": 0.4252,
      "step": 13912
    },
    {
      "epoch": 1.7058607160372732,
      "grad_norm": 2.0546946998629956,
      "learning_rate": 2.8990326560936753e-07,
      "loss": 0.4203,
      "step": 13913
    },
    {
      "epoch": 1.7059833251593919,
      "grad_norm": 2.119458493852735,
      "learning_rate": 2.896664073913247e-07,
      "loss": 0.3894,
      "step": 13914
    },
    {
      "epoch": 1.7061059342815106,
      "grad_norm": 1.8861650893667905,
      "learning_rate": 2.8942964002190515e-07,
      "loss": 0.4236,
      "step": 13915
    },
    {
      "epoch": 1.7062285434036293,
      "grad_norm": 2.008814166986506,
      "learning_rate": 2.891929635108409e-07,
      "loss": 0.3862,
      "step": 13916
    },
    {
      "epoch": 1.7063511525257478,
      "grad_norm": 1.9260749369377732,
      "learning_rate": 2.8895637786786027e-07,
      "loss": 0.4426,
      "step": 13917
    },
    {
      "epoch": 1.7064737616478665,
      "grad_norm": 1.9507412086450617,
      "learning_rate": 2.8871988310268687e-07,
      "loss": 0.4391,
      "step": 13918
    },
    {
      "epoch": 1.7065963707699852,
      "grad_norm": 1.9115218538136203,
      "learning_rate": 2.884834792250404e-07,
      "loss": 0.434,
      "step": 13919
    },
    {
      "epoch": 1.7067189798921039,
      "grad_norm": 1.9013467268481632,
      "learning_rate": 2.88247166244639e-07,
      "loss": 0.4266,
      "step": 13920
    },
    {
      "epoch": 1.7068415890142226,
      "grad_norm": 1.9752110750555492,
      "learning_rate": 2.880109441711937e-07,
      "loss": 0.3859,
      "step": 13921
    },
    {
      "epoch": 1.7069641981363413,
      "grad_norm": 1.8736804729744523,
      "learning_rate": 2.877748130144148e-07,
      "loss": 0.4136,
      "step": 13922
    },
    {
      "epoch": 1.70708680725846,
      "grad_norm": 1.8129869116192838,
      "learning_rate": 2.875387727840082e-07,
      "loss": 0.4359,
      "step": 13923
    },
    {
      "epoch": 1.7072094163805787,
      "grad_norm": 1.8398455829654978,
      "learning_rate": 2.873028234896746e-07,
      "loss": 0.4436,
      "step": 13924
    },
    {
      "epoch": 1.7073320255026974,
      "grad_norm": 1.9578659539761083,
      "learning_rate": 2.8706696514111203e-07,
      "loss": 0.422,
      "step": 13925
    },
    {
      "epoch": 1.707454634624816,
      "grad_norm": 1.879255030143607,
      "learning_rate": 2.8683119774801457e-07,
      "loss": 0.4335,
      "step": 13926
    },
    {
      "epoch": 1.7075772437469348,
      "grad_norm": 1.9896348173719531,
      "learning_rate": 2.865955213200744e-07,
      "loss": 0.441,
      "step": 13927
    },
    {
      "epoch": 1.7076998528690535,
      "grad_norm": 1.893457041854991,
      "learning_rate": 2.8635993586697555e-07,
      "loss": 0.4131,
      "step": 13928
    },
    {
      "epoch": 1.7078224619911722,
      "grad_norm": 1.8621666139788149,
      "learning_rate": 2.8612444139840215e-07,
      "loss": 0.3668,
      "step": 13929
    },
    {
      "epoch": 1.7079450711132909,
      "grad_norm": 2.0787055991841945,
      "learning_rate": 2.858890379240342e-07,
      "loss": 0.4455,
      "step": 13930
    },
    {
      "epoch": 1.7080676802354096,
      "grad_norm": 2.02847933933526,
      "learning_rate": 2.856537254535466e-07,
      "loss": 0.4748,
      "step": 13931
    },
    {
      "epoch": 1.7081902893575283,
      "grad_norm": 1.986624002079805,
      "learning_rate": 2.8541850399661065e-07,
      "loss": 0.4524,
      "step": 13932
    },
    {
      "epoch": 1.708312898479647,
      "grad_norm": 1.9173789866548234,
      "learning_rate": 2.8518337356289503e-07,
      "loss": 0.46,
      "step": 13933
    },
    {
      "epoch": 1.7084355076017657,
      "grad_norm": 1.9308814922048936,
      "learning_rate": 2.8494833416206346e-07,
      "loss": 0.3936,
      "step": 13934
    },
    {
      "epoch": 1.7085581167238844,
      "grad_norm": 2.203076360240479,
      "learning_rate": 2.847133858037773e-07,
      "loss": 0.3766,
      "step": 13935
    },
    {
      "epoch": 1.708680725846003,
      "grad_norm": 2.1128968512949675,
      "learning_rate": 2.8447852849769255e-07,
      "loss": 0.4454,
      "step": 13936
    },
    {
      "epoch": 1.7088033349681218,
      "grad_norm": 1.9402049992411932,
      "learning_rate": 2.8424376225346275e-07,
      "loss": 0.423,
      "step": 13937
    },
    {
      "epoch": 1.7089259440902405,
      "grad_norm": 2.052576770080828,
      "learning_rate": 2.840090870807366e-07,
      "loss": 0.4049,
      "step": 13938
    },
    {
      "epoch": 1.7090485532123592,
      "grad_norm": 1.9454749641065165,
      "learning_rate": 2.837745029891606e-07,
      "loss": 0.4151,
      "step": 13939
    },
    {
      "epoch": 1.7091711623344776,
      "grad_norm": 1.920391860919275,
      "learning_rate": 2.835400099883759e-07,
      "loss": 0.4385,
      "step": 13940
    },
    {
      "epoch": 1.7092937714565963,
      "grad_norm": 1.8227986277753998,
      "learning_rate": 2.8330560808802035e-07,
      "loss": 0.4104,
      "step": 13941
    },
    {
      "epoch": 1.709416380578715,
      "grad_norm": 1.8041675743565886,
      "learning_rate": 2.830712972977287e-07,
      "loss": 0.4032,
      "step": 13942
    },
    {
      "epoch": 1.7095389897008337,
      "grad_norm": 1.830179030851212,
      "learning_rate": 2.828370776271319e-07,
      "loss": 0.4315,
      "step": 13943
    },
    {
      "epoch": 1.7096615988229524,
      "grad_norm": 2.0728901828295414,
      "learning_rate": 2.8260294908585643e-07,
      "loss": 0.4503,
      "step": 13944
    },
    {
      "epoch": 1.7097842079450711,
      "grad_norm": 1.9173480705155848,
      "learning_rate": 2.8236891168352486e-07,
      "loss": 0.3806,
      "step": 13945
    },
    {
      "epoch": 1.7099068170671898,
      "grad_norm": 1.8923582296552806,
      "learning_rate": 2.82134965429757e-07,
      "loss": 0.4659,
      "step": 13946
    },
    {
      "epoch": 1.7100294261893085,
      "grad_norm": 1.9862471248711335,
      "learning_rate": 2.81901110334169e-07,
      "loss": 0.4191,
      "step": 13947
    },
    {
      "epoch": 1.710152035311427,
      "grad_norm": 1.9929037823747178,
      "learning_rate": 2.816673464063721e-07,
      "loss": 0.453,
      "step": 13948
    },
    {
      "epoch": 1.7102746444335457,
      "grad_norm": 1.9963651706327605,
      "learning_rate": 2.8143367365597417e-07,
      "loss": 0.4354,
      "step": 13949
    },
    {
      "epoch": 1.7103972535556644,
      "grad_norm": 1.9635052010940897,
      "learning_rate": 2.812000920925803e-07,
      "loss": 0.4146,
      "step": 13950
    },
    {
      "epoch": 1.710519862677783,
      "grad_norm": 2.1603727525435183,
      "learning_rate": 2.809666017257903e-07,
      "loss": 0.4836,
      "step": 13951
    },
    {
      "epoch": 1.7106424717999018,
      "grad_norm": 1.925948107968757,
      "learning_rate": 2.807332025652021e-07,
      "loss": 0.4245,
      "step": 13952
    },
    {
      "epoch": 1.7107650809220205,
      "grad_norm": 2.024905914049302,
      "learning_rate": 2.8049989462040795e-07,
      "loss": 0.4507,
      "step": 13953
    },
    {
      "epoch": 1.7108876900441392,
      "grad_norm": 1.972372697374412,
      "learning_rate": 2.8026667790099686e-07,
      "loss": 0.4231,
      "step": 13954
    },
    {
      "epoch": 1.711010299166258,
      "grad_norm": 2.09223520782078,
      "learning_rate": 2.800335524165551e-07,
      "loss": 0.4309,
      "step": 13955
    },
    {
      "epoch": 1.7111329082883766,
      "grad_norm": 1.9572364235362256,
      "learning_rate": 2.7980051817666527e-07,
      "loss": 0.423,
      "step": 13956
    },
    {
      "epoch": 1.7112555174104953,
      "grad_norm": 1.8848388285796458,
      "learning_rate": 2.795675751909044e-07,
      "loss": 0.417,
      "step": 13957
    },
    {
      "epoch": 1.711378126532614,
      "grad_norm": 1.935535535765694,
      "learning_rate": 2.7933472346884675e-07,
      "loss": 0.4151,
      "step": 13958
    },
    {
      "epoch": 1.7115007356547327,
      "grad_norm": 1.9642283079622986,
      "learning_rate": 2.791019630200634e-07,
      "loss": 0.4219,
      "step": 13959
    },
    {
      "epoch": 1.7116233447768514,
      "grad_norm": 1.9632311286718878,
      "learning_rate": 2.7886929385412207e-07,
      "loss": 0.4317,
      "step": 13960
    },
    {
      "epoch": 1.71174595389897,
      "grad_norm": 2.095310716263305,
      "learning_rate": 2.786367159805839e-07,
      "loss": 0.4503,
      "step": 13961
    },
    {
      "epoch": 1.7118685630210888,
      "grad_norm": 1.967276910670371,
      "learning_rate": 2.7840422940900944e-07,
      "loss": 0.4386,
      "step": 13962
    },
    {
      "epoch": 1.7119911721432075,
      "grad_norm": 1.7960833992851142,
      "learning_rate": 2.7817183414895476e-07,
      "loss": 0.4129,
      "step": 13963
    },
    {
      "epoch": 1.7121137812653262,
      "grad_norm": 1.9907480017348982,
      "learning_rate": 2.7793953020997105e-07,
      "loss": 0.429,
      "step": 13964
    },
    {
      "epoch": 1.712236390387445,
      "grad_norm": 2.1836119414466824,
      "learning_rate": 2.7770731760160597e-07,
      "loss": 0.4199,
      "step": 13965
    },
    {
      "epoch": 1.7123589995095636,
      "grad_norm": 1.8904856314212162,
      "learning_rate": 2.7747519633340436e-07,
      "loss": 0.4622,
      "step": 13966
    },
    {
      "epoch": 1.7124816086316823,
      "grad_norm": 1.8962573441055413,
      "learning_rate": 2.7724316641490724e-07,
      "loss": 0.4422,
      "step": 13967
    },
    {
      "epoch": 1.712604217753801,
      "grad_norm": 1.8730822563471903,
      "learning_rate": 2.7701122785565137e-07,
      "loss": 0.3918,
      "step": 13968
    },
    {
      "epoch": 1.7127268268759197,
      "grad_norm": 2.0978244806724007,
      "learning_rate": 2.767793806651686e-07,
      "loss": 0.4234,
      "step": 13969
    },
    {
      "epoch": 1.7128494359980384,
      "grad_norm": 2.0218533255620876,
      "learning_rate": 2.7654762485298995e-07,
      "loss": 0.4485,
      "step": 13970
    },
    {
      "epoch": 1.712972045120157,
      "grad_norm": 1.9544966009033742,
      "learning_rate": 2.7631596042864e-07,
      "loss": 0.4433,
      "step": 13971
    },
    {
      "epoch": 1.7130946542422756,
      "grad_norm": 1.7904737402340465,
      "learning_rate": 2.7608438740164026e-07,
      "loss": 0.4179,
      "step": 13972
    },
    {
      "epoch": 1.7132172633643943,
      "grad_norm": 1.8986158486675992,
      "learning_rate": 2.758529057815107e-07,
      "loss": 0.3951,
      "step": 13973
    },
    {
      "epoch": 1.713339872486513,
      "grad_norm": 1.8832736140172268,
      "learning_rate": 2.7562151557776304e-07,
      "loss": 0.4377,
      "step": 13974
    },
    {
      "epoch": 1.7134624816086317,
      "grad_norm": 1.7464893026457926,
      "learning_rate": 2.7539021679990924e-07,
      "loss": 0.4266,
      "step": 13975
    },
    {
      "epoch": 1.7135850907307504,
      "grad_norm": 1.896186944147059,
      "learning_rate": 2.7515900945745633e-07,
      "loss": 0.428,
      "step": 13976
    },
    {
      "epoch": 1.713707699852869,
      "grad_norm": 1.8434990276182466,
      "learning_rate": 2.74927893559907e-07,
      "loss": 0.4437,
      "step": 13977
    },
    {
      "epoch": 1.7138303089749878,
      "grad_norm": 2.048465333628648,
      "learning_rate": 2.746968691167598e-07,
      "loss": 0.4329,
      "step": 13978
    },
    {
      "epoch": 1.7139529180971063,
      "grad_norm": 1.8924558911227138,
      "learning_rate": 2.744659361375107e-07,
      "loss": 0.3816,
      "step": 13979
    },
    {
      "epoch": 1.714075527219225,
      "grad_norm": 1.8347704287917384,
      "learning_rate": 2.742350946316524e-07,
      "loss": 0.3958,
      "step": 13980
    },
    {
      "epoch": 1.7141981363413437,
      "grad_norm": 1.906366465129052,
      "learning_rate": 2.740043446086721e-07,
      "loss": 0.4323,
      "step": 13981
    },
    {
      "epoch": 1.7143207454634624,
      "grad_norm": 1.845708004457357,
      "learning_rate": 2.737736860780535e-07,
      "loss": 0.4296,
      "step": 13982
    },
    {
      "epoch": 1.714443354585581,
      "grad_norm": 2.0475371675434855,
      "learning_rate": 2.735431190492779e-07,
      "loss": 0.3948,
      "step": 13983
    },
    {
      "epoch": 1.7145659637076998,
      "grad_norm": 2.0507497669398322,
      "learning_rate": 2.7331264353182145e-07,
      "loss": 0.4231,
      "step": 13984
    },
    {
      "epoch": 1.7146885728298185,
      "grad_norm": 2.055536309532235,
      "learning_rate": 2.730822595351579e-07,
      "loss": 0.4165,
      "step": 13985
    },
    {
      "epoch": 1.7148111819519372,
      "grad_norm": 2.0855344115555696,
      "learning_rate": 2.7285196706875546e-07,
      "loss": 0.4272,
      "step": 13986
    },
    {
      "epoch": 1.7149337910740559,
      "grad_norm": 1.806793971124512,
      "learning_rate": 2.726217661420805e-07,
      "loss": 0.4434,
      "step": 13987
    },
    {
      "epoch": 1.7150564001961746,
      "grad_norm": 1.8696319492635645,
      "learning_rate": 2.723916567645937e-07,
      "loss": 0.4252,
      "step": 13988
    },
    {
      "epoch": 1.7151790093182933,
      "grad_norm": 1.8858166398043232,
      "learning_rate": 2.7216163894575404e-07,
      "loss": 0.4332,
      "step": 13989
    },
    {
      "epoch": 1.715301618440412,
      "grad_norm": 1.8642303559123998,
      "learning_rate": 2.7193171269501493e-07,
      "loss": 0.4435,
      "step": 13990
    },
    {
      "epoch": 1.7154242275625307,
      "grad_norm": 1.8971926268166162,
      "learning_rate": 2.717018780218264e-07,
      "loss": 0.4423,
      "step": 13991
    },
    {
      "epoch": 1.7155468366846494,
      "grad_norm": 1.8999275793536985,
      "learning_rate": 2.7147213493563587e-07,
      "loss": 0.4679,
      "step": 13992
    },
    {
      "epoch": 1.715669445806768,
      "grad_norm": 1.7973500411539292,
      "learning_rate": 2.712424834458863e-07,
      "loss": 0.4003,
      "step": 13993
    },
    {
      "epoch": 1.7157920549288868,
      "grad_norm": 1.8336571806211517,
      "learning_rate": 2.7101292356201626e-07,
      "loss": 0.4333,
      "step": 13994
    },
    {
      "epoch": 1.7159146640510055,
      "grad_norm": 2.096030537963101,
      "learning_rate": 2.707834552934605e-07,
      "loss": 0.4464,
      "step": 13995
    },
    {
      "epoch": 1.7160372731731242,
      "grad_norm": 2.05460016971298,
      "learning_rate": 2.705540786496519e-07,
      "loss": 0.3768,
      "step": 13996
    },
    {
      "epoch": 1.7161598822952429,
      "grad_norm": 2.0189633938225424,
      "learning_rate": 2.7032479364001717e-07,
      "loss": 0.3986,
      "step": 13997
    },
    {
      "epoch": 1.7162824914173616,
      "grad_norm": 2.016389177590552,
      "learning_rate": 2.700956002739813e-07,
      "loss": 0.4622,
      "step": 13998
    },
    {
      "epoch": 1.7164051005394803,
      "grad_norm": 1.8546009371878238,
      "learning_rate": 2.6986649856096337e-07,
      "loss": 0.4434,
      "step": 13999
    },
    {
      "epoch": 1.716527709661599,
      "grad_norm": 1.9230127494622034,
      "learning_rate": 2.6963748851038083e-07,
      "loss": 0.417,
      "step": 14000
    },
    {
      "epoch": 1.7166503187837177,
      "grad_norm": 2.0227954735186433,
      "learning_rate": 2.6940857013164594e-07,
      "loss": 0.412,
      "step": 14001
    },
    {
      "epoch": 1.7167729279058364,
      "grad_norm": 1.8810378809093973,
      "learning_rate": 2.6917974343416807e-07,
      "loss": 0.4247,
      "step": 14002
    },
    {
      "epoch": 1.7168955370279548,
      "grad_norm": 2.0598503902291845,
      "learning_rate": 2.6895100842735193e-07,
      "loss": 0.4534,
      "step": 14003
    },
    {
      "epoch": 1.7170181461500735,
      "grad_norm": 2.06452233764364,
      "learning_rate": 2.687223651205989e-07,
      "loss": 0.4379,
      "step": 14004
    },
    {
      "epoch": 1.7171407552721922,
      "grad_norm": 2.084054348642171,
      "learning_rate": 2.684938135233067e-07,
      "loss": 0.5159,
      "step": 14005
    },
    {
      "epoch": 1.717263364394311,
      "grad_norm": 1.9554862118425642,
      "learning_rate": 2.6826535364486983e-07,
      "loss": 0.4305,
      "step": 14006
    },
    {
      "epoch": 1.7173859735164296,
      "grad_norm": 1.9312283924876712,
      "learning_rate": 2.680369854946779e-07,
      "loss": 0.3988,
      "step": 14007
    },
    {
      "epoch": 1.7175085826385483,
      "grad_norm": 1.8787886161518073,
      "learning_rate": 2.6780870908211687e-07,
      "loss": 0.3934,
      "step": 14008
    },
    {
      "epoch": 1.717631191760667,
      "grad_norm": 1.7425848418006982,
      "learning_rate": 2.6758052441656997e-07,
      "loss": 0.4209,
      "step": 14009
    },
    {
      "epoch": 1.7177538008827857,
      "grad_norm": 1.9271252465576902,
      "learning_rate": 2.673524315074158e-07,
      "loss": 0.4308,
      "step": 14010
    },
    {
      "epoch": 1.7178764100049042,
      "grad_norm": 1.9993492255839855,
      "learning_rate": 2.671244303640289e-07,
      "loss": 0.4305,
      "step": 14011
    },
    {
      "epoch": 1.717999019127023,
      "grad_norm": 1.8389020540579664,
      "learning_rate": 2.668965209957808e-07,
      "loss": 0.414,
      "step": 14012
    },
    {
      "epoch": 1.7181216282491416,
      "grad_norm": 2.194768203430765,
      "learning_rate": 2.666687034120394e-07,
      "loss": 0.4116,
      "step": 14013
    },
    {
      "epoch": 1.7182442373712603,
      "grad_norm": 2.0310959705310037,
      "learning_rate": 2.6644097762216824e-07,
      "loss": 0.4114,
      "step": 14014
    },
    {
      "epoch": 1.718366846493379,
      "grad_norm": 1.946075787118447,
      "learning_rate": 2.6621334363552686e-07,
      "loss": 0.4323,
      "step": 14015
    },
    {
      "epoch": 1.7184894556154977,
      "grad_norm": 1.9604109081738275,
      "learning_rate": 2.659858014614719e-07,
      "loss": 0.4346,
      "step": 14016
    },
    {
      "epoch": 1.7186120647376164,
      "grad_norm": 1.980235768562664,
      "learning_rate": 2.6575835110935504e-07,
      "loss": 0.4226,
      "step": 14017
    },
    {
      "epoch": 1.718734673859735,
      "grad_norm": 1.9930130795753171,
      "learning_rate": 2.655309925885255e-07,
      "loss": 0.4281,
      "step": 14018
    },
    {
      "epoch": 1.7188572829818538,
      "grad_norm": 1.8049073503043516,
      "learning_rate": 2.6530372590832826e-07,
      "loss": 0.4321,
      "step": 14019
    },
    {
      "epoch": 1.7189798921039725,
      "grad_norm": 1.8523610309790408,
      "learning_rate": 2.650765510781042e-07,
      "loss": 0.4405,
      "step": 14020
    },
    {
      "epoch": 1.7191025012260912,
      "grad_norm": 1.8938009251594965,
      "learning_rate": 2.6484946810719003e-07,
      "loss": 0.424,
      "step": 14021
    },
    {
      "epoch": 1.71922511034821,
      "grad_norm": 2.110245616232536,
      "learning_rate": 2.6462247700491966e-07,
      "loss": 0.4488,
      "step": 14022
    },
    {
      "epoch": 1.7193477194703286,
      "grad_norm": 1.8215848651537003,
      "learning_rate": 2.64395577780624e-07,
      "loss": 0.4038,
      "step": 14023
    },
    {
      "epoch": 1.7194703285924473,
      "grad_norm": 2.1121384146112083,
      "learning_rate": 2.641687704436269e-07,
      "loss": 0.4258,
      "step": 14024
    },
    {
      "epoch": 1.719592937714566,
      "grad_norm": 1.8187531488784638,
      "learning_rate": 2.639420550032515e-07,
      "loss": 0.4015,
      "step": 14025
    },
    {
      "epoch": 1.7197155468366847,
      "grad_norm": 1.8548801735843232,
      "learning_rate": 2.63715431468817e-07,
      "loss": 0.4344,
      "step": 14026
    },
    {
      "epoch": 1.7198381559588034,
      "grad_norm": 1.966281124496807,
      "learning_rate": 2.634888998496371e-07,
      "loss": 0.4291,
      "step": 14027
    },
    {
      "epoch": 1.719960765080922,
      "grad_norm": 1.8715265490430044,
      "learning_rate": 2.632624601550224e-07,
      "loss": 0.4406,
      "step": 14028
    },
    {
      "epoch": 1.7200833742030408,
      "grad_norm": 1.8244158628004263,
      "learning_rate": 2.6303611239428096e-07,
      "loss": 0.408,
      "step": 14029
    },
    {
      "epoch": 1.7202059833251595,
      "grad_norm": 1.7678173246476572,
      "learning_rate": 2.6280985657671484e-07,
      "loss": 0.4104,
      "step": 14030
    },
    {
      "epoch": 1.7203285924472782,
      "grad_norm": 2.0801618554499828,
      "learning_rate": 2.625836927116249e-07,
      "loss": 0.4338,
      "step": 14031
    },
    {
      "epoch": 1.720451201569397,
      "grad_norm": 1.803870054556988,
      "learning_rate": 2.623576208083056e-07,
      "loss": 0.3967,
      "step": 14032
    },
    {
      "epoch": 1.7205738106915156,
      "grad_norm": 2.049515457617594,
      "learning_rate": 2.621316408760502e-07,
      "loss": 0.4468,
      "step": 14033
    },
    {
      "epoch": 1.720696419813634,
      "grad_norm": 1.9046968962862076,
      "learning_rate": 2.619057529241456e-07,
      "loss": 0.4092,
      "step": 14034
    },
    {
      "epoch": 1.7208190289357528,
      "grad_norm": 1.8506302495231484,
      "learning_rate": 2.616799569618775e-07,
      "loss": 0.4347,
      "step": 14035
    },
    {
      "epoch": 1.7209416380578715,
      "grad_norm": 1.9379151104757582,
      "learning_rate": 2.6145425299852534e-07,
      "loss": 0.4234,
      "step": 14036
    },
    {
      "epoch": 1.7210642471799902,
      "grad_norm": 2.0010914997835334,
      "learning_rate": 2.612286410433662e-07,
      "loss": 0.4385,
      "step": 14037
    },
    {
      "epoch": 1.7211868563021089,
      "grad_norm": 2.05086450826223,
      "learning_rate": 2.610031211056735e-07,
      "loss": 0.449,
      "step": 14038
    },
    {
      "epoch": 1.7213094654242276,
      "grad_norm": 2.0665619522927923,
      "learning_rate": 2.6077769319471676e-07,
      "loss": 0.4388,
      "step": 14039
    },
    {
      "epoch": 1.7214320745463463,
      "grad_norm": 1.9959817505982103,
      "learning_rate": 2.605523573197608e-07,
      "loss": 0.4478,
      "step": 14040
    },
    {
      "epoch": 1.721554683668465,
      "grad_norm": 1.8143357307810077,
      "learning_rate": 2.603271134900673e-07,
      "loss": 0.425,
      "step": 14041
    },
    {
      "epoch": 1.7216772927905835,
      "grad_norm": 1.7170527540040128,
      "learning_rate": 2.601019617148945e-07,
      "loss": 0.3747,
      "step": 14042
    },
    {
      "epoch": 1.7217999019127022,
      "grad_norm": 1.83768519500391,
      "learning_rate": 2.5987690200349693e-07,
      "loss": 0.4071,
      "step": 14043
    },
    {
      "epoch": 1.7219225110348209,
      "grad_norm": 1.8931257685185239,
      "learning_rate": 2.5965193436512435e-07,
      "loss": 0.4192,
      "step": 14044
    },
    {
      "epoch": 1.7220451201569396,
      "grad_norm": 1.9026391353339702,
      "learning_rate": 2.5942705880902336e-07,
      "loss": 0.4522,
      "step": 14045
    },
    {
      "epoch": 1.7221677292790583,
      "grad_norm": 1.8844805807052947,
      "learning_rate": 2.5920227534443704e-07,
      "loss": 0.3874,
      "step": 14046
    },
    {
      "epoch": 1.722290338401177,
      "grad_norm": 1.8741912490620192,
      "learning_rate": 2.589775839806039e-07,
      "loss": 0.3715,
      "step": 14047
    },
    {
      "epoch": 1.7224129475232957,
      "grad_norm": 1.8120914064892168,
      "learning_rate": 2.5875298472676e-07,
      "loss": 0.3761,
      "step": 14048
    },
    {
      "epoch": 1.7225355566454144,
      "grad_norm": 1.9840681661127026,
      "learning_rate": 2.5852847759213623e-07,
      "loss": 0.4624,
      "step": 14049
    },
    {
      "epoch": 1.722658165767533,
      "grad_norm": 2.045576582437881,
      "learning_rate": 2.5830406258595955e-07,
      "loss": 0.4213,
      "step": 14050
    },
    {
      "epoch": 1.7227807748896518,
      "grad_norm": 2.0201173465340494,
      "learning_rate": 2.5807973971745475e-07,
      "loss": 0.4198,
      "step": 14051
    },
    {
      "epoch": 1.7229033840117705,
      "grad_norm": 1.9314485274309434,
      "learning_rate": 2.5785550899584204e-07,
      "loss": 0.4196,
      "step": 14052
    },
    {
      "epoch": 1.7230259931338892,
      "grad_norm": 1.8983013209198778,
      "learning_rate": 2.5763137043033737e-07,
      "loss": 0.4325,
      "step": 14053
    },
    {
      "epoch": 1.7231486022560079,
      "grad_norm": 2.1185487910054257,
      "learning_rate": 2.5740732403015253e-07,
      "loss": 0.4199,
      "step": 14054
    },
    {
      "epoch": 1.7232712113781266,
      "grad_norm": 1.961757319680289,
      "learning_rate": 2.5718336980449704e-07,
      "loss": 0.4709,
      "step": 14055
    },
    {
      "epoch": 1.7233938205002453,
      "grad_norm": 2.0885856762852733,
      "learning_rate": 2.5695950776257667e-07,
      "loss": 0.4586,
      "step": 14056
    },
    {
      "epoch": 1.723516429622364,
      "grad_norm": 1.8402929078596895,
      "learning_rate": 2.567357379135904e-07,
      "loss": 0.4033,
      "step": 14057
    },
    {
      "epoch": 1.7236390387444827,
      "grad_norm": 2.121850103355767,
      "learning_rate": 2.5651206026673673e-07,
      "loss": 0.4097,
      "step": 14058
    },
    {
      "epoch": 1.7237616478666014,
      "grad_norm": 1.9547408797451569,
      "learning_rate": 2.5628847483120973e-07,
      "loss": 0.397,
      "step": 14059
    },
    {
      "epoch": 1.72388425698872,
      "grad_norm": 1.9571925951183602,
      "learning_rate": 2.5606498161619836e-07,
      "loss": 0.4163,
      "step": 14060
    },
    {
      "epoch": 1.7240068661108388,
      "grad_norm": 2.2727638452911947,
      "learning_rate": 2.558415806308887e-07,
      "loss": 0.4542,
      "step": 14061
    },
    {
      "epoch": 1.7241294752329575,
      "grad_norm": 1.9823127649997225,
      "learning_rate": 2.556182718844627e-07,
      "loss": 0.4192,
      "step": 14062
    },
    {
      "epoch": 1.7242520843550762,
      "grad_norm": 1.997445431548458,
      "learning_rate": 2.5539505538609983e-07,
      "loss": 0.4797,
      "step": 14063
    },
    {
      "epoch": 1.7243746934771949,
      "grad_norm": 2.1125062845503946,
      "learning_rate": 2.551719311449738e-07,
      "loss": 0.4165,
      "step": 14064
    },
    {
      "epoch": 1.7244973025993136,
      "grad_norm": 1.8398977526399842,
      "learning_rate": 2.5494889917025507e-07,
      "loss": 0.4438,
      "step": 14065
    },
    {
      "epoch": 1.724619911721432,
      "grad_norm": 1.8697046245664473,
      "learning_rate": 2.547259594711116e-07,
      "loss": 0.4163,
      "step": 14066
    },
    {
      "epoch": 1.7247425208435507,
      "grad_norm": 2.0242626412498224,
      "learning_rate": 2.545031120567057e-07,
      "loss": 0.4339,
      "step": 14067
    },
    {
      "epoch": 1.7248651299656694,
      "grad_norm": 1.896781832139453,
      "learning_rate": 2.5428035693619735e-07,
      "loss": 0.4662,
      "step": 14068
    },
    {
      "epoch": 1.7249877390877881,
      "grad_norm": 1.866118704261449,
      "learning_rate": 2.5405769411874284e-07,
      "loss": 0.4401,
      "step": 14069
    },
    {
      "epoch": 1.7251103482099068,
      "grad_norm": 1.8069342503487962,
      "learning_rate": 2.538351236134923e-07,
      "loss": 0.4282,
      "step": 14070
    },
    {
      "epoch": 1.7252329573320255,
      "grad_norm": 1.7803958419721984,
      "learning_rate": 2.536126454295948e-07,
      "loss": 0.3701,
      "step": 14071
    },
    {
      "epoch": 1.7253555664541442,
      "grad_norm": 2.083092603740033,
      "learning_rate": 2.53390259576195e-07,
      "loss": 0.3942,
      "step": 14072
    },
    {
      "epoch": 1.725478175576263,
      "grad_norm": 1.8795648780678826,
      "learning_rate": 2.531679660624328e-07,
      "loss": 0.4284,
      "step": 14073
    },
    {
      "epoch": 1.7256007846983814,
      "grad_norm": 1.8023302748262071,
      "learning_rate": 2.5294576489744426e-07,
      "loss": 0.4268,
      "step": 14074
    },
    {
      "epoch": 1.7257233938205,
      "grad_norm": 1.9128159503491504,
      "learning_rate": 2.5272365609036315e-07,
      "loss": 0.4102,
      "step": 14075
    },
    {
      "epoch": 1.7258460029426188,
      "grad_norm": 2.083964310885614,
      "learning_rate": 2.525016396503185e-07,
      "loss": 0.4374,
      "step": 14076
    },
    {
      "epoch": 1.7259686120647375,
      "grad_norm": 1.8836500166921708,
      "learning_rate": 2.522797155864354e-07,
      "loss": 0.4276,
      "step": 14077
    },
    {
      "epoch": 1.7260912211868562,
      "grad_norm": 2.0792708107813045,
      "learning_rate": 2.52057883907835e-07,
      "loss": 0.4039,
      "step": 14078
    },
    {
      "epoch": 1.726213830308975,
      "grad_norm": 1.7997326341282567,
      "learning_rate": 2.518361446236353e-07,
      "loss": 0.4146,
      "step": 14079
    },
    {
      "epoch": 1.7263364394310936,
      "grad_norm": 1.8083233019495943,
      "learning_rate": 2.5161449774294993e-07,
      "loss": 0.4232,
      "step": 14080
    },
    {
      "epoch": 1.7264590485532123,
      "grad_norm": 1.8676542806162015,
      "learning_rate": 2.513929432748896e-07,
      "loss": 0.4102,
      "step": 14081
    },
    {
      "epoch": 1.726581657675331,
      "grad_norm": 1.9020757851546972,
      "learning_rate": 2.511714812285596e-07,
      "loss": 0.4461,
      "step": 14082
    },
    {
      "epoch": 1.7267042667974497,
      "grad_norm": 2.0202900423973404,
      "learning_rate": 2.509501116130636e-07,
      "loss": 0.4623,
      "step": 14083
    },
    {
      "epoch": 1.7268268759195684,
      "grad_norm": 1.9172574590954123,
      "learning_rate": 2.507288344374989e-07,
      "loss": 0.4215,
      "step": 14084
    },
    {
      "epoch": 1.726949485041687,
      "grad_norm": 1.941294105323367,
      "learning_rate": 2.505076497109618e-07,
      "loss": 0.4102,
      "step": 14085
    },
    {
      "epoch": 1.7270720941638058,
      "grad_norm": 1.849263748784832,
      "learning_rate": 2.502865574425428e-07,
      "loss": 0.4103,
      "step": 14086
    },
    {
      "epoch": 1.7271947032859245,
      "grad_norm": 2.1865129474159866,
      "learning_rate": 2.5006555764132855e-07,
      "loss": 0.3876,
      "step": 14087
    },
    {
      "epoch": 1.7273173124080432,
      "grad_norm": 1.7758271864897786,
      "learning_rate": 2.4984465031640286e-07,
      "loss": 0.4113,
      "step": 14088
    },
    {
      "epoch": 1.727439921530162,
      "grad_norm": 2.011505314927748,
      "learning_rate": 2.4962383547684627e-07,
      "loss": 0.4472,
      "step": 14089
    },
    {
      "epoch": 1.7275625306522806,
      "grad_norm": 1.859249157711975,
      "learning_rate": 2.4940311313173396e-07,
      "loss": 0.4221,
      "step": 14090
    },
    {
      "epoch": 1.7276851397743993,
      "grad_norm": 1.7712866959191416,
      "learning_rate": 2.4918248329013766e-07,
      "loss": 0.3827,
      "step": 14091
    },
    {
      "epoch": 1.727807748896518,
      "grad_norm": 2.067305196495132,
      "learning_rate": 2.4896194596112674e-07,
      "loss": 0.4508,
      "step": 14092
    },
    {
      "epoch": 1.7279303580186367,
      "grad_norm": 1.8853625772706168,
      "learning_rate": 2.4874150115376415e-07,
      "loss": 0.4215,
      "step": 14093
    },
    {
      "epoch": 1.7280529671407554,
      "grad_norm": 1.9614923812983367,
      "learning_rate": 2.485211488771119e-07,
      "loss": 0.4657,
      "step": 14094
    },
    {
      "epoch": 1.728175576262874,
      "grad_norm": 1.8423139211930049,
      "learning_rate": 2.483008891402261e-07,
      "loss": 0.4167,
      "step": 14095
    },
    {
      "epoch": 1.7282981853849928,
      "grad_norm": 1.9280590631850094,
      "learning_rate": 2.4808072195216054e-07,
      "loss": 0.3989,
      "step": 14096
    },
    {
      "epoch": 1.7284207945071113,
      "grad_norm": 1.9890685887027866,
      "learning_rate": 2.4786064732196327e-07,
      "loss": 0.4275,
      "step": 14097
    },
    {
      "epoch": 1.72854340362923,
      "grad_norm": 1.7709375344646194,
      "learning_rate": 2.476406652586813e-07,
      "loss": 0.4593,
      "step": 14098
    },
    {
      "epoch": 1.7286660127513487,
      "grad_norm": 1.8309362570171341,
      "learning_rate": 2.4742077577135535e-07,
      "loss": 0.4123,
      "step": 14099
    },
    {
      "epoch": 1.7287886218734674,
      "grad_norm": 1.8807303622604135,
      "learning_rate": 2.472009788690227e-07,
      "loss": 0.4006,
      "step": 14100
    },
    {
      "epoch": 1.728911230995586,
      "grad_norm": 1.9633956438795188,
      "learning_rate": 2.4698127456071836e-07,
      "loss": 0.4014,
      "step": 14101
    },
    {
      "epoch": 1.7290338401177048,
      "grad_norm": 1.9556745991201183,
      "learning_rate": 2.467616628554728e-07,
      "loss": 0.449,
      "step": 14102
    },
    {
      "epoch": 1.7291564492398235,
      "grad_norm": 1.9980145142172463,
      "learning_rate": 2.465421437623117e-07,
      "loss": 0.4036,
      "step": 14103
    },
    {
      "epoch": 1.7292790583619422,
      "grad_norm": 1.8384264644936918,
      "learning_rate": 2.463227172902577e-07,
      "loss": 0.4134,
      "step": 14104
    },
    {
      "epoch": 1.7294016674840607,
      "grad_norm": 2.159153653070982,
      "learning_rate": 2.461033834483301e-07,
      "loss": 0.4335,
      "step": 14105
    },
    {
      "epoch": 1.7295242766061794,
      "grad_norm": 1.9879380163737426,
      "learning_rate": 2.458841422455438e-07,
      "loss": 0.486,
      "step": 14106
    },
    {
      "epoch": 1.729646885728298,
      "grad_norm": 2.0925652441019285,
      "learning_rate": 2.4566499369090947e-07,
      "loss": 0.4217,
      "step": 14107
    },
    {
      "epoch": 1.7297694948504168,
      "grad_norm": 2.1566438065101026,
      "learning_rate": 2.4544593779343455e-07,
      "loss": 0.4504,
      "step": 14108
    },
    {
      "epoch": 1.7298921039725355,
      "grad_norm": 1.847216719270835,
      "learning_rate": 2.452269745621236e-07,
      "loss": 0.4249,
      "step": 14109
    },
    {
      "epoch": 1.7300147130946542,
      "grad_norm": 1.9751648146018437,
      "learning_rate": 2.45008104005976e-07,
      "loss": 0.4182,
      "step": 14110
    },
    {
      "epoch": 1.7301373222167729,
      "grad_norm": 1.9195304220777918,
      "learning_rate": 2.4478932613398674e-07,
      "loss": 0.4478,
      "step": 14111
    },
    {
      "epoch": 1.7302599313388916,
      "grad_norm": 2.044080112324382,
      "learning_rate": 2.4457064095514904e-07,
      "loss": 0.4228,
      "step": 14112
    },
    {
      "epoch": 1.7303825404610103,
      "grad_norm": 1.8421510388420794,
      "learning_rate": 2.4435204847845064e-07,
      "loss": 0.4556,
      "step": 14113
    },
    {
      "epoch": 1.730505149583129,
      "grad_norm": 1.8775712945255618,
      "learning_rate": 2.4413354871287654e-07,
      "loss": 0.4034,
      "step": 14114
    },
    {
      "epoch": 1.7306277587052477,
      "grad_norm": 2.1739316793685015,
      "learning_rate": 2.439151416674076e-07,
      "loss": 0.401,
      "step": 14115
    },
    {
      "epoch": 1.7307503678273664,
      "grad_norm": 1.797661054868667,
      "learning_rate": 2.4369682735102043e-07,
      "loss": 0.3854,
      "step": 14116
    },
    {
      "epoch": 1.730872976949485,
      "grad_norm": 1.897735010327971,
      "learning_rate": 2.434786057726876e-07,
      "loss": 0.4361,
      "step": 14117
    },
    {
      "epoch": 1.7309955860716038,
      "grad_norm": 1.9915234243080544,
      "learning_rate": 2.4326047694137876e-07,
      "loss": 0.4245,
      "step": 14118
    },
    {
      "epoch": 1.7311181951937225,
      "grad_norm": 1.8475566986313308,
      "learning_rate": 2.430424408660609e-07,
      "loss": 0.4604,
      "step": 14119
    },
    {
      "epoch": 1.7312408043158412,
      "grad_norm": 1.858099930532083,
      "learning_rate": 2.4282449755569317e-07,
      "loss": 0.4014,
      "step": 14120
    },
    {
      "epoch": 1.7313634134379599,
      "grad_norm": 1.887251744164011,
      "learning_rate": 2.426066470192345e-07,
      "loss": 0.39,
      "step": 14121
    },
    {
      "epoch": 1.7314860225600786,
      "grad_norm": 2.1263646101283395,
      "learning_rate": 2.423888892656398e-07,
      "loss": 0.4473,
      "step": 14122
    },
    {
      "epoch": 1.7316086316821973,
      "grad_norm": 1.8673901231125076,
      "learning_rate": 2.421712243038582e-07,
      "loss": 0.4036,
      "step": 14123
    },
    {
      "epoch": 1.731731240804316,
      "grad_norm": 2.0889331649243137,
      "learning_rate": 2.4195365214283615e-07,
      "loss": 0.4542,
      "step": 14124
    },
    {
      "epoch": 1.7318538499264347,
      "grad_norm": 1.9130124710282506,
      "learning_rate": 2.417361727915171e-07,
      "loss": 0.4546,
      "step": 14125
    },
    {
      "epoch": 1.7319764590485534,
      "grad_norm": 1.9062762734354144,
      "learning_rate": 2.415187862588389e-07,
      "loss": 0.4172,
      "step": 14126
    },
    {
      "epoch": 1.732099068170672,
      "grad_norm": 2.050276275259089,
      "learning_rate": 2.4130149255373713e-07,
      "loss": 0.4381,
      "step": 14127
    },
    {
      "epoch": 1.7322216772927905,
      "grad_norm": 1.934013669060603,
      "learning_rate": 2.410842916851425e-07,
      "loss": 0.4121,
      "step": 14128
    },
    {
      "epoch": 1.7323442864149092,
      "grad_norm": 1.7972208014418478,
      "learning_rate": 2.4086718366198273e-07,
      "loss": 0.4343,
      "step": 14129
    },
    {
      "epoch": 1.732466895537028,
      "grad_norm": 1.922937444621063,
      "learning_rate": 2.4065016849318086e-07,
      "loss": 0.4302,
      "step": 14130
    },
    {
      "epoch": 1.7325895046591466,
      "grad_norm": 1.8917590502242256,
      "learning_rate": 2.4043324618765767e-07,
      "loss": 0.4147,
      "step": 14131
    },
    {
      "epoch": 1.7327121137812653,
      "grad_norm": 2.1221332573484943,
      "learning_rate": 2.4021641675432804e-07,
      "loss": 0.4317,
      "step": 14132
    },
    {
      "epoch": 1.732834722903384,
      "grad_norm": 1.8128011870980483,
      "learning_rate": 2.3999968020210374e-07,
      "loss": 0.4224,
      "step": 14133
    },
    {
      "epoch": 1.7329573320255027,
      "grad_norm": 2.188582426795779,
      "learning_rate": 2.39783036539894e-07,
      "loss": 0.4442,
      "step": 14134
    },
    {
      "epoch": 1.7330799411476214,
      "grad_norm": 1.9176930679158082,
      "learning_rate": 2.395664857766028e-07,
      "loss": 0.408,
      "step": 14135
    },
    {
      "epoch": 1.73320255026974,
      "grad_norm": 2.0575549761824004,
      "learning_rate": 2.393500279211311e-07,
      "loss": 0.4088,
      "step": 14136
    },
    {
      "epoch": 1.7333251593918586,
      "grad_norm": 1.8820329651506393,
      "learning_rate": 2.391336629823751e-07,
      "loss": 0.4124,
      "step": 14137
    },
    {
      "epoch": 1.7334477685139773,
      "grad_norm": 1.945293115896585,
      "learning_rate": 2.38917390969228e-07,
      "loss": 0.4519,
      "step": 14138
    },
    {
      "epoch": 1.733570377636096,
      "grad_norm": 2.202491594996862,
      "learning_rate": 2.387012118905793e-07,
      "loss": 0.4341,
      "step": 14139
    },
    {
      "epoch": 1.7336929867582147,
      "grad_norm": 2.1838092636597835,
      "learning_rate": 2.384851257553145e-07,
      "loss": 0.396,
      "step": 14140
    },
    {
      "epoch": 1.7338155958803334,
      "grad_norm": 1.7920954450613698,
      "learning_rate": 2.3826913257231383e-07,
      "loss": 0.4113,
      "step": 14141
    },
    {
      "epoch": 1.733938205002452,
      "grad_norm": 1.8235857833312181,
      "learning_rate": 2.380532323504567e-07,
      "loss": 0.4065,
      "step": 14142
    },
    {
      "epoch": 1.7340608141245708,
      "grad_norm": 1.7966929420751778,
      "learning_rate": 2.3783742509861567e-07,
      "loss": 0.3652,
      "step": 14143
    },
    {
      "epoch": 1.7341834232466895,
      "grad_norm": 1.8466721489887192,
      "learning_rate": 2.376217108256615e-07,
      "loss": 0.3998,
      "step": 14144
    },
    {
      "epoch": 1.7343060323688082,
      "grad_norm": 1.9477292439888494,
      "learning_rate": 2.3740608954046063e-07,
      "loss": 0.4551,
      "step": 14145
    },
    {
      "epoch": 1.734428641490927,
      "grad_norm": 1.8773182662180405,
      "learning_rate": 2.3719056125187434e-07,
      "loss": 0.4168,
      "step": 14146
    },
    {
      "epoch": 1.7345512506130456,
      "grad_norm": 2.0875091865907773,
      "learning_rate": 2.3697512596876192e-07,
      "loss": 0.4133,
      "step": 14147
    },
    {
      "epoch": 1.7346738597351643,
      "grad_norm": 2.1638135714109876,
      "learning_rate": 2.367597836999788e-07,
      "loss": 0.4618,
      "step": 14148
    },
    {
      "epoch": 1.734796468857283,
      "grad_norm": 1.9498005426818725,
      "learning_rate": 2.3654453445437543e-07,
      "loss": 0.4218,
      "step": 14149
    },
    {
      "epoch": 1.7349190779794017,
      "grad_norm": 2.019693908454731,
      "learning_rate": 2.3632937824079828e-07,
      "loss": 0.4615,
      "step": 14150
    },
    {
      "epoch": 1.7350416871015204,
      "grad_norm": 2.014253261830236,
      "learning_rate": 2.3611431506809112e-07,
      "loss": 0.4626,
      "step": 14151
    },
    {
      "epoch": 1.735164296223639,
      "grad_norm": 1.8733501355241213,
      "learning_rate": 2.3589934494509442e-07,
      "loss": 0.4665,
      "step": 14152
    },
    {
      "epoch": 1.7352869053457578,
      "grad_norm": 1.893862167474128,
      "learning_rate": 2.356844678806422e-07,
      "loss": 0.4808,
      "step": 14153
    },
    {
      "epoch": 1.7354095144678765,
      "grad_norm": 2.1260531499025737,
      "learning_rate": 2.3546968388356685e-07,
      "loss": 0.4391,
      "step": 14154
    },
    {
      "epoch": 1.7355321235899952,
      "grad_norm": 1.9687892686161603,
      "learning_rate": 2.352549929626971e-07,
      "loss": 0.3903,
      "step": 14155
    },
    {
      "epoch": 1.735654732712114,
      "grad_norm": 1.8970522537287062,
      "learning_rate": 2.3504039512685623e-07,
      "loss": 0.402,
      "step": 14156
    },
    {
      "epoch": 1.7357773418342326,
      "grad_norm": 1.844088692818511,
      "learning_rate": 2.3482589038486465e-07,
      "loss": 0.4332,
      "step": 14157
    },
    {
      "epoch": 1.7358999509563513,
      "grad_norm": 1.8062695134883735,
      "learning_rate": 2.3461147874553918e-07,
      "loss": 0.3972,
      "step": 14158
    },
    {
      "epoch": 1.73602256007847,
      "grad_norm": 1.9011016457358272,
      "learning_rate": 2.343971602176931e-07,
      "loss": 0.4003,
      "step": 14159
    },
    {
      "epoch": 1.7361451692005885,
      "grad_norm": 1.874712550770138,
      "learning_rate": 2.3418293481013405e-07,
      "loss": 0.4168,
      "step": 14160
    },
    {
      "epoch": 1.7362677783227072,
      "grad_norm": 2.0137480695997527,
      "learning_rate": 2.3396880253166804e-07,
      "loss": 0.4414,
      "step": 14161
    },
    {
      "epoch": 1.7363903874448259,
      "grad_norm": 1.883320708975016,
      "learning_rate": 2.337547633910961e-07,
      "loss": 0.3768,
      "step": 14162
    },
    {
      "epoch": 1.7365129965669446,
      "grad_norm": 1.884004469629048,
      "learning_rate": 2.3354081739721508e-07,
      "loss": 0.4228,
      "step": 14163
    },
    {
      "epoch": 1.7366356056890633,
      "grad_norm": 2.103727064972753,
      "learning_rate": 2.3332696455881875e-07,
      "loss": 0.4553,
      "step": 14164
    },
    {
      "epoch": 1.736758214811182,
      "grad_norm": 1.875283305031028,
      "learning_rate": 2.331132048846979e-07,
      "loss": 0.4209,
      "step": 14165
    },
    {
      "epoch": 1.7368808239333007,
      "grad_norm": 1.9779976954638794,
      "learning_rate": 2.3289953838363688e-07,
      "loss": 0.4359,
      "step": 14166
    },
    {
      "epoch": 1.7370034330554194,
      "grad_norm": 1.921169975538708,
      "learning_rate": 2.326859650644181e-07,
      "loss": 0.3997,
      "step": 14167
    },
    {
      "epoch": 1.7371260421775379,
      "grad_norm": 1.967519560652213,
      "learning_rate": 2.3247248493582064e-07,
      "loss": 0.4616,
      "step": 14168
    },
    {
      "epoch": 1.7372486512996566,
      "grad_norm": 1.9716255897435162,
      "learning_rate": 2.322590980066186e-07,
      "loss": 0.46,
      "step": 14169
    },
    {
      "epoch": 1.7373712604217753,
      "grad_norm": 1.9745788558869086,
      "learning_rate": 2.3204580428558165e-07,
      "loss": 0.4569,
      "step": 14170
    },
    {
      "epoch": 1.737493869543894,
      "grad_norm": 2.0536425931132385,
      "learning_rate": 2.318326037814772e-07,
      "loss": 0.4192,
      "step": 14171
    },
    {
      "epoch": 1.7376164786660127,
      "grad_norm": 2.04265345544865,
      "learning_rate": 2.316194965030691e-07,
      "loss": 0.3773,
      "step": 14172
    },
    {
      "epoch": 1.7377390877881314,
      "grad_norm": 2.1095971543211243,
      "learning_rate": 2.3140648245911502e-07,
      "loss": 0.4402,
      "step": 14173
    },
    {
      "epoch": 1.73786169691025,
      "grad_norm": 1.9146095292893917,
      "learning_rate": 2.311935616583705e-07,
      "loss": 0.4439,
      "step": 14174
    },
    {
      "epoch": 1.7379843060323688,
      "grad_norm": 2.0568630208769383,
      "learning_rate": 2.3098073410958765e-07,
      "loss": 0.4024,
      "step": 14175
    },
    {
      "epoch": 1.7381069151544875,
      "grad_norm": 2.0959979223347682,
      "learning_rate": 2.3076799982151316e-07,
      "loss": 0.4219,
      "step": 14176
    },
    {
      "epoch": 1.7382295242766062,
      "grad_norm": 1.8533708621491836,
      "learning_rate": 2.3055535880289193e-07,
      "loss": 0.4195,
      "step": 14177
    },
    {
      "epoch": 1.7383521333987249,
      "grad_norm": 2.116118750125328,
      "learning_rate": 2.3034281106246253e-07,
      "loss": 0.431,
      "step": 14178
    },
    {
      "epoch": 1.7384747425208436,
      "grad_norm": 1.9339165021818827,
      "learning_rate": 2.3013035660896217e-07,
      "loss": 0.4081,
      "step": 14179
    },
    {
      "epoch": 1.7385973516429623,
      "grad_norm": 1.943380278352194,
      "learning_rate": 2.2991799545112214e-07,
      "loss": 0.4282,
      "step": 14180
    },
    {
      "epoch": 1.738719960765081,
      "grad_norm": 1.8883769961329506,
      "learning_rate": 2.2970572759767163e-07,
      "loss": 0.3927,
      "step": 14181
    },
    {
      "epoch": 1.7388425698871997,
      "grad_norm": 1.9588895895138185,
      "learning_rate": 2.2949355305733529e-07,
      "loss": 0.4332,
      "step": 14182
    },
    {
      "epoch": 1.7389651790093184,
      "grad_norm": 1.989755695871147,
      "learning_rate": 2.292814718388328e-07,
      "loss": 0.4052,
      "step": 14183
    },
    {
      "epoch": 1.739087788131437,
      "grad_norm": 1.8894887799757063,
      "learning_rate": 2.2906948395088197e-07,
      "loss": 0.3988,
      "step": 14184
    },
    {
      "epoch": 1.7392103972535558,
      "grad_norm": 1.8979417590352543,
      "learning_rate": 2.2885758940219633e-07,
      "loss": 0.4175,
      "step": 14185
    },
    {
      "epoch": 1.7393330063756745,
      "grad_norm": 2.227621875755111,
      "learning_rate": 2.286457882014842e-07,
      "loss": 0.4019,
      "step": 14186
    },
    {
      "epoch": 1.7394556154977932,
      "grad_norm": 2.222472510020367,
      "learning_rate": 2.2843408035745112e-07,
      "loss": 0.4782,
      "step": 14187
    },
    {
      "epoch": 1.7395782246199119,
      "grad_norm": 2.016446258896941,
      "learning_rate": 2.2822246587879905e-07,
      "loss": 0.4153,
      "step": 14188
    },
    {
      "epoch": 1.7397008337420306,
      "grad_norm": 2.0333346057566537,
      "learning_rate": 2.2801094477422514e-07,
      "loss": 0.4334,
      "step": 14189
    },
    {
      "epoch": 1.7398234428641492,
      "grad_norm": 1.7729465112461738,
      "learning_rate": 2.2779951705242415e-07,
      "loss": 0.4434,
      "step": 14190
    },
    {
      "epoch": 1.7399460519862677,
      "grad_norm": 1.9773527378287676,
      "learning_rate": 2.2758818272208522e-07,
      "loss": 0.4557,
      "step": 14191
    },
    {
      "epoch": 1.7400686611083864,
      "grad_norm": 1.978560943816195,
      "learning_rate": 2.273769417918953e-07,
      "loss": 0.4613,
      "step": 14192
    },
    {
      "epoch": 1.7401912702305051,
      "grad_norm": 2.031235708986976,
      "learning_rate": 2.2716579427053576e-07,
      "loss": 0.3899,
      "step": 14193
    },
    {
      "epoch": 1.7403138793526238,
      "grad_norm": 1.848339011732577,
      "learning_rate": 2.2695474016668663e-07,
      "loss": 0.3834,
      "step": 14194
    },
    {
      "epoch": 1.7404364884747425,
      "grad_norm": 1.9490270004161112,
      "learning_rate": 2.2674377948902177e-07,
      "loss": 0.4248,
      "step": 14195
    },
    {
      "epoch": 1.7405590975968612,
      "grad_norm": 1.737792374436734,
      "learning_rate": 2.265329122462115e-07,
      "loss": 0.4457,
      "step": 14196
    },
    {
      "epoch": 1.74068170671898,
      "grad_norm": 1.8857853739843897,
      "learning_rate": 2.263221384469233e-07,
      "loss": 0.4127,
      "step": 14197
    },
    {
      "epoch": 1.7408043158410986,
      "grad_norm": 2.030718442724978,
      "learning_rate": 2.2611145809982082e-07,
      "loss": 0.4343,
      "step": 14198
    },
    {
      "epoch": 1.740926924963217,
      "grad_norm": 1.803893058535886,
      "learning_rate": 2.2590087121356325e-07,
      "loss": 0.417,
      "step": 14199
    },
    {
      "epoch": 1.7410495340853358,
      "grad_norm": 1.9206461417774494,
      "learning_rate": 2.256903777968053e-07,
      "loss": 0.4219,
      "step": 14200
    },
    {
      "epoch": 1.7411721432074545,
      "grad_norm": 2.035774923803477,
      "learning_rate": 2.254799778581998e-07,
      "loss": 0.4207,
      "step": 14201
    },
    {
      "epoch": 1.7412947523295732,
      "grad_norm": 1.8962783411525577,
      "learning_rate": 2.2526967140639373e-07,
      "loss": 0.4156,
      "step": 14202
    },
    {
      "epoch": 1.741417361451692,
      "grad_norm": 2.034540248228445,
      "learning_rate": 2.250594584500307e-07,
      "loss": 0.4631,
      "step": 14203
    },
    {
      "epoch": 1.7415399705738106,
      "grad_norm": 1.8287598005456671,
      "learning_rate": 2.2484933899775157e-07,
      "loss": 0.4113,
      "step": 14204
    },
    {
      "epoch": 1.7416625796959293,
      "grad_norm": 1.8887346405425731,
      "learning_rate": 2.246393130581928e-07,
      "loss": 0.442,
      "step": 14205
    },
    {
      "epoch": 1.741785188818048,
      "grad_norm": 1.9722047736322081,
      "learning_rate": 2.2442938063998665e-07,
      "loss": 0.4433,
      "step": 14206
    },
    {
      "epoch": 1.7419077979401667,
      "grad_norm": 2.043104712967313,
      "learning_rate": 2.242195417517609e-07,
      "loss": 0.4354,
      "step": 14207
    },
    {
      "epoch": 1.7420304070622854,
      "grad_norm": 2.0839969599457224,
      "learning_rate": 2.240097964021415e-07,
      "loss": 0.4244,
      "step": 14208
    },
    {
      "epoch": 1.742153016184404,
      "grad_norm": 1.9372199774052663,
      "learning_rate": 2.2380014459974843e-07,
      "loss": 0.4208,
      "step": 14209
    },
    {
      "epoch": 1.7422756253065228,
      "grad_norm": 2.0316088433400457,
      "learning_rate": 2.2359058635319896e-07,
      "loss": 0.432,
      "step": 14210
    },
    {
      "epoch": 1.7423982344286415,
      "grad_norm": 2.0135160682432423,
      "learning_rate": 2.2338112167110682e-07,
      "loss": 0.516,
      "step": 14211
    },
    {
      "epoch": 1.7425208435507602,
      "grad_norm": 2.1346957041320276,
      "learning_rate": 2.231717505620812e-07,
      "loss": 0.4636,
      "step": 14212
    },
    {
      "epoch": 1.742643452672879,
      "grad_norm": 1.8205463043207664,
      "learning_rate": 2.2296247303472685e-07,
      "loss": 0.3984,
      "step": 14213
    },
    {
      "epoch": 1.7427660617949976,
      "grad_norm": 2.055894727500674,
      "learning_rate": 2.227532890976461e-07,
      "loss": 0.4209,
      "step": 14214
    },
    {
      "epoch": 1.7428886709171163,
      "grad_norm": 1.9893164569374846,
      "learning_rate": 2.225441987594376e-07,
      "loss": 0.4352,
      "step": 14215
    },
    {
      "epoch": 1.743011280039235,
      "grad_norm": 1.9929269585706508,
      "learning_rate": 2.223352020286937e-07,
      "loss": 0.4024,
      "step": 14216
    },
    {
      "epoch": 1.7431338891613537,
      "grad_norm": 1.8366028648379953,
      "learning_rate": 2.2212629891400523e-07,
      "loss": 0.384,
      "step": 14217
    },
    {
      "epoch": 1.7432564982834724,
      "grad_norm": 1.9262772627105893,
      "learning_rate": 2.2191748942395896e-07,
      "loss": 0.4361,
      "step": 14218
    },
    {
      "epoch": 1.743379107405591,
      "grad_norm": 1.7986476936242142,
      "learning_rate": 2.2170877356713667e-07,
      "loss": 0.4139,
      "step": 14219
    },
    {
      "epoch": 1.7435017165277098,
      "grad_norm": 1.8962094288541984,
      "learning_rate": 2.21500151352117e-07,
      "loss": 0.3958,
      "step": 14220
    },
    {
      "epoch": 1.7436243256498285,
      "grad_norm": 1.9449119387891736,
      "learning_rate": 2.2129162278747533e-07,
      "loss": 0.4431,
      "step": 14221
    },
    {
      "epoch": 1.7437469347719472,
      "grad_norm": 1.992537647822792,
      "learning_rate": 2.210831878817815e-07,
      "loss": 0.3808,
      "step": 14222
    },
    {
      "epoch": 1.7438695438940657,
      "grad_norm": 1.9093492398414182,
      "learning_rate": 2.2087484664360366e-07,
      "loss": 0.4346,
      "step": 14223
    },
    {
      "epoch": 1.7439921530161844,
      "grad_norm": 1.6965931798952774,
      "learning_rate": 2.2066659908150405e-07,
      "loss": 0.4024,
      "step": 14224
    },
    {
      "epoch": 1.744114762138303,
      "grad_norm": 1.9008873814412655,
      "learning_rate": 2.2045844520404285e-07,
      "loss": 0.4125,
      "step": 14225
    },
    {
      "epoch": 1.7442373712604218,
      "grad_norm": 1.8862798173912763,
      "learning_rate": 2.2025038501977485e-07,
      "loss": 0.4237,
      "step": 14226
    },
    {
      "epoch": 1.7443599803825405,
      "grad_norm": 1.7749235181701863,
      "learning_rate": 2.200424185372524e-07,
      "loss": 0.4127,
      "step": 14227
    },
    {
      "epoch": 1.7444825895046592,
      "grad_norm": 1.9756515295694643,
      "learning_rate": 2.1983454576502277e-07,
      "loss": 0.4084,
      "step": 14228
    },
    {
      "epoch": 1.7446051986267779,
      "grad_norm": 2.091031879090457,
      "learning_rate": 2.1962676671162974e-07,
      "loss": 0.433,
      "step": 14229
    },
    {
      "epoch": 1.7447278077488964,
      "grad_norm": 2.244918649806147,
      "learning_rate": 2.194190813856137e-07,
      "loss": 0.4806,
      "step": 14230
    },
    {
      "epoch": 1.744850416871015,
      "grad_norm": 1.8177936466384501,
      "learning_rate": 2.192114897955111e-07,
      "loss": 0.4,
      "step": 14231
    },
    {
      "epoch": 1.7449730259931338,
      "grad_norm": 1.9312000784287287,
      "learning_rate": 2.1900399194985434e-07,
      "loss": 0.4514,
      "step": 14232
    },
    {
      "epoch": 1.7450956351152525,
      "grad_norm": 1.8310001195615482,
      "learning_rate": 2.18796587857171e-07,
      "loss": 0.3927,
      "step": 14233
    },
    {
      "epoch": 1.7452182442373712,
      "grad_norm": 1.9417101687051126,
      "learning_rate": 2.1858927752598652e-07,
      "loss": 0.4133,
      "step": 14234
    },
    {
      "epoch": 1.7453408533594899,
      "grad_norm": 1.9914977234054763,
      "learning_rate": 2.183820609648224e-07,
      "loss": 0.4329,
      "step": 14235
    },
    {
      "epoch": 1.7454634624816086,
      "grad_norm": 1.9123629184157889,
      "learning_rate": 2.1817493818219458e-07,
      "loss": 0.4459,
      "step": 14236
    },
    {
      "epoch": 1.7455860716037273,
      "grad_norm": 1.8281889442697554,
      "learning_rate": 2.1796790918661605e-07,
      "loss": 0.3812,
      "step": 14237
    },
    {
      "epoch": 1.745708680725846,
      "grad_norm": 2.047481796410662,
      "learning_rate": 2.1776097398659684e-07,
      "loss": 0.4623,
      "step": 14238
    },
    {
      "epoch": 1.7458312898479647,
      "grad_norm": 1.9568490544498993,
      "learning_rate": 2.1755413259064163e-07,
      "loss": 0.4198,
      "step": 14239
    },
    {
      "epoch": 1.7459538989700834,
      "grad_norm": 1.963316982554082,
      "learning_rate": 2.173473850072527e-07,
      "loss": 0.4602,
      "step": 14240
    },
    {
      "epoch": 1.746076508092202,
      "grad_norm": 1.9808359094633199,
      "learning_rate": 2.1714073124492752e-07,
      "loss": 0.4461,
      "step": 14241
    },
    {
      "epoch": 1.7461991172143208,
      "grad_norm": 1.9497337654052591,
      "learning_rate": 2.169341713121592e-07,
      "loss": 0.4359,
      "step": 14242
    },
    {
      "epoch": 1.7463217263364395,
      "grad_norm": 1.7261224984453771,
      "learning_rate": 2.1672770521743825e-07,
      "loss": 0.4783,
      "step": 14243
    },
    {
      "epoch": 1.7464443354585582,
      "grad_norm": 1.8527184710314268,
      "learning_rate": 2.1652133296925143e-07,
      "loss": 0.4304,
      "step": 14244
    },
    {
      "epoch": 1.7465669445806769,
      "grad_norm": 2.023201950731179,
      "learning_rate": 2.1631505457608031e-07,
      "loss": 0.4463,
      "step": 14245
    },
    {
      "epoch": 1.7466895537027955,
      "grad_norm": 2.3460633768235195,
      "learning_rate": 2.1610887004640314e-07,
      "loss": 0.4699,
      "step": 14246
    },
    {
      "epoch": 1.7468121628249142,
      "grad_norm": 1.955370745438236,
      "learning_rate": 2.1590277938869446e-07,
      "loss": 0.3688,
      "step": 14247
    },
    {
      "epoch": 1.746934771947033,
      "grad_norm": 1.856772936298183,
      "learning_rate": 2.1569678261142619e-07,
      "loss": 0.4346,
      "step": 14248
    },
    {
      "epoch": 1.7470573810691516,
      "grad_norm": 2.1391960460868558,
      "learning_rate": 2.1549087972306342e-07,
      "loss": 0.4499,
      "step": 14249
    },
    {
      "epoch": 1.7471799901912703,
      "grad_norm": 1.9539536712972239,
      "learning_rate": 2.1528507073206972e-07,
      "loss": 0.3994,
      "step": 14250
    },
    {
      "epoch": 1.747302599313389,
      "grad_norm": 1.9606033530206515,
      "learning_rate": 2.1507935564690496e-07,
      "loss": 0.4222,
      "step": 14251
    },
    {
      "epoch": 1.7474252084355077,
      "grad_norm": 1.9055417688158829,
      "learning_rate": 2.1487373447602376e-07,
      "loss": 0.4044,
      "step": 14252
    },
    {
      "epoch": 1.7475478175576264,
      "grad_norm": 1.7982758206353264,
      "learning_rate": 2.146682072278769e-07,
      "loss": 0.4181,
      "step": 14253
    },
    {
      "epoch": 1.747670426679745,
      "grad_norm": 2.0275403958283023,
      "learning_rate": 2.1446277391091287e-07,
      "loss": 0.4263,
      "step": 14254
    },
    {
      "epoch": 1.7477930358018636,
      "grad_norm": 2.127246193563599,
      "learning_rate": 2.1425743453357546e-07,
      "loss": 0.4546,
      "step": 14255
    },
    {
      "epoch": 1.7479156449239823,
      "grad_norm": 1.9559150193722952,
      "learning_rate": 2.1405218910430376e-07,
      "loss": 0.3724,
      "step": 14256
    },
    {
      "epoch": 1.748038254046101,
      "grad_norm": 1.910880244632579,
      "learning_rate": 2.1384703763153437e-07,
      "loss": 0.4636,
      "step": 14257
    },
    {
      "epoch": 1.7481608631682197,
      "grad_norm": 2.07220656770727,
      "learning_rate": 2.1364198012369885e-07,
      "loss": 0.3923,
      "step": 14258
    },
    {
      "epoch": 1.7482834722903384,
      "grad_norm": 1.9564703481545205,
      "learning_rate": 2.1343701658922549e-07,
      "loss": 0.3946,
      "step": 14259
    },
    {
      "epoch": 1.7484060814124571,
      "grad_norm": 2.0806471233131982,
      "learning_rate": 2.1323214703653866e-07,
      "loss": 0.4569,
      "step": 14260
    },
    {
      "epoch": 1.7485286905345758,
      "grad_norm": 1.9798770290378334,
      "learning_rate": 2.1302737147405966e-07,
      "loss": 0.3999,
      "step": 14261
    },
    {
      "epoch": 1.7486512996566943,
      "grad_norm": 2.0427368049234,
      "learning_rate": 2.1282268991020399e-07,
      "loss": 0.4065,
      "step": 14262
    },
    {
      "epoch": 1.748773908778813,
      "grad_norm": 1.9306764066586766,
      "learning_rate": 2.1261810235338464e-07,
      "loss": 0.4476,
      "step": 14263
    },
    {
      "epoch": 1.7488965179009317,
      "grad_norm": 1.753254648605776,
      "learning_rate": 2.1241360881201128e-07,
      "loss": 0.444,
      "step": 14264
    },
    {
      "epoch": 1.7490191270230504,
      "grad_norm": 1.9520145528308912,
      "learning_rate": 2.122092092944883e-07,
      "loss": 0.4356,
      "step": 14265
    },
    {
      "epoch": 1.749141736145169,
      "grad_norm": 1.9199121633022918,
      "learning_rate": 2.1200490380921673e-07,
      "loss": 0.4147,
      "step": 14266
    },
    {
      "epoch": 1.7492643452672878,
      "grad_norm": 2.0410995376890573,
      "learning_rate": 2.1180069236459432e-07,
      "loss": 0.4309,
      "step": 14267
    },
    {
      "epoch": 1.7493869543894065,
      "grad_norm": 2.0405022123979735,
      "learning_rate": 2.1159657496901464e-07,
      "loss": 0.4257,
      "step": 14268
    },
    {
      "epoch": 1.7495095635115252,
      "grad_norm": 2.0123819296858434,
      "learning_rate": 2.1139255163086707e-07,
      "loss": 0.3925,
      "step": 14269
    },
    {
      "epoch": 1.749632172633644,
      "grad_norm": 1.968181936020823,
      "learning_rate": 2.1118862235853688e-07,
      "loss": 0.4649,
      "step": 14270
    },
    {
      "epoch": 1.7497547817557626,
      "grad_norm": 2.110806872086328,
      "learning_rate": 2.1098478716040676e-07,
      "loss": 0.4059,
      "step": 14271
    },
    {
      "epoch": 1.7498773908778813,
      "grad_norm": 1.9371981703653431,
      "learning_rate": 2.1078104604485394e-07,
      "loss": 0.3855,
      "step": 14272
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0911139860255945,
      "learning_rate": 2.105773990202531e-07,
      "loss": 0.4142,
      "step": 14273
    },
    {
      "epoch": 1.7501226091221187,
      "grad_norm": 1.9089418064845014,
      "learning_rate": 2.103738460949739e-07,
      "loss": 0.3728,
      "step": 14274
    },
    {
      "epoch": 1.7502452182442374,
      "grad_norm": 1.9489580491957241,
      "learning_rate": 2.1017038727738358e-07,
      "loss": 0.4206,
      "step": 14275
    },
    {
      "epoch": 1.750367827366356,
      "grad_norm": 1.785464377581407,
      "learning_rate": 2.0996702257584377e-07,
      "loss": 0.4342,
      "step": 14276
    },
    {
      "epoch": 1.7504904364884748,
      "grad_norm": 1.9827294504336197,
      "learning_rate": 2.097637519987139e-07,
      "loss": 0.4163,
      "step": 14277
    },
    {
      "epoch": 1.7506130456105935,
      "grad_norm": 1.935508782405361,
      "learning_rate": 2.095605755543484e-07,
      "loss": 0.4197,
      "step": 14278
    },
    {
      "epoch": 1.7507356547327122,
      "grad_norm": 1.939282427611402,
      "learning_rate": 2.093574932510975e-07,
      "loss": 0.4531,
      "step": 14279
    },
    {
      "epoch": 1.750858263854831,
      "grad_norm": 1.9305335738650922,
      "learning_rate": 2.09154505097309e-07,
      "loss": 0.3968,
      "step": 14280
    },
    {
      "epoch": 1.7509808729769496,
      "grad_norm": 1.9573039276539592,
      "learning_rate": 2.089516111013265e-07,
      "loss": 0.4384,
      "step": 14281
    },
    {
      "epoch": 1.7511034820990683,
      "grad_norm": 2.1664293648028274,
      "learning_rate": 2.0874881127148887e-07,
      "loss": 0.4598,
      "step": 14282
    },
    {
      "epoch": 1.751226091221187,
      "grad_norm": 1.8230052430553705,
      "learning_rate": 2.0854610561613087e-07,
      "loss": 0.4167,
      "step": 14283
    },
    {
      "epoch": 1.7513487003433057,
      "grad_norm": 2.1385331909918275,
      "learning_rate": 2.0834349414358497e-07,
      "loss": 0.4091,
      "step": 14284
    },
    {
      "epoch": 1.7514713094654242,
      "grad_norm": 1.7766226246730803,
      "learning_rate": 2.081409768621781e-07,
      "loss": 0.4487,
      "step": 14285
    },
    {
      "epoch": 1.7515939185875429,
      "grad_norm": 1.8416974112756401,
      "learning_rate": 2.0793855378023504e-07,
      "loss": 0.4323,
      "step": 14286
    },
    {
      "epoch": 1.7517165277096616,
      "grad_norm": 1.8478670500409713,
      "learning_rate": 2.0773622490607466e-07,
      "loss": 0.4385,
      "step": 14287
    },
    {
      "epoch": 1.7518391368317803,
      "grad_norm": 2.046079717671001,
      "learning_rate": 2.0753399024801419e-07,
      "loss": 0.4494,
      "step": 14288
    },
    {
      "epoch": 1.751961745953899,
      "grad_norm": 1.973258507902422,
      "learning_rate": 2.073318498143645e-07,
      "loss": 0.3933,
      "step": 14289
    },
    {
      "epoch": 1.7520843550760177,
      "grad_norm": 2.054407826529022,
      "learning_rate": 2.0712980361343532e-07,
      "loss": 0.4474,
      "step": 14290
    },
    {
      "epoch": 1.7522069641981364,
      "grad_norm": 1.9686516693916372,
      "learning_rate": 2.069278516535303e-07,
      "loss": 0.4192,
      "step": 14291
    },
    {
      "epoch": 1.752329573320255,
      "grad_norm": 2.000629228937426,
      "learning_rate": 2.0672599394294974e-07,
      "loss": 0.4212,
      "step": 14292
    },
    {
      "epoch": 1.7524521824423736,
      "grad_norm": 1.913781113819877,
      "learning_rate": 2.0652423048999087e-07,
      "loss": 0.4036,
      "step": 14293
    },
    {
      "epoch": 1.7525747915644923,
      "grad_norm": 2.086157724784073,
      "learning_rate": 2.0632256130294653e-07,
      "loss": 0.4217,
      "step": 14294
    },
    {
      "epoch": 1.752697400686611,
      "grad_norm": 1.9805628436169567,
      "learning_rate": 2.0612098639010592e-07,
      "loss": 0.3979,
      "step": 14295
    },
    {
      "epoch": 1.7528200098087297,
      "grad_norm": 1.8607033343043482,
      "learning_rate": 2.0591950575975294e-07,
      "loss": 0.3924,
      "step": 14296
    },
    {
      "epoch": 1.7529426189308484,
      "grad_norm": 1.826844745429458,
      "learning_rate": 2.0571811942017045e-07,
      "loss": 0.4224,
      "step": 14297
    },
    {
      "epoch": 1.753065228052967,
      "grad_norm": 1.9267501983898594,
      "learning_rate": 2.0551682737963456e-07,
      "loss": 0.3957,
      "step": 14298
    },
    {
      "epoch": 1.7531878371750858,
      "grad_norm": 1.9533633035312958,
      "learning_rate": 2.0531562964641898e-07,
      "loss": 0.4188,
      "step": 14299
    },
    {
      "epoch": 1.7533104462972045,
      "grad_norm": 1.9577602923994564,
      "learning_rate": 2.0511452622879347e-07,
      "loss": 0.3851,
      "step": 14300
    },
    {
      "epoch": 1.7534330554193231,
      "grad_norm": 1.8419231227686645,
      "learning_rate": 2.0491351713502388e-07,
      "loss": 0.357,
      "step": 14301
    },
    {
      "epoch": 1.7535556645414418,
      "grad_norm": 1.7856757163690369,
      "learning_rate": 2.04712602373372e-07,
      "loss": 0.4397,
      "step": 14302
    },
    {
      "epoch": 1.7536782736635605,
      "grad_norm": 1.7784046396331181,
      "learning_rate": 2.045117819520953e-07,
      "loss": 0.3814,
      "step": 14303
    },
    {
      "epoch": 1.7538008827856792,
      "grad_norm": 1.9830389076553063,
      "learning_rate": 2.0431105587944866e-07,
      "loss": 0.3915,
      "step": 14304
    },
    {
      "epoch": 1.753923491907798,
      "grad_norm": 1.813385748641901,
      "learning_rate": 2.0411042416368128e-07,
      "loss": 0.4306,
      "step": 14305
    },
    {
      "epoch": 1.7540461010299166,
      "grad_norm": 1.8696645211904477,
      "learning_rate": 2.0390988681304013e-07,
      "loss": 0.3895,
      "step": 14306
    },
    {
      "epoch": 1.7541687101520353,
      "grad_norm": 1.7850943577672709,
      "learning_rate": 2.0370944383576784e-07,
      "loss": 0.4194,
      "step": 14307
    },
    {
      "epoch": 1.754291319274154,
      "grad_norm": 2.126795247396531,
      "learning_rate": 2.0350909524010253e-07,
      "loss": 0.4313,
      "step": 14308
    },
    {
      "epoch": 1.7544139283962727,
      "grad_norm": 1.9377224582852,
      "learning_rate": 2.0330884103427902e-07,
      "loss": 0.4098,
      "step": 14309
    },
    {
      "epoch": 1.7545365375183914,
      "grad_norm": 1.940483076596185,
      "learning_rate": 2.0310868122652787e-07,
      "loss": 0.4492,
      "step": 14310
    },
    {
      "epoch": 1.7546591466405101,
      "grad_norm": 2.0180083576943115,
      "learning_rate": 2.0290861582507705e-07,
      "loss": 0.408,
      "step": 14311
    },
    {
      "epoch": 1.7547817557626288,
      "grad_norm": 1.8521804900779901,
      "learning_rate": 2.0270864483814794e-07,
      "loss": 0.4082,
      "step": 14312
    },
    {
      "epoch": 1.7549043648847475,
      "grad_norm": 1.8829870081499973,
      "learning_rate": 2.0250876827396072e-07,
      "loss": 0.4176,
      "step": 14313
    },
    {
      "epoch": 1.7550269740068662,
      "grad_norm": 1.896247442927423,
      "learning_rate": 2.0230898614073068e-07,
      "loss": 0.4099,
      "step": 14314
    },
    {
      "epoch": 1.755149583128985,
      "grad_norm": 1.8419490797548717,
      "learning_rate": 2.0210929844666937e-07,
      "loss": 0.4241,
      "step": 14315
    },
    {
      "epoch": 1.7552721922511036,
      "grad_norm": 1.9772393880141645,
      "learning_rate": 2.0190970519998326e-07,
      "loss": 0.4098,
      "step": 14316
    },
    {
      "epoch": 1.7553948013732221,
      "grad_norm": 1.8386313944698482,
      "learning_rate": 2.0171020640887745e-07,
      "loss": 0.4155,
      "step": 14317
    },
    {
      "epoch": 1.7555174104953408,
      "grad_norm": 1.922804085051697,
      "learning_rate": 2.0151080208155038e-07,
      "loss": 0.3968,
      "step": 14318
    },
    {
      "epoch": 1.7556400196174595,
      "grad_norm": 1.9894142737397864,
      "learning_rate": 2.0131149222619883e-07,
      "loss": 0.4364,
      "step": 14319
    },
    {
      "epoch": 1.7557626287395782,
      "grad_norm": 1.9492205384769674,
      "learning_rate": 2.0111227685101403e-07,
      "loss": 0.4325,
      "step": 14320
    },
    {
      "epoch": 1.755885237861697,
      "grad_norm": 2.1407904702975165,
      "learning_rate": 2.0091315596418525e-07,
      "loss": 0.4548,
      "step": 14321
    },
    {
      "epoch": 1.7560078469838156,
      "grad_norm": 1.956038070997772,
      "learning_rate": 2.0071412957389513e-07,
      "loss": 0.4366,
      "step": 14322
    },
    {
      "epoch": 1.7561304561059343,
      "grad_norm": 1.8912465588335414,
      "learning_rate": 2.0051519768832544e-07,
      "loss": 0.4488,
      "step": 14323
    },
    {
      "epoch": 1.7562530652280528,
      "grad_norm": 1.9079583584206683,
      "learning_rate": 2.0031636031565216e-07,
      "loss": 0.4001,
      "step": 14324
    },
    {
      "epoch": 1.7563756743501715,
      "grad_norm": 2.027521350217267,
      "learning_rate": 2.0011761746404735e-07,
      "loss": 0.407,
      "step": 14325
    },
    {
      "epoch": 1.7564982834722902,
      "grad_norm": 1.8399059687166472,
      "learning_rate": 1.9991896914167973e-07,
      "loss": 0.4245,
      "step": 14326
    },
    {
      "epoch": 1.756620892594409,
      "grad_norm": 1.9557597581444697,
      "learning_rate": 1.9972041535671531e-07,
      "loss": 0.438,
      "step": 14327
    },
    {
      "epoch": 1.7567435017165276,
      "grad_norm": 1.9673945290568087,
      "learning_rate": 1.9952195611731396e-07,
      "loss": 0.4311,
      "step": 14328
    },
    {
      "epoch": 1.7568661108386463,
      "grad_norm": 1.9747982792324925,
      "learning_rate": 1.9932359143163244e-07,
      "loss": 0.4565,
      "step": 14329
    },
    {
      "epoch": 1.756988719960765,
      "grad_norm": 1.997643334955913,
      "learning_rate": 1.9912532130782424e-07,
      "loss": 0.4096,
      "step": 14330
    },
    {
      "epoch": 1.7571113290828837,
      "grad_norm": 1.906605335992551,
      "learning_rate": 1.9892714575403925e-07,
      "loss": 0.3802,
      "step": 14331
    },
    {
      "epoch": 1.7572339382050024,
      "grad_norm": 1.9892174591270009,
      "learning_rate": 1.9872906477842235e-07,
      "loss": 0.4255,
      "step": 14332
    },
    {
      "epoch": 1.757356547327121,
      "grad_norm": 1.856618252275852,
      "learning_rate": 1.985310783891145e-07,
      "loss": 0.3816,
      "step": 14333
    },
    {
      "epoch": 1.7574791564492398,
      "grad_norm": 2.0327957949152755,
      "learning_rate": 1.9833318659425422e-07,
      "loss": 0.3963,
      "step": 14334
    },
    {
      "epoch": 1.7576017655713585,
      "grad_norm": 1.9727987814057661,
      "learning_rate": 1.9813538940197412e-07,
      "loss": 0.4371,
      "step": 14335
    },
    {
      "epoch": 1.7577243746934772,
      "grad_norm": 2.1668084932809695,
      "learning_rate": 1.9793768682040526e-07,
      "loss": 0.4764,
      "step": 14336
    },
    {
      "epoch": 1.757846983815596,
      "grad_norm": 1.8395038673316757,
      "learning_rate": 1.9774007885767305e-07,
      "loss": 0.4207,
      "step": 14337
    },
    {
      "epoch": 1.7579695929377146,
      "grad_norm": 1.9766807768381143,
      "learning_rate": 1.9754256552189876e-07,
      "loss": 0.433,
      "step": 14338
    },
    {
      "epoch": 1.7580922020598333,
      "grad_norm": 2.076580588457096,
      "learning_rate": 1.9734514682120148e-07,
      "loss": 0.4768,
      "step": 14339
    },
    {
      "epoch": 1.758214811181952,
      "grad_norm": 1.9407256155823287,
      "learning_rate": 1.9714782276369526e-07,
      "loss": 0.422,
      "step": 14340
    },
    {
      "epoch": 1.7583374203040707,
      "grad_norm": 2.0216728864078966,
      "learning_rate": 1.9695059335749084e-07,
      "loss": 0.4212,
      "step": 14341
    },
    {
      "epoch": 1.7584600294261894,
      "grad_norm": 2.117512472091556,
      "learning_rate": 1.967534586106934e-07,
      "loss": 0.3864,
      "step": 14342
    },
    {
      "epoch": 1.758582638548308,
      "grad_norm": 1.9502372834627972,
      "learning_rate": 1.9655641853140672e-07,
      "loss": 0.4351,
      "step": 14343
    },
    {
      "epoch": 1.7587052476704268,
      "grad_norm": 1.9633957868897094,
      "learning_rate": 1.963594731277299e-07,
      "loss": 0.3874,
      "step": 14344
    },
    {
      "epoch": 1.7588278567925455,
      "grad_norm": 2.029688213766496,
      "learning_rate": 1.961626224077562e-07,
      "loss": 0.4377,
      "step": 14345
    },
    {
      "epoch": 1.7589504659146642,
      "grad_norm": 2.0610027175799646,
      "learning_rate": 1.9596586637957716e-07,
      "loss": 0.4367,
      "step": 14346
    },
    {
      "epoch": 1.759073075036783,
      "grad_norm": 1.8196444608706195,
      "learning_rate": 1.957692050512805e-07,
      "loss": 0.3857,
      "step": 14347
    },
    {
      "epoch": 1.7591956841589014,
      "grad_norm": 1.849471587217102,
      "learning_rate": 1.9557263843094893e-07,
      "loss": 0.4161,
      "step": 14348
    },
    {
      "epoch": 1.75931829328102,
      "grad_norm": 1.9450489896580812,
      "learning_rate": 1.9537616652666098e-07,
      "loss": 0.4048,
      "step": 14349
    },
    {
      "epoch": 1.7594409024031388,
      "grad_norm": 2.0833475206296246,
      "learning_rate": 1.9517978934649294e-07,
      "loss": 0.3853,
      "step": 14350
    },
    {
      "epoch": 1.7595635115252575,
      "grad_norm": 1.8898737877439529,
      "learning_rate": 1.9498350689851593e-07,
      "loss": 0.3692,
      "step": 14351
    },
    {
      "epoch": 1.7596861206473762,
      "grad_norm": 2.0513301399709665,
      "learning_rate": 1.947873191907973e-07,
      "loss": 0.4106,
      "step": 14352
    },
    {
      "epoch": 1.7598087297694949,
      "grad_norm": 1.8883882063877628,
      "learning_rate": 1.9459122623140148e-07,
      "loss": 0.4246,
      "step": 14353
    },
    {
      "epoch": 1.7599313388916136,
      "grad_norm": 2.133253176643932,
      "learning_rate": 1.9439522802838727e-07,
      "loss": 0.4286,
      "step": 14354
    },
    {
      "epoch": 1.7600539480137323,
      "grad_norm": 1.911240213274398,
      "learning_rate": 1.941993245898108e-07,
      "loss": 0.452,
      "step": 14355
    },
    {
      "epoch": 1.7601765571358508,
      "grad_norm": 1.9581355180819968,
      "learning_rate": 1.9400351592372414e-07,
      "loss": 0.4042,
      "step": 14356
    },
    {
      "epoch": 1.7602991662579694,
      "grad_norm": 2.060153264325984,
      "learning_rate": 1.938078020381759e-07,
      "loss": 0.4498,
      "step": 14357
    },
    {
      "epoch": 1.7604217753800881,
      "grad_norm": 1.9174545620287273,
      "learning_rate": 1.9361218294120964e-07,
      "loss": 0.4109,
      "step": 14358
    },
    {
      "epoch": 1.7605443845022068,
      "grad_norm": 1.9224849780776936,
      "learning_rate": 1.9341665864086563e-07,
      "loss": 0.4361,
      "step": 14359
    },
    {
      "epoch": 1.7606669936243255,
      "grad_norm": 2.0166862984875085,
      "learning_rate": 1.9322122914518072e-07,
      "loss": 0.4403,
      "step": 14360
    },
    {
      "epoch": 1.7607896027464442,
      "grad_norm": 1.9286667398064699,
      "learning_rate": 1.9302589446218685e-07,
      "loss": 0.4257,
      "step": 14361
    },
    {
      "epoch": 1.760912211868563,
      "grad_norm": 1.9566427954156669,
      "learning_rate": 1.9283065459991284e-07,
      "loss": 0.4017,
      "step": 14362
    },
    {
      "epoch": 1.7610348209906816,
      "grad_norm": 1.8326439848446814,
      "learning_rate": 1.9263550956638343e-07,
      "loss": 0.452,
      "step": 14363
    },
    {
      "epoch": 1.7611574301128003,
      "grad_norm": 1.9398352705661317,
      "learning_rate": 1.9244045936961968e-07,
      "loss": 0.4677,
      "step": 14364
    },
    {
      "epoch": 1.761280039234919,
      "grad_norm": 1.9928340549905323,
      "learning_rate": 1.9224550401763826e-07,
      "loss": 0.4301,
      "step": 14365
    },
    {
      "epoch": 1.7614026483570377,
      "grad_norm": 2.241699986614742,
      "learning_rate": 1.9205064351845188e-07,
      "loss": 0.4336,
      "step": 14366
    },
    {
      "epoch": 1.7615252574791564,
      "grad_norm": 1.778127059970338,
      "learning_rate": 1.9185587788007027e-07,
      "loss": 0.4677,
      "step": 14367
    },
    {
      "epoch": 1.7616478666012751,
      "grad_norm": 2.129606365695488,
      "learning_rate": 1.9166120711049812e-07,
      "loss": 0.4551,
      "step": 14368
    },
    {
      "epoch": 1.7617704757233938,
      "grad_norm": 2.1127412427083305,
      "learning_rate": 1.914666312177371e-07,
      "loss": 0.4124,
      "step": 14369
    },
    {
      "epoch": 1.7618930848455125,
      "grad_norm": 2.199322325094193,
      "learning_rate": 1.9127215020978418e-07,
      "loss": 0.3973,
      "step": 14370
    },
    {
      "epoch": 1.7620156939676312,
      "grad_norm": 1.9597897243475897,
      "learning_rate": 1.9107776409463375e-07,
      "loss": 0.4112,
      "step": 14371
    },
    {
      "epoch": 1.76213830308975,
      "grad_norm": 1.9711671701146911,
      "learning_rate": 1.9088347288027414e-07,
      "loss": 0.3993,
      "step": 14372
    },
    {
      "epoch": 1.7622609122118686,
      "grad_norm": 2.0782020289972323,
      "learning_rate": 1.9068927657469234e-07,
      "loss": 0.4304,
      "step": 14373
    },
    {
      "epoch": 1.7623835213339873,
      "grad_norm": 1.8221124197857135,
      "learning_rate": 1.904951751858694e-07,
      "loss": 0.3999,
      "step": 14374
    },
    {
      "epoch": 1.762506130456106,
      "grad_norm": 1.84660134402334,
      "learning_rate": 1.9030116872178317e-07,
      "loss": 0.4585,
      "step": 14375
    },
    {
      "epoch": 1.7626287395782247,
      "grad_norm": 2.0559257773314874,
      "learning_rate": 1.9010725719040806e-07,
      "loss": 0.441,
      "step": 14376
    },
    {
      "epoch": 1.7627513487003434,
      "grad_norm": 2.120029301147045,
      "learning_rate": 1.8991344059971412e-07,
      "loss": 0.4369,
      "step": 14377
    },
    {
      "epoch": 1.7628739578224621,
      "grad_norm": 1.8226064150516694,
      "learning_rate": 1.8971971895766772e-07,
      "loss": 0.3668,
      "step": 14378
    },
    {
      "epoch": 1.7629965669445806,
      "grad_norm": 2.0157553524854963,
      "learning_rate": 1.895260922722303e-07,
      "loss": 0.4073,
      "step": 14379
    },
    {
      "epoch": 1.7631191760666993,
      "grad_norm": 1.8607267906284766,
      "learning_rate": 1.8933256055136128e-07,
      "loss": 0.4084,
      "step": 14380
    },
    {
      "epoch": 1.763241785188818,
      "grad_norm": 1.9449074009367415,
      "learning_rate": 1.8913912380301435e-07,
      "loss": 0.4309,
      "step": 14381
    },
    {
      "epoch": 1.7633643943109367,
      "grad_norm": 1.8559645177628843,
      "learning_rate": 1.8894578203514092e-07,
      "loss": 0.4229,
      "step": 14382
    },
    {
      "epoch": 1.7634870034330554,
      "grad_norm": 1.910160345174113,
      "learning_rate": 1.8875253525568682e-07,
      "loss": 0.4494,
      "step": 14383
    },
    {
      "epoch": 1.7636096125551741,
      "grad_norm": 2.099089727887406,
      "learning_rate": 1.8855938347259573e-07,
      "loss": 0.4534,
      "step": 14384
    },
    {
      "epoch": 1.7637322216772928,
      "grad_norm": 2.0759659009311164,
      "learning_rate": 1.8836632669380572e-07,
      "loss": 0.4363,
      "step": 14385
    },
    {
      "epoch": 1.7638548307994115,
      "grad_norm": 1.8985533875476792,
      "learning_rate": 1.8817336492725242e-07,
      "loss": 0.3914,
      "step": 14386
    },
    {
      "epoch": 1.76397743992153,
      "grad_norm": 1.8608333358901326,
      "learning_rate": 1.8798049818086665e-07,
      "loss": 0.4227,
      "step": 14387
    },
    {
      "epoch": 1.7641000490436487,
      "grad_norm": 2.058430999033402,
      "learning_rate": 1.8778772646257494e-07,
      "loss": 0.4565,
      "step": 14388
    },
    {
      "epoch": 1.7642226581657674,
      "grad_norm": 1.9016856427560873,
      "learning_rate": 1.8759504978030112e-07,
      "loss": 0.4314,
      "step": 14389
    },
    {
      "epoch": 1.764345267287886,
      "grad_norm": 1.867036667662646,
      "learning_rate": 1.8740246814196505e-07,
      "loss": 0.4072,
      "step": 14390
    },
    {
      "epoch": 1.7644678764100048,
      "grad_norm": 1.8351880535743963,
      "learning_rate": 1.8720998155548175e-07,
      "loss": 0.3938,
      "step": 14391
    },
    {
      "epoch": 1.7645904855321235,
      "grad_norm": 1.918941758984214,
      "learning_rate": 1.8701759002876212e-07,
      "loss": 0.417,
      "step": 14392
    },
    {
      "epoch": 1.7647130946542422,
      "grad_norm": 1.7620345907630863,
      "learning_rate": 1.8682529356971514e-07,
      "loss": 0.3882,
      "step": 14393
    },
    {
      "epoch": 1.764835703776361,
      "grad_norm": 1.9472963141356776,
      "learning_rate": 1.8663309218624332e-07,
      "loss": 0.4014,
      "step": 14394
    },
    {
      "epoch": 1.7649583128984796,
      "grad_norm": 1.9311457281201396,
      "learning_rate": 1.8644098588624675e-07,
      "loss": 0.4182,
      "step": 14395
    },
    {
      "epoch": 1.7650809220205983,
      "grad_norm": 2.1825174858346585,
      "learning_rate": 1.8624897467762138e-07,
      "loss": 0.4434,
      "step": 14396
    },
    {
      "epoch": 1.765203531142717,
      "grad_norm": 2.1213610626730994,
      "learning_rate": 1.8605705856825972e-07,
      "loss": 0.3984,
      "step": 14397
    },
    {
      "epoch": 1.7653261402648357,
      "grad_norm": 1.9431640634258416,
      "learning_rate": 1.858652375660494e-07,
      "loss": 0.4463,
      "step": 14398
    },
    {
      "epoch": 1.7654487493869544,
      "grad_norm": 2.011447442674096,
      "learning_rate": 1.8567351167887438e-07,
      "loss": 0.4169,
      "step": 14399
    },
    {
      "epoch": 1.765571358509073,
      "grad_norm": 2.0981634001431133,
      "learning_rate": 1.8548188091461554e-07,
      "loss": 0.4311,
      "step": 14400
    },
    {
      "epoch": 1.7656939676311918,
      "grad_norm": 1.9391782163681854,
      "learning_rate": 1.852903452811486e-07,
      "loss": 0.3968,
      "step": 14401
    },
    {
      "epoch": 1.7658165767533105,
      "grad_norm": 2.0936820124361044,
      "learning_rate": 1.8509890478634635e-07,
      "loss": 0.4182,
      "step": 14402
    },
    {
      "epoch": 1.7659391858754292,
      "grad_norm": 1.9604088890141813,
      "learning_rate": 1.8490755943807754e-07,
      "loss": 0.4011,
      "step": 14403
    },
    {
      "epoch": 1.766061794997548,
      "grad_norm": 1.8184843852762207,
      "learning_rate": 1.847163092442067e-07,
      "loss": 0.3723,
      "step": 14404
    },
    {
      "epoch": 1.7661844041196666,
      "grad_norm": 1.9565641243921903,
      "learning_rate": 1.845251542125942e-07,
      "loss": 0.4675,
      "step": 14405
    },
    {
      "epoch": 1.7663070132417853,
      "grad_norm": 2.012509018356963,
      "learning_rate": 1.8433409435109684e-07,
      "loss": 0.4422,
      "step": 14406
    },
    {
      "epoch": 1.766429622363904,
      "grad_norm": 1.9471759883386919,
      "learning_rate": 1.8414312966756888e-07,
      "loss": 0.4179,
      "step": 14407
    },
    {
      "epoch": 1.7665522314860227,
      "grad_norm": 1.771647397795399,
      "learning_rate": 1.8395226016985707e-07,
      "loss": 0.3785,
      "step": 14408
    },
    {
      "epoch": 1.7666748406081414,
      "grad_norm": 2.2278201736576424,
      "learning_rate": 1.8376148586580766e-07,
      "loss": 0.4691,
      "step": 14409
    },
    {
      "epoch": 1.76679744973026,
      "grad_norm": 1.9913900147370032,
      "learning_rate": 1.8357080676326216e-07,
      "loss": 0.4192,
      "step": 14410
    },
    {
      "epoch": 1.7669200588523786,
      "grad_norm": 1.955563657854991,
      "learning_rate": 1.8338022287005763e-07,
      "loss": 0.4033,
      "step": 14411
    },
    {
      "epoch": 1.7670426679744973,
      "grad_norm": 1.9546635326725514,
      "learning_rate": 1.831897341940267e-07,
      "loss": 0.4153,
      "step": 14412
    },
    {
      "epoch": 1.767165277096616,
      "grad_norm": 1.9729849569904778,
      "learning_rate": 1.829993407429992e-07,
      "loss": 0.4004,
      "step": 14413
    },
    {
      "epoch": 1.7672878862187347,
      "grad_norm": 1.8599004978885416,
      "learning_rate": 1.8280904252480107e-07,
      "loss": 0.4041,
      "step": 14414
    },
    {
      "epoch": 1.7674104953408534,
      "grad_norm": 1.8184415995555367,
      "learning_rate": 1.8261883954725361e-07,
      "loss": 0.4402,
      "step": 14415
    },
    {
      "epoch": 1.767533104462972,
      "grad_norm": 1.8825390591746496,
      "learning_rate": 1.8242873181817417e-07,
      "loss": 0.4197,
      "step": 14416
    },
    {
      "epoch": 1.7676557135850908,
      "grad_norm": 1.8471969725232833,
      "learning_rate": 1.82238719345377e-07,
      "loss": 0.3912,
      "step": 14417
    },
    {
      "epoch": 1.7677783227072095,
      "grad_norm": 1.8898016335835162,
      "learning_rate": 1.8204880213667148e-07,
      "loss": 0.4347,
      "step": 14418
    },
    {
      "epoch": 1.767900931829328,
      "grad_norm": 1.9726839360145096,
      "learning_rate": 1.8185898019986435e-07,
      "loss": 0.4461,
      "step": 14419
    },
    {
      "epoch": 1.7680235409514466,
      "grad_norm": 2.135365025175268,
      "learning_rate": 1.8166925354275666e-07,
      "loss": 0.4004,
      "step": 14420
    },
    {
      "epoch": 1.7681461500735653,
      "grad_norm": 1.9934199974092095,
      "learning_rate": 1.8147962217314686e-07,
      "loss": 0.4198,
      "step": 14421
    },
    {
      "epoch": 1.768268759195684,
      "grad_norm": 2.118294628601723,
      "learning_rate": 1.8129008609882926e-07,
      "loss": 0.4655,
      "step": 14422
    },
    {
      "epoch": 1.7683913683178027,
      "grad_norm": 2.0496678409788265,
      "learning_rate": 1.811006453275943e-07,
      "loss": 0.4143,
      "step": 14423
    },
    {
      "epoch": 1.7685139774399214,
      "grad_norm": 1.9362035047127837,
      "learning_rate": 1.8091129986722798e-07,
      "loss": 0.4231,
      "step": 14424
    },
    {
      "epoch": 1.7686365865620401,
      "grad_norm": 2.0922000295572176,
      "learning_rate": 1.8072204972551272e-07,
      "loss": 0.4621,
      "step": 14425
    },
    {
      "epoch": 1.7687591956841588,
      "grad_norm": 1.8344980143307204,
      "learning_rate": 1.8053289491022696e-07,
      "loss": 0.4275,
      "step": 14426
    },
    {
      "epoch": 1.7688818048062775,
      "grad_norm": 2.0120313570342003,
      "learning_rate": 1.803438354291459e-07,
      "loss": 0.4098,
      "step": 14427
    },
    {
      "epoch": 1.7690044139283962,
      "grad_norm": 2.0024077571838426,
      "learning_rate": 1.8015487129004e-07,
      "loss": 0.4265,
      "step": 14428
    },
    {
      "epoch": 1.769127023050515,
      "grad_norm": 1.8827191043547589,
      "learning_rate": 1.7996600250067553e-07,
      "loss": 0.4188,
      "step": 14429
    },
    {
      "epoch": 1.7692496321726336,
      "grad_norm": 1.970785270102439,
      "learning_rate": 1.79777229068816e-07,
      "loss": 0.4364,
      "step": 14430
    },
    {
      "epoch": 1.7693722412947523,
      "grad_norm": 2.0202990117727473,
      "learning_rate": 1.7958855100221938e-07,
      "loss": 0.4198,
      "step": 14431
    },
    {
      "epoch": 1.769494850416871,
      "grad_norm": 1.9585105566486947,
      "learning_rate": 1.793999683086417e-07,
      "loss": 0.4667,
      "step": 14432
    },
    {
      "epoch": 1.7696174595389897,
      "grad_norm": 2.075147248958236,
      "learning_rate": 1.7921148099583342e-07,
      "loss": 0.4567,
      "step": 14433
    },
    {
      "epoch": 1.7697400686611084,
      "grad_norm": 1.9005054114775193,
      "learning_rate": 1.790230890715422e-07,
      "loss": 0.3889,
      "step": 14434
    },
    {
      "epoch": 1.7698626777832271,
      "grad_norm": 1.9409727382521227,
      "learning_rate": 1.7883479254351077e-07,
      "loss": 0.4325,
      "step": 14435
    },
    {
      "epoch": 1.7699852869053458,
      "grad_norm": 2.0731570123248435,
      "learning_rate": 1.7864659141947877e-07,
      "loss": 0.4216,
      "step": 14436
    },
    {
      "epoch": 1.7701078960274645,
      "grad_norm": 1.8665988069645314,
      "learning_rate": 1.7845848570718165e-07,
      "loss": 0.4322,
      "step": 14437
    },
    {
      "epoch": 1.7702305051495832,
      "grad_norm": 1.8956915470275262,
      "learning_rate": 1.7827047541435022e-07,
      "loss": 0.4077,
      "step": 14438
    },
    {
      "epoch": 1.770353114271702,
      "grad_norm": 1.9237652022035716,
      "learning_rate": 1.780825605487127e-07,
      "loss": 0.4275,
      "step": 14439
    },
    {
      "epoch": 1.7704757233938206,
      "grad_norm": 1.8945285759661394,
      "learning_rate": 1.778947411179932e-07,
      "loss": 0.4391,
      "step": 14440
    },
    {
      "epoch": 1.7705983325159393,
      "grad_norm": 1.9342791559556252,
      "learning_rate": 1.777070171299103e-07,
      "loss": 0.4061,
      "step": 14441
    },
    {
      "epoch": 1.7707209416380578,
      "grad_norm": 2.2515207550010334,
      "learning_rate": 1.7751938859218e-07,
      "loss": 0.3995,
      "step": 14442
    },
    {
      "epoch": 1.7708435507601765,
      "grad_norm": 1.972028372981769,
      "learning_rate": 1.7733185551251504e-07,
      "loss": 0.4308,
      "step": 14443
    },
    {
      "epoch": 1.7709661598822952,
      "grad_norm": 1.9938302684610092,
      "learning_rate": 1.7714441789862257e-07,
      "loss": 0.4293,
      "step": 14444
    },
    {
      "epoch": 1.771088769004414,
      "grad_norm": 2.022181334900788,
      "learning_rate": 1.7695707575820674e-07,
      "loss": 0.447,
      "step": 14445
    },
    {
      "epoch": 1.7712113781265326,
      "grad_norm": 1.9126401609763026,
      "learning_rate": 1.7676982909896752e-07,
      "loss": 0.447,
      "step": 14446
    },
    {
      "epoch": 1.7713339872486513,
      "grad_norm": 2.1524879202829186,
      "learning_rate": 1.7658267792860172e-07,
      "loss": 0.4257,
      "step": 14447
    },
    {
      "epoch": 1.77145659637077,
      "grad_norm": 2.018139069413168,
      "learning_rate": 1.763956222548005e-07,
      "loss": 0.4403,
      "step": 14448
    },
    {
      "epoch": 1.7715792054928887,
      "grad_norm": 2.017204197478352,
      "learning_rate": 1.762086620852535e-07,
      "loss": 0.4216,
      "step": 14449
    },
    {
      "epoch": 1.7717018146150072,
      "grad_norm": 1.9963911548975792,
      "learning_rate": 1.760217974276443e-07,
      "loss": 0.4095,
      "step": 14450
    },
    {
      "epoch": 1.771824423737126,
      "grad_norm": 2.010353987657715,
      "learning_rate": 1.7583502828965343e-07,
      "loss": 0.426,
      "step": 14451
    },
    {
      "epoch": 1.7719470328592446,
      "grad_norm": 2.0148591587504683,
      "learning_rate": 1.7564835467895726e-07,
      "loss": 0.4243,
      "step": 14452
    },
    {
      "epoch": 1.7720696419813633,
      "grad_norm": 1.9860850881375085,
      "learning_rate": 1.7546177660322904e-07,
      "loss": 0.4257,
      "step": 14453
    },
    {
      "epoch": 1.772192251103482,
      "grad_norm": 1.9499168675136411,
      "learning_rate": 1.752752940701369e-07,
      "loss": 0.4204,
      "step": 14454
    },
    {
      "epoch": 1.7723148602256007,
      "grad_norm": 1.9095029737670692,
      "learning_rate": 1.7508890708734571e-07,
      "loss": 0.4003,
      "step": 14455
    },
    {
      "epoch": 1.7724374693477194,
      "grad_norm": 1.9505404628412282,
      "learning_rate": 1.7490261566251666e-07,
      "loss": 0.4468,
      "step": 14456
    },
    {
      "epoch": 1.772560078469838,
      "grad_norm": 2.044676340615696,
      "learning_rate": 1.7471641980330633e-07,
      "loss": 0.4141,
      "step": 14457
    },
    {
      "epoch": 1.7726826875919568,
      "grad_norm": 1.8276404748127044,
      "learning_rate": 1.7453031951736726e-07,
      "loss": 0.432,
      "step": 14458
    },
    {
      "epoch": 1.7728052967140755,
      "grad_norm": 1.9269052002362683,
      "learning_rate": 1.7434431481234914e-07,
      "loss": 0.4068,
      "step": 14459
    },
    {
      "epoch": 1.7729279058361942,
      "grad_norm": 2.05085616741776,
      "learning_rate": 1.7415840569589726e-07,
      "loss": 0.4,
      "step": 14460
    },
    {
      "epoch": 1.773050514958313,
      "grad_norm": 1.736654511850194,
      "learning_rate": 1.7397259217565242e-07,
      "loss": 0.4297,
      "step": 14461
    },
    {
      "epoch": 1.7731731240804316,
      "grad_norm": 1.9739150620564623,
      "learning_rate": 1.7378687425925162e-07,
      "loss": 0.4846,
      "step": 14462
    },
    {
      "epoch": 1.7732957332025503,
      "grad_norm": 1.895525075488215,
      "learning_rate": 1.736012519543287e-07,
      "loss": 0.423,
      "step": 14463
    },
    {
      "epoch": 1.773418342324669,
      "grad_norm": 1.9840951652112986,
      "learning_rate": 1.7341572526851286e-07,
      "loss": 0.3721,
      "step": 14464
    },
    {
      "epoch": 1.7735409514467877,
      "grad_norm": 1.9029705464940394,
      "learning_rate": 1.7323029420942967e-07,
      "loss": 0.4074,
      "step": 14465
    },
    {
      "epoch": 1.7736635605689064,
      "grad_norm": 1.8700967266075077,
      "learning_rate": 1.730449587847005e-07,
      "loss": 0.4169,
      "step": 14466
    },
    {
      "epoch": 1.773786169691025,
      "grad_norm": 2.1186636210623058,
      "learning_rate": 1.728597190019432e-07,
      "loss": 0.4652,
      "step": 14467
    },
    {
      "epoch": 1.7739087788131438,
      "grad_norm": 1.9875095022888063,
      "learning_rate": 1.726745748687711e-07,
      "loss": 0.453,
      "step": 14468
    },
    {
      "epoch": 1.7740313879352625,
      "grad_norm": 1.7096454494062274,
      "learning_rate": 1.7248952639279475e-07,
      "loss": 0.4153,
      "step": 14469
    },
    {
      "epoch": 1.7741539970573812,
      "grad_norm": 1.7356202163499728,
      "learning_rate": 1.7230457358161922e-07,
      "loss": 0.3384,
      "step": 14470
    },
    {
      "epoch": 1.7742766061795,
      "grad_norm": 1.7935705640949058,
      "learning_rate": 1.721197164428462e-07,
      "loss": 0.4069,
      "step": 14471
    },
    {
      "epoch": 1.7743992153016186,
      "grad_norm": 2.0041154824565672,
      "learning_rate": 1.7193495498407375e-07,
      "loss": 0.4072,
      "step": 14472
    },
    {
      "epoch": 1.774521824423737,
      "grad_norm": 1.9033702083574773,
      "learning_rate": 1.7175028921289694e-07,
      "loss": 0.4263,
      "step": 14473
    },
    {
      "epoch": 1.7746444335458558,
      "grad_norm": 1.8226391188790618,
      "learning_rate": 1.7156571913690496e-07,
      "loss": 0.4144,
      "step": 14474
    },
    {
      "epoch": 1.7747670426679745,
      "grad_norm": 1.9156303528576353,
      "learning_rate": 1.7138124476368366e-07,
      "loss": 0.4072,
      "step": 14475
    },
    {
      "epoch": 1.7748896517900932,
      "grad_norm": 1.870040795890758,
      "learning_rate": 1.711968661008162e-07,
      "loss": 0.4744,
      "step": 14476
    },
    {
      "epoch": 1.7750122609122119,
      "grad_norm": 2.018415228128227,
      "learning_rate": 1.710125831558798e-07,
      "loss": 0.4178,
      "step": 14477
    },
    {
      "epoch": 1.7751348700343306,
      "grad_norm": 2.1192519756021166,
      "learning_rate": 1.7082839593644985e-07,
      "loss": 0.3963,
      "step": 14478
    },
    {
      "epoch": 1.7752574791564493,
      "grad_norm": 1.938572899900796,
      "learning_rate": 1.7064430445009578e-07,
      "loss": 0.427,
      "step": 14479
    },
    {
      "epoch": 1.775380088278568,
      "grad_norm": 1.8948634596629181,
      "learning_rate": 1.7046030870438517e-07,
      "loss": 0.4043,
      "step": 14480
    },
    {
      "epoch": 1.7755026974006864,
      "grad_norm": 2.1072264775202907,
      "learning_rate": 1.702764087068795e-07,
      "loss": 0.4474,
      "step": 14481
    },
    {
      "epoch": 1.7756253065228051,
      "grad_norm": 1.8682144598274584,
      "learning_rate": 1.7009260446513854e-07,
      "loss": 0.4268,
      "step": 14482
    },
    {
      "epoch": 1.7757479156449238,
      "grad_norm": 2.0898045596238597,
      "learning_rate": 1.6990889598671594e-07,
      "loss": 0.4573,
      "step": 14483
    },
    {
      "epoch": 1.7758705247670425,
      "grad_norm": 1.9986666120734935,
      "learning_rate": 1.6972528327916264e-07,
      "loss": 0.4112,
      "step": 14484
    },
    {
      "epoch": 1.7759931338891612,
      "grad_norm": 2.025890421406165,
      "learning_rate": 1.6954176635002567e-07,
      "loss": 0.4487,
      "step": 14485
    },
    {
      "epoch": 1.77611574301128,
      "grad_norm": 1.8306062051370917,
      "learning_rate": 1.6935834520684813e-07,
      "loss": 0.4295,
      "step": 14486
    },
    {
      "epoch": 1.7762383521333986,
      "grad_norm": 2.1583091174957296,
      "learning_rate": 1.6917501985716872e-07,
      "loss": 0.4577,
      "step": 14487
    },
    {
      "epoch": 1.7763609612555173,
      "grad_norm": 1.986619070974674,
      "learning_rate": 1.6899179030852197e-07,
      "loss": 0.4132,
      "step": 14488
    },
    {
      "epoch": 1.776483570377636,
      "grad_norm": 1.7368737123800102,
      "learning_rate": 1.6880865656843937e-07,
      "loss": 0.3914,
      "step": 14489
    },
    {
      "epoch": 1.7766061794997547,
      "grad_norm": 1.679414126709288,
      "learning_rate": 1.6862561864444877e-07,
      "loss": 0.3954,
      "step": 14490
    },
    {
      "epoch": 1.7767287886218734,
      "grad_norm": 1.8529337863527529,
      "learning_rate": 1.6844267654407193e-07,
      "loss": 0.3823,
      "step": 14491
    },
    {
      "epoch": 1.7768513977439921,
      "grad_norm": 2.1452409068520746,
      "learning_rate": 1.6825983027482867e-07,
      "loss": 0.4723,
      "step": 14492
    },
    {
      "epoch": 1.7769740068661108,
      "grad_norm": 2.028610234872919,
      "learning_rate": 1.6807707984423493e-07,
      "loss": 0.4409,
      "step": 14493
    },
    {
      "epoch": 1.7770966159882295,
      "grad_norm": 1.9659976798458065,
      "learning_rate": 1.6789442525980138e-07,
      "loss": 0.4199,
      "step": 14494
    },
    {
      "epoch": 1.7772192251103482,
      "grad_norm": 1.944136955599325,
      "learning_rate": 1.6771186652903532e-07,
      "loss": 0.4263,
      "step": 14495
    },
    {
      "epoch": 1.777341834232467,
      "grad_norm": 1.8670078925892202,
      "learning_rate": 1.6752940365944103e-07,
      "loss": 0.4104,
      "step": 14496
    },
    {
      "epoch": 1.7774644433545856,
      "grad_norm": 1.8600857791678622,
      "learning_rate": 1.6734703665851726e-07,
      "loss": 0.3713,
      "step": 14497
    },
    {
      "epoch": 1.7775870524767043,
      "grad_norm": 2.0472618523275465,
      "learning_rate": 1.6716476553375966e-07,
      "loss": 0.3922,
      "step": 14498
    },
    {
      "epoch": 1.777709661598823,
      "grad_norm": 1.8055550779996266,
      "learning_rate": 1.6698259029266055e-07,
      "loss": 0.397,
      "step": 14499
    },
    {
      "epoch": 1.7778322707209417,
      "grad_norm": 2.0268686285009694,
      "learning_rate": 1.6680051094270734e-07,
      "loss": 0.4012,
      "step": 14500
    },
    {
      "epoch": 1.7779548798430604,
      "grad_norm": 2.010931069538316,
      "learning_rate": 1.666185274913834e-07,
      "loss": 0.425,
      "step": 14501
    },
    {
      "epoch": 1.7780774889651791,
      "grad_norm": 2.0158288774941777,
      "learning_rate": 1.6643663994616893e-07,
      "loss": 0.4643,
      "step": 14502
    },
    {
      "epoch": 1.7782000980872978,
      "grad_norm": 1.8913276922429099,
      "learning_rate": 1.6625484831454042e-07,
      "loss": 0.4229,
      "step": 14503
    },
    {
      "epoch": 1.7783227072094165,
      "grad_norm": 1.7613246208725324,
      "learning_rate": 1.6607315260396856e-07,
      "loss": 0.3817,
      "step": 14504
    },
    {
      "epoch": 1.778445316331535,
      "grad_norm": 2.0524462790390987,
      "learning_rate": 1.658915528219221e-07,
      "loss": 0.4082,
      "step": 14505
    },
    {
      "epoch": 1.7785679254536537,
      "grad_norm": 1.9650986998554714,
      "learning_rate": 1.6571004897586534e-07,
      "loss": 0.3953,
      "step": 14506
    },
    {
      "epoch": 1.7786905345757724,
      "grad_norm": 2.0481320290345675,
      "learning_rate": 1.6552864107325818e-07,
      "loss": 0.3961,
      "step": 14507
    },
    {
      "epoch": 1.7788131436978911,
      "grad_norm": 2.0161266492178456,
      "learning_rate": 1.6534732912155598e-07,
      "loss": 0.4119,
      "step": 14508
    },
    {
      "epoch": 1.7789357528200098,
      "grad_norm": 1.9425721804468694,
      "learning_rate": 1.6516611312821201e-07,
      "loss": 0.4192,
      "step": 14509
    },
    {
      "epoch": 1.7790583619421285,
      "grad_norm": 1.9295617792245674,
      "learning_rate": 1.6498499310067473e-07,
      "loss": 0.4338,
      "step": 14510
    },
    {
      "epoch": 1.7791809710642472,
      "grad_norm": 2.061264597741423,
      "learning_rate": 1.648039690463879e-07,
      "loss": 0.389,
      "step": 14511
    },
    {
      "epoch": 1.779303580186366,
      "grad_norm": 1.9326735606974348,
      "learning_rate": 1.6462304097279197e-07,
      "loss": 0.4051,
      "step": 14512
    },
    {
      "epoch": 1.7794261893084844,
      "grad_norm": 1.7931649160251641,
      "learning_rate": 1.6444220888732376e-07,
      "loss": 0.3942,
      "step": 14513
    },
    {
      "epoch": 1.779548798430603,
      "grad_norm": 2.138270563689607,
      "learning_rate": 1.6426147279741512e-07,
      "loss": 0.4349,
      "step": 14514
    },
    {
      "epoch": 1.7796714075527218,
      "grad_norm": 2.198132712458346,
      "learning_rate": 1.6408083271049536e-07,
      "loss": 0.4654,
      "step": 14515
    },
    {
      "epoch": 1.7797940166748405,
      "grad_norm": 2.0227413041539397,
      "learning_rate": 1.6390028863398883e-07,
      "loss": 0.3981,
      "step": 14516
    },
    {
      "epoch": 1.7799166257969592,
      "grad_norm": 1.932368454613953,
      "learning_rate": 1.637198405753157e-07,
      "loss": 0.4128,
      "step": 14517
    },
    {
      "epoch": 1.780039234919078,
      "grad_norm": 2.0736066519055103,
      "learning_rate": 1.635394885418934e-07,
      "loss": 0.419,
      "step": 14518
    },
    {
      "epoch": 1.7801618440411966,
      "grad_norm": 1.8570302205155196,
      "learning_rate": 1.6335923254113483e-07,
      "loss": 0.4068,
      "step": 14519
    },
    {
      "epoch": 1.7802844531633153,
      "grad_norm": 1.814995719067084,
      "learning_rate": 1.6317907258044828e-07,
      "loss": 0.4363,
      "step": 14520
    },
    {
      "epoch": 1.780407062285434,
      "grad_norm": 2.1143399849573576,
      "learning_rate": 1.6299900866723866e-07,
      "loss": 0.4379,
      "step": 14521
    },
    {
      "epoch": 1.7805296714075527,
      "grad_norm": 1.81274637004855,
      "learning_rate": 1.6281904080890726e-07,
      "loss": 0.393,
      "step": 14522
    },
    {
      "epoch": 1.7806522805296714,
      "grad_norm": 1.9789542209846167,
      "learning_rate": 1.626391690128512e-07,
      "loss": 0.4285,
      "step": 14523
    },
    {
      "epoch": 1.78077488965179,
      "grad_norm": 1.82341528306388,
      "learning_rate": 1.6245939328646322e-07,
      "loss": 0.4084,
      "step": 14524
    },
    {
      "epoch": 1.7808974987739088,
      "grad_norm": 2.042911926847833,
      "learning_rate": 1.6227971363713208e-07,
      "loss": 0.4763,
      "step": 14525
    },
    {
      "epoch": 1.7810201078960275,
      "grad_norm": 1.9764886349328035,
      "learning_rate": 1.6210013007224357e-07,
      "loss": 0.4394,
      "step": 14526
    },
    {
      "epoch": 1.7811427170181462,
      "grad_norm": 2.100241458987615,
      "learning_rate": 1.6192064259917844e-07,
      "loss": 0.4148,
      "step": 14527
    },
    {
      "epoch": 1.781265326140265,
      "grad_norm": 1.8709154492967126,
      "learning_rate": 1.617412512253147e-07,
      "loss": 0.4047,
      "step": 14528
    },
    {
      "epoch": 1.7813879352623836,
      "grad_norm": 2.0031204726905085,
      "learning_rate": 1.615619559580245e-07,
      "loss": 0.4372,
      "step": 14529
    },
    {
      "epoch": 1.7815105443845023,
      "grad_norm": 1.9413413582409724,
      "learning_rate": 1.6138275680467802e-07,
      "loss": 0.4093,
      "step": 14530
    },
    {
      "epoch": 1.781633153506621,
      "grad_norm": 2.285580633138703,
      "learning_rate": 1.6120365377264024e-07,
      "loss": 0.3945,
      "step": 14531
    },
    {
      "epoch": 1.7817557626287397,
      "grad_norm": 1.9415939369616393,
      "learning_rate": 1.6102464686927306e-07,
      "loss": 0.4712,
      "step": 14532
    },
    {
      "epoch": 1.7818783717508584,
      "grad_norm": 1.8046615110579207,
      "learning_rate": 1.6084573610193388e-07,
      "loss": 0.4239,
      "step": 14533
    },
    {
      "epoch": 1.782000980872977,
      "grad_norm": 1.9920359341093277,
      "learning_rate": 1.6066692147797574e-07,
      "loss": 0.4461,
      "step": 14534
    },
    {
      "epoch": 1.7821235899950958,
      "grad_norm": 2.077768363643889,
      "learning_rate": 1.604882030047486e-07,
      "loss": 0.4069,
      "step": 14535
    },
    {
      "epoch": 1.7822461991172143,
      "grad_norm": 1.7890971488967038,
      "learning_rate": 1.603095806895985e-07,
      "loss": 0.4142,
      "step": 14536
    },
    {
      "epoch": 1.782368808239333,
      "grad_norm": 1.9430221861560157,
      "learning_rate": 1.6013105453986627e-07,
      "loss": 0.4538,
      "step": 14537
    },
    {
      "epoch": 1.7824914173614517,
      "grad_norm": 1.8127817166628741,
      "learning_rate": 1.5995262456289018e-07,
      "loss": 0.4337,
      "step": 14538
    },
    {
      "epoch": 1.7826140264835704,
      "grad_norm": 2.311060253378358,
      "learning_rate": 1.5977429076600409e-07,
      "loss": 0.4049,
      "step": 14539
    },
    {
      "epoch": 1.782736635605689,
      "grad_norm": 2.1074293025708974,
      "learning_rate": 1.59596053156538e-07,
      "loss": 0.4239,
      "step": 14540
    },
    {
      "epoch": 1.7828592447278078,
      "grad_norm": 1.9374331647113394,
      "learning_rate": 1.5941791174181682e-07,
      "loss": 0.3949,
      "step": 14541
    },
    {
      "epoch": 1.7829818538499265,
      "grad_norm": 1.8085600051794042,
      "learning_rate": 1.5923986652916335e-07,
      "loss": 0.4207,
      "step": 14542
    },
    {
      "epoch": 1.7831044629720452,
      "grad_norm": 1.9016723133081586,
      "learning_rate": 1.5906191752589561e-07,
      "loss": 0.4331,
      "step": 14543
    },
    {
      "epoch": 1.7832270720941636,
      "grad_norm": 1.8186640979407254,
      "learning_rate": 1.5888406473932694e-07,
      "loss": 0.4149,
      "step": 14544
    },
    {
      "epoch": 1.7833496812162823,
      "grad_norm": 1.866866264138067,
      "learning_rate": 1.5870630817676841e-07,
      "loss": 0.4477,
      "step": 14545
    },
    {
      "epoch": 1.783472290338401,
      "grad_norm": 2.1700493790266195,
      "learning_rate": 1.585286478455253e-07,
      "loss": 0.4749,
      "step": 14546
    },
    {
      "epoch": 1.7835948994605197,
      "grad_norm": 2.011645067087164,
      "learning_rate": 1.5835108375289982e-07,
      "loss": 0.4004,
      "step": 14547
    },
    {
      "epoch": 1.7837175085826384,
      "grad_norm": 1.9546272661855753,
      "learning_rate": 1.581736159061903e-07,
      "loss": 0.4155,
      "step": 14548
    },
    {
      "epoch": 1.7838401177047571,
      "grad_norm": 1.8460264445652779,
      "learning_rate": 1.5799624431269144e-07,
      "loss": 0.4277,
      "step": 14549
    },
    {
      "epoch": 1.7839627268268758,
      "grad_norm": 1.8260281479605023,
      "learning_rate": 1.57818968979693e-07,
      "loss": 0.4125,
      "step": 14550
    },
    {
      "epoch": 1.7840853359489945,
      "grad_norm": 1.8592703685703982,
      "learning_rate": 1.576417899144811e-07,
      "loss": 0.3871,
      "step": 14551
    },
    {
      "epoch": 1.7842079450711132,
      "grad_norm": 2.022037875027037,
      "learning_rate": 1.5746470712433902e-07,
      "loss": 0.4041,
      "step": 14552
    },
    {
      "epoch": 1.784330554193232,
      "grad_norm": 1.7940167731630676,
      "learning_rate": 1.5728772061654434e-07,
      "loss": 0.3888,
      "step": 14553
    },
    {
      "epoch": 1.7844531633153506,
      "grad_norm": 1.850473004731257,
      "learning_rate": 1.5711083039837177e-07,
      "loss": 0.451,
      "step": 14554
    },
    {
      "epoch": 1.7845757724374693,
      "grad_norm": 2.018249541402582,
      "learning_rate": 1.569340364770916e-07,
      "loss": 0.4247,
      "step": 14555
    },
    {
      "epoch": 1.784698381559588,
      "grad_norm": 1.8448519655918636,
      "learning_rate": 1.567573388599708e-07,
      "loss": 0.4261,
      "step": 14556
    },
    {
      "epoch": 1.7848209906817067,
      "grad_norm": 2.116699135981618,
      "learning_rate": 1.5658073755427217e-07,
      "loss": 0.4596,
      "step": 14557
    },
    {
      "epoch": 1.7849435998038254,
      "grad_norm": 2.0945628405281767,
      "learning_rate": 1.5640423256725322e-07,
      "loss": 0.4283,
      "step": 14558
    },
    {
      "epoch": 1.7850662089259441,
      "grad_norm": 1.8638990319011235,
      "learning_rate": 1.5622782390616986e-07,
      "loss": 0.4468,
      "step": 14559
    },
    {
      "epoch": 1.7851888180480628,
      "grad_norm": 1.8955911415664584,
      "learning_rate": 1.560515115782718e-07,
      "loss": 0.4411,
      "step": 14560
    },
    {
      "epoch": 1.7853114271701815,
      "grad_norm": 1.918609233016351,
      "learning_rate": 1.5587529559080661e-07,
      "loss": 0.4278,
      "step": 14561
    },
    {
      "epoch": 1.7854340362923002,
      "grad_norm": 2.0662637424092303,
      "learning_rate": 1.5569917595101653e-07,
      "loss": 0.3837,
      "step": 14562
    },
    {
      "epoch": 1.785556645414419,
      "grad_norm": 2.1005374236295573,
      "learning_rate": 1.5552315266614075e-07,
      "loss": 0.4299,
      "step": 14563
    },
    {
      "epoch": 1.7856792545365376,
      "grad_norm": 2.1269766444282077,
      "learning_rate": 1.5534722574341354e-07,
      "loss": 0.4137,
      "step": 14564
    },
    {
      "epoch": 1.7858018636586563,
      "grad_norm": 1.8717078979892112,
      "learning_rate": 1.5517139519006685e-07,
      "loss": 0.4221,
      "step": 14565
    },
    {
      "epoch": 1.785924472780775,
      "grad_norm": 1.9099424234691118,
      "learning_rate": 1.5499566101332685e-07,
      "loss": 0.4391,
      "step": 14566
    },
    {
      "epoch": 1.7860470819028937,
      "grad_norm": 2.083174713716961,
      "learning_rate": 1.548200232204164e-07,
      "loss": 0.4251,
      "step": 14567
    },
    {
      "epoch": 1.7861696910250122,
      "grad_norm": 2.131936743652487,
      "learning_rate": 1.5464448181855468e-07,
      "loss": 0.4212,
      "step": 14568
    },
    {
      "epoch": 1.786292300147131,
      "grad_norm": 1.9057838783043437,
      "learning_rate": 1.5446903681495735e-07,
      "loss": 0.4408,
      "step": 14569
    },
    {
      "epoch": 1.7864149092692496,
      "grad_norm": 1.771027396400799,
      "learning_rate": 1.5429368821683498e-07,
      "loss": 0.4235,
      "step": 14570
    },
    {
      "epoch": 1.7865375183913683,
      "grad_norm": 1.926910204819824,
      "learning_rate": 1.5411843603139437e-07,
      "loss": 0.4311,
      "step": 14571
    },
    {
      "epoch": 1.786660127513487,
      "grad_norm": 1.8847241449464478,
      "learning_rate": 1.539432802658397e-07,
      "loss": 0.3989,
      "step": 14572
    },
    {
      "epoch": 1.7867827366356057,
      "grad_norm": 1.9972122529927843,
      "learning_rate": 1.5376822092736887e-07,
      "loss": 0.413,
      "step": 14573
    },
    {
      "epoch": 1.7869053457577244,
      "grad_norm": 1.9910664184421751,
      "learning_rate": 1.5359325802317858e-07,
      "loss": 0.4499,
      "step": 14574
    },
    {
      "epoch": 1.787027954879843,
      "grad_norm": 1.6995889503426873,
      "learning_rate": 1.534183915604587e-07,
      "loss": 0.3839,
      "step": 14575
    },
    {
      "epoch": 1.7871505640019616,
      "grad_norm": 1.882025275912105,
      "learning_rate": 1.5324362154639781e-07,
      "loss": 0.4114,
      "step": 14576
    },
    {
      "epoch": 1.7872731731240803,
      "grad_norm": 2.0612119099708273,
      "learning_rate": 1.5306894798817807e-07,
      "loss": 0.3946,
      "step": 14577
    },
    {
      "epoch": 1.787395782246199,
      "grad_norm": 1.8398216970657855,
      "learning_rate": 1.5289437089298005e-07,
      "loss": 0.4249,
      "step": 14578
    },
    {
      "epoch": 1.7875183913683177,
      "grad_norm": 1.997966609558995,
      "learning_rate": 1.5271989026797857e-07,
      "loss": 0.4163,
      "step": 14579
    },
    {
      "epoch": 1.7876410004904364,
      "grad_norm": 1.9228826466778566,
      "learning_rate": 1.5254550612034456e-07,
      "loss": 0.4125,
      "step": 14580
    },
    {
      "epoch": 1.787763609612555,
      "grad_norm": 2.055603760493017,
      "learning_rate": 1.523712184572465e-07,
      "loss": 0.4377,
      "step": 14581
    },
    {
      "epoch": 1.7878862187346738,
      "grad_norm": 2.0574076231201888,
      "learning_rate": 1.521970272858475e-07,
      "loss": 0.4306,
      "step": 14582
    },
    {
      "epoch": 1.7880088278567925,
      "grad_norm": 2.145030070104893,
      "learning_rate": 1.5202293261330742e-07,
      "loss": 0.4462,
      "step": 14583
    },
    {
      "epoch": 1.7881314369789112,
      "grad_norm": 2.0245020454049287,
      "learning_rate": 1.5184893444678105e-07,
      "loss": 0.409,
      "step": 14584
    },
    {
      "epoch": 1.78825404610103,
      "grad_norm": 1.9504504402276983,
      "learning_rate": 1.516750327934205e-07,
      "loss": 0.4068,
      "step": 14585
    },
    {
      "epoch": 1.7883766552231486,
      "grad_norm": 2.077257801008128,
      "learning_rate": 1.5150122766037418e-07,
      "loss": 0.4423,
      "step": 14586
    },
    {
      "epoch": 1.7884992643452673,
      "grad_norm": 1.8886822314504361,
      "learning_rate": 1.5132751905478444e-07,
      "loss": 0.4148,
      "step": 14587
    },
    {
      "epoch": 1.788621873467386,
      "grad_norm": 2.0290049339866565,
      "learning_rate": 1.5115390698379167e-07,
      "loss": 0.4032,
      "step": 14588
    },
    {
      "epoch": 1.7887444825895047,
      "grad_norm": 1.9935746841736146,
      "learning_rate": 1.5098039145453213e-07,
      "loss": 0.4373,
      "step": 14589
    },
    {
      "epoch": 1.7888670917116234,
      "grad_norm": 1.9977470485297029,
      "learning_rate": 1.5080697247413705e-07,
      "loss": 0.4416,
      "step": 14590
    },
    {
      "epoch": 1.788989700833742,
      "grad_norm": 2.091616553367257,
      "learning_rate": 1.506336500497338e-07,
      "loss": 0.3862,
      "step": 14591
    },
    {
      "epoch": 1.7891123099558608,
      "grad_norm": 1.8747020948240483,
      "learning_rate": 1.504604241884472e-07,
      "loss": 0.4513,
      "step": 14592
    },
    {
      "epoch": 1.7892349190779795,
      "grad_norm": 1.8240142755148465,
      "learning_rate": 1.5028729489739658e-07,
      "loss": 0.3972,
      "step": 14593
    },
    {
      "epoch": 1.7893575282000982,
      "grad_norm": 2.1126457375108783,
      "learning_rate": 1.501142621836976e-07,
      "loss": 0.4203,
      "step": 14594
    },
    {
      "epoch": 1.789480137322217,
      "grad_norm": 1.7999673018205062,
      "learning_rate": 1.4994132605446326e-07,
      "loss": 0.4025,
      "step": 14595
    },
    {
      "epoch": 1.7896027464443356,
      "grad_norm": 2.04519283102425,
      "learning_rate": 1.497684865168006e-07,
      "loss": 0.4195,
      "step": 14596
    },
    {
      "epoch": 1.7897253555664543,
      "grad_norm": 1.969621187018643,
      "learning_rate": 1.4959574357781364e-07,
      "loss": 0.4476,
      "step": 14597
    },
    {
      "epoch": 1.789847964688573,
      "grad_norm": 1.7578436040844707,
      "learning_rate": 1.4942309724460285e-07,
      "loss": 0.425,
      "step": 14598
    },
    {
      "epoch": 1.7899705738106915,
      "grad_norm": 1.942608101401985,
      "learning_rate": 1.492505475242645e-07,
      "loss": 0.4363,
      "step": 14599
    },
    {
      "epoch": 1.7900931829328102,
      "grad_norm": 2.004873755992681,
      "learning_rate": 1.490780944238898e-07,
      "loss": 0.4358,
      "step": 14600
    },
    {
      "epoch": 1.7902157920549289,
      "grad_norm": 1.9823261026375545,
      "learning_rate": 1.489057379505676e-07,
      "loss": 0.4071,
      "step": 14601
    },
    {
      "epoch": 1.7903384011770476,
      "grad_norm": 1.9209945724541564,
      "learning_rate": 1.4873347811138195e-07,
      "loss": 0.4005,
      "step": 14602
    },
    {
      "epoch": 1.7904610102991663,
      "grad_norm": 1.9045178905804334,
      "learning_rate": 1.4856131491341296e-07,
      "loss": 0.375,
      "step": 14603
    },
    {
      "epoch": 1.790583619421285,
      "grad_norm": 1.9760768253460899,
      "learning_rate": 1.483892483637367e-07,
      "loss": 0.4382,
      "step": 14604
    },
    {
      "epoch": 1.7907062285434037,
      "grad_norm": 1.9728234804625893,
      "learning_rate": 1.4821727846942525e-07,
      "loss": 0.4177,
      "step": 14605
    },
    {
      "epoch": 1.7908288376655224,
      "grad_norm": 2.1417508536767196,
      "learning_rate": 1.4804540523754768e-07,
      "loss": 0.4652,
      "step": 14606
    },
    {
      "epoch": 1.7909514467876408,
      "grad_norm": 1.9709475181378422,
      "learning_rate": 1.4787362867516752e-07,
      "loss": 0.4212,
      "step": 14607
    },
    {
      "epoch": 1.7910740559097595,
      "grad_norm": 1.9112369040853863,
      "learning_rate": 1.4770194878934524e-07,
      "loss": 0.4487,
      "step": 14608
    },
    {
      "epoch": 1.7911966650318782,
      "grad_norm": 2.1184535450467945,
      "learning_rate": 1.4753036558713767e-07,
      "loss": 0.4186,
      "step": 14609
    },
    {
      "epoch": 1.791319274153997,
      "grad_norm": 1.9086240422950955,
      "learning_rate": 1.4735887907559616e-07,
      "loss": 0.4607,
      "step": 14610
    },
    {
      "epoch": 1.7914418832761156,
      "grad_norm": 2.130576509581498,
      "learning_rate": 1.471874892617703e-07,
      "loss": 0.3962,
      "step": 14611
    },
    {
      "epoch": 1.7915644923982343,
      "grad_norm": 1.9806944846924461,
      "learning_rate": 1.4701619615270391e-07,
      "loss": 0.4385,
      "step": 14612
    },
    {
      "epoch": 1.791687101520353,
      "grad_norm": 1.935169693261653,
      "learning_rate": 1.468449997554372e-07,
      "loss": 0.4308,
      "step": 14613
    },
    {
      "epoch": 1.7918097106424717,
      "grad_norm": 1.8608164694474212,
      "learning_rate": 1.4667390007700677e-07,
      "loss": 0.4352,
      "step": 14614
    },
    {
      "epoch": 1.7919323197645904,
      "grad_norm": 2.050063484368562,
      "learning_rate": 1.4650289712444586e-07,
      "loss": 0.4861,
      "step": 14615
    },
    {
      "epoch": 1.7920549288867091,
      "grad_norm": 1.9896416855753554,
      "learning_rate": 1.4633199090478223e-07,
      "loss": 0.4624,
      "step": 14616
    },
    {
      "epoch": 1.7921775380088278,
      "grad_norm": 1.9933656133305422,
      "learning_rate": 1.461611814250405e-07,
      "loss": 0.4404,
      "step": 14617
    },
    {
      "epoch": 1.7923001471309465,
      "grad_norm": 1.975136522899938,
      "learning_rate": 1.4599046869224115e-07,
      "loss": 0.4005,
      "step": 14618
    },
    {
      "epoch": 1.7924227562530652,
      "grad_norm": 1.8913864826037183,
      "learning_rate": 1.458198527134014e-07,
      "loss": 0.4014,
      "step": 14619
    },
    {
      "epoch": 1.792545365375184,
      "grad_norm": 1.877257700494621,
      "learning_rate": 1.4564933349553333e-07,
      "loss": 0.4698,
      "step": 14620
    },
    {
      "epoch": 1.7926679744973026,
      "grad_norm": 2.0575060378837957,
      "learning_rate": 1.4547891104564533e-07,
      "loss": 0.4302,
      "step": 14621
    },
    {
      "epoch": 1.7927905836194213,
      "grad_norm": 1.7726112755204597,
      "learning_rate": 1.4530858537074312e-07,
      "loss": 0.4039,
      "step": 14622
    },
    {
      "epoch": 1.79291319274154,
      "grad_norm": 2.0943442463755577,
      "learning_rate": 1.451383564778261e-07,
      "loss": 0.3677,
      "step": 14623
    },
    {
      "epoch": 1.7930358018636587,
      "grad_norm": 2.035687557827469,
      "learning_rate": 1.44968224373892e-07,
      "loss": 0.4668,
      "step": 14624
    },
    {
      "epoch": 1.7931584109857774,
      "grad_norm": 1.8272951344994455,
      "learning_rate": 1.447981890659328e-07,
      "loss": 0.4196,
      "step": 14625
    },
    {
      "epoch": 1.7932810201078961,
      "grad_norm": 2.000264242335232,
      "learning_rate": 1.4462825056093783e-07,
      "loss": 0.416,
      "step": 14626
    },
    {
      "epoch": 1.7934036292300148,
      "grad_norm": 1.8428514643224105,
      "learning_rate": 1.4445840886589124e-07,
      "loss": 0.4142,
      "step": 14627
    },
    {
      "epoch": 1.7935262383521335,
      "grad_norm": 1.9447822024692731,
      "learning_rate": 1.442886639877747e-07,
      "loss": 0.4134,
      "step": 14628
    },
    {
      "epoch": 1.7936488474742522,
      "grad_norm": 1.9155016072206599,
      "learning_rate": 1.4411901593356453e-07,
      "loss": 0.4366,
      "step": 14629
    },
    {
      "epoch": 1.7937714565963707,
      "grad_norm": 2.0045222754234486,
      "learning_rate": 1.4394946471023324e-07,
      "loss": 0.4179,
      "step": 14630
    },
    {
      "epoch": 1.7938940657184894,
      "grad_norm": 1.9657047794577276,
      "learning_rate": 1.437800103247497e-07,
      "loss": 0.4008,
      "step": 14631
    },
    {
      "epoch": 1.7940166748406081,
      "grad_norm": 1.7574167500897668,
      "learning_rate": 1.4361065278408025e-07,
      "loss": 0.3926,
      "step": 14632
    },
    {
      "epoch": 1.7941392839627268,
      "grad_norm": 2.145736283167722,
      "learning_rate": 1.4344139209518354e-07,
      "loss": 0.4531,
      "step": 14633
    },
    {
      "epoch": 1.7942618930848455,
      "grad_norm": 2.0020699075868573,
      "learning_rate": 1.4327222826501786e-07,
      "loss": 0.4391,
      "step": 14634
    },
    {
      "epoch": 1.7943845022069642,
      "grad_norm": 2.0896379304615285,
      "learning_rate": 1.4310316130053598e-07,
      "loss": 0.4151,
      "step": 14635
    },
    {
      "epoch": 1.794507111329083,
      "grad_norm": 1.8723413156060476,
      "learning_rate": 1.429341912086868e-07,
      "loss": 0.4059,
      "step": 14636
    },
    {
      "epoch": 1.7946297204512016,
      "grad_norm": 1.9762417684685643,
      "learning_rate": 1.4276531799641502e-07,
      "loss": 0.4458,
      "step": 14637
    },
    {
      "epoch": 1.79475232957332,
      "grad_norm": 1.7863247530692647,
      "learning_rate": 1.4259654167066178e-07,
      "loss": 0.4267,
      "step": 14638
    },
    {
      "epoch": 1.7948749386954388,
      "grad_norm": 1.986570788984159,
      "learning_rate": 1.424278622383643e-07,
      "loss": 0.3806,
      "step": 14639
    },
    {
      "epoch": 1.7949975478175575,
      "grad_norm": 2.063298876203575,
      "learning_rate": 1.4225927970645508e-07,
      "loss": 0.3927,
      "step": 14640
    },
    {
      "epoch": 1.7951201569396762,
      "grad_norm": 1.8436551108064054,
      "learning_rate": 1.4209079408186416e-07,
      "loss": 0.4401,
      "step": 14641
    },
    {
      "epoch": 1.795242766061795,
      "grad_norm": 1.9126122946641855,
      "learning_rate": 1.419224053715157e-07,
      "loss": 0.4467,
      "step": 14642
    },
    {
      "epoch": 1.7953653751839136,
      "grad_norm": 1.986421242330852,
      "learning_rate": 1.417541135823308e-07,
      "loss": 0.4221,
      "step": 14643
    },
    {
      "epoch": 1.7954879843060323,
      "grad_norm": 1.9678519664644665,
      "learning_rate": 1.415859187212268e-07,
      "loss": 0.431,
      "step": 14644
    },
    {
      "epoch": 1.795610593428151,
      "grad_norm": 1.95960209379646,
      "learning_rate": 1.4141782079511696e-07,
      "loss": 0.4394,
      "step": 14645
    },
    {
      "epoch": 1.7957332025502697,
      "grad_norm": 1.9500760189751718,
      "learning_rate": 1.4124981981091052e-07,
      "loss": 0.4323,
      "step": 14646
    },
    {
      "epoch": 1.7958558116723884,
      "grad_norm": 1.973199828223311,
      "learning_rate": 1.4108191577551166e-07,
      "loss": 0.3965,
      "step": 14647
    },
    {
      "epoch": 1.795978420794507,
      "grad_norm": 2.0137269101537587,
      "learning_rate": 1.4091410869582267e-07,
      "loss": 0.3971,
      "step": 14648
    },
    {
      "epoch": 1.7961010299166258,
      "grad_norm": 1.9712323997468206,
      "learning_rate": 1.4074639857874023e-07,
      "loss": 0.455,
      "step": 14649
    },
    {
      "epoch": 1.7962236390387445,
      "grad_norm": 1.909731657219128,
      "learning_rate": 1.4057878543115715e-07,
      "loss": 0.4266,
      "step": 14650
    },
    {
      "epoch": 1.7963462481608632,
      "grad_norm": 2.0438775327793492,
      "learning_rate": 1.4041126925996322e-07,
      "loss": 0.4083,
      "step": 14651
    },
    {
      "epoch": 1.796468857282982,
      "grad_norm": 1.9458627766312695,
      "learning_rate": 1.402438500720435e-07,
      "loss": 0.4539,
      "step": 14652
    },
    {
      "epoch": 1.7965914664051006,
      "grad_norm": 2.045900142969812,
      "learning_rate": 1.400765278742794e-07,
      "loss": 0.4584,
      "step": 14653
    },
    {
      "epoch": 1.7967140755272193,
      "grad_norm": 2.0225877280313815,
      "learning_rate": 1.3990930267354763e-07,
      "loss": 0.4487,
      "step": 14654
    },
    {
      "epoch": 1.796836684649338,
      "grad_norm": 2.1732011619736427,
      "learning_rate": 1.397421744767219e-07,
      "loss": 0.4565,
      "step": 14655
    },
    {
      "epoch": 1.7969592937714567,
      "grad_norm": 1.893065871243238,
      "learning_rate": 1.3957514329067084e-07,
      "loss": 0.3965,
      "step": 14656
    },
    {
      "epoch": 1.7970819028935754,
      "grad_norm": 1.933125417741652,
      "learning_rate": 1.3940820912226067e-07,
      "loss": 0.4336,
      "step": 14657
    },
    {
      "epoch": 1.797204512015694,
      "grad_norm": 1.9444872573439098,
      "learning_rate": 1.3924137197835198e-07,
      "loss": 0.4628,
      "step": 14658
    },
    {
      "epoch": 1.7973271211378128,
      "grad_norm": 2.0685513432660625,
      "learning_rate": 1.3907463186580262e-07,
      "loss": 0.4304,
      "step": 14659
    },
    {
      "epoch": 1.7974497302599315,
      "grad_norm": 1.9648019238931105,
      "learning_rate": 1.3890798879146518e-07,
      "loss": 0.4387,
      "step": 14660
    },
    {
      "epoch": 1.7975723393820502,
      "grad_norm": 2.0774110608821106,
      "learning_rate": 1.387414427621897e-07,
      "loss": 0.3888,
      "step": 14661
    },
    {
      "epoch": 1.7976949485041687,
      "grad_norm": 1.8912190878045374,
      "learning_rate": 1.385749937848213e-07,
      "loss": 0.4044,
      "step": 14662
    },
    {
      "epoch": 1.7978175576262874,
      "grad_norm": 1.7986592786511115,
      "learning_rate": 1.3840864186620061e-07,
      "loss": 0.4286,
      "step": 14663
    },
    {
      "epoch": 1.797940166748406,
      "grad_norm": 1.992361363959406,
      "learning_rate": 1.3824238701316578e-07,
      "loss": 0.4226,
      "step": 14664
    },
    {
      "epoch": 1.7980627758705248,
      "grad_norm": 1.9559642044380965,
      "learning_rate": 1.380762292325505e-07,
      "loss": 0.3964,
      "step": 14665
    },
    {
      "epoch": 1.7981853849926435,
      "grad_norm": 2.0017871758827592,
      "learning_rate": 1.3791016853118344e-07,
      "loss": 0.4032,
      "step": 14666
    },
    {
      "epoch": 1.7983079941147622,
      "grad_norm": 1.926356723419226,
      "learning_rate": 1.3774420491589002e-07,
      "loss": 0.4246,
      "step": 14667
    },
    {
      "epoch": 1.7984306032368809,
      "grad_norm": 1.995076447435835,
      "learning_rate": 1.3757833839349199e-07,
      "loss": 0.4107,
      "step": 14668
    },
    {
      "epoch": 1.7985532123589996,
      "grad_norm": 2.0344800216822474,
      "learning_rate": 1.374125689708064e-07,
      "loss": 0.4632,
      "step": 14669
    },
    {
      "epoch": 1.798675821481118,
      "grad_norm": 2.0045177898820183,
      "learning_rate": 1.3724689665464723e-07,
      "loss": 0.4271,
      "step": 14670
    },
    {
      "epoch": 1.7987984306032367,
      "grad_norm": 2.003660562152443,
      "learning_rate": 1.3708132145182295e-07,
      "loss": 0.4389,
      "step": 14671
    },
    {
      "epoch": 1.7989210397253554,
      "grad_norm": 2.1179924143423325,
      "learning_rate": 1.3691584336914005e-07,
      "loss": 0.3951,
      "step": 14672
    },
    {
      "epoch": 1.7990436488474741,
      "grad_norm": 2.0463366721966496,
      "learning_rate": 1.3675046241339918e-07,
      "loss": 0.3923,
      "step": 14673
    },
    {
      "epoch": 1.7991662579695928,
      "grad_norm": 1.8264720232060427,
      "learning_rate": 1.3658517859139852e-07,
      "loss": 0.4277,
      "step": 14674
    },
    {
      "epoch": 1.7992888670917115,
      "grad_norm": 1.8963984226029482,
      "learning_rate": 1.3641999190993128e-07,
      "loss": 0.4228,
      "step": 14675
    },
    {
      "epoch": 1.7994114762138302,
      "grad_norm": 1.9979303839296798,
      "learning_rate": 1.3625490237578615e-07,
      "loss": 0.4259,
      "step": 14676
    },
    {
      "epoch": 1.799534085335949,
      "grad_norm": 1.9241239443312077,
      "learning_rate": 1.3608990999574913e-07,
      "loss": 0.3934,
      "step": 14677
    },
    {
      "epoch": 1.7996566944580676,
      "grad_norm": 1.7955569508480962,
      "learning_rate": 1.3592501477660226e-07,
      "loss": 0.406,
      "step": 14678
    },
    {
      "epoch": 1.7997793035801863,
      "grad_norm": 2.081547771127886,
      "learning_rate": 1.3576021672512263e-07,
      "loss": 0.393,
      "step": 14679
    },
    {
      "epoch": 1.799901912702305,
      "grad_norm": 1.831484189242786,
      "learning_rate": 1.3559551584808317e-07,
      "loss": 0.4542,
      "step": 14680
    },
    {
      "epoch": 1.8000245218244237,
      "grad_norm": 2.0289717746395093,
      "learning_rate": 1.35430912152254e-07,
      "loss": 0.4467,
      "step": 14681
    },
    {
      "epoch": 1.8001471309465424,
      "grad_norm": 1.9881957358195954,
      "learning_rate": 1.3526640564440086e-07,
      "loss": 0.3893,
      "step": 14682
    },
    {
      "epoch": 1.8002697400686611,
      "grad_norm": 2.1342059087133047,
      "learning_rate": 1.351019963312844e-07,
      "loss": 0.4095,
      "step": 14683
    },
    {
      "epoch": 1.8003923491907798,
      "grad_norm": 1.9428585159024803,
      "learning_rate": 1.3493768421966286e-07,
      "loss": 0.4027,
      "step": 14684
    },
    {
      "epoch": 1.8005149583128985,
      "grad_norm": 1.8554279308503066,
      "learning_rate": 1.3477346931628943e-07,
      "loss": 0.4104,
      "step": 14685
    },
    {
      "epoch": 1.8006375674350172,
      "grad_norm": 1.897954120929063,
      "learning_rate": 1.3460935162791372e-07,
      "loss": 0.4094,
      "step": 14686
    },
    {
      "epoch": 1.800760176557136,
      "grad_norm": 1.9169958409965937,
      "learning_rate": 1.3444533116128145e-07,
      "loss": 0.4139,
      "step": 14687
    },
    {
      "epoch": 1.8008827856792546,
      "grad_norm": 1.8204251681611574,
      "learning_rate": 1.3428140792313389e-07,
      "loss": 0.4051,
      "step": 14688
    },
    {
      "epoch": 1.8010053948013733,
      "grad_norm": 2.062401138408039,
      "learning_rate": 1.3411758192020841e-07,
      "loss": 0.4563,
      "step": 14689
    },
    {
      "epoch": 1.801128003923492,
      "grad_norm": 1.9930572873186798,
      "learning_rate": 1.3395385315923855e-07,
      "loss": 0.4023,
      "step": 14690
    },
    {
      "epoch": 1.8012506130456107,
      "grad_norm": 2.090597138960239,
      "learning_rate": 1.337902216469547e-07,
      "loss": 0.4256,
      "step": 14691
    },
    {
      "epoch": 1.8013732221677294,
      "grad_norm": 2.0295114221304162,
      "learning_rate": 1.3362668739008155e-07,
      "loss": 0.4587,
      "step": 14692
    },
    {
      "epoch": 1.801495831289848,
      "grad_norm": 1.874911618672097,
      "learning_rate": 1.3346325039534063e-07,
      "loss": 0.4316,
      "step": 14693
    },
    {
      "epoch": 1.8016184404119666,
      "grad_norm": 2.051542018474108,
      "learning_rate": 1.3329991066944992e-07,
      "loss": 0.4353,
      "step": 14694
    },
    {
      "epoch": 1.8017410495340853,
      "grad_norm": 1.8844580524016155,
      "learning_rate": 1.3313666821912318e-07,
      "loss": 0.4312,
      "step": 14695
    },
    {
      "epoch": 1.801863658656204,
      "grad_norm": 1.88740543883244,
      "learning_rate": 1.3297352305106926e-07,
      "loss": 0.4262,
      "step": 14696
    },
    {
      "epoch": 1.8019862677783227,
      "grad_norm": 2.116852192482943,
      "learning_rate": 1.3281047517199357e-07,
      "loss": 0.4037,
      "step": 14697
    },
    {
      "epoch": 1.8021088769004414,
      "grad_norm": 2.027345430102792,
      "learning_rate": 1.3264752458859886e-07,
      "loss": 0.4249,
      "step": 14698
    },
    {
      "epoch": 1.8022314860225601,
      "grad_norm": 1.93665011928369,
      "learning_rate": 1.3248467130758197e-07,
      "loss": 0.4083,
      "step": 14699
    },
    {
      "epoch": 1.8023540951446788,
      "grad_norm": 1.8301240805887682,
      "learning_rate": 1.3232191533563587e-07,
      "loss": 0.4112,
      "step": 14700
    },
    {
      "epoch": 1.8024767042667973,
      "grad_norm": 2.0960541571423854,
      "learning_rate": 1.3215925667945078e-07,
      "loss": 0.4403,
      "step": 14701
    },
    {
      "epoch": 1.802599313388916,
      "grad_norm": 1.8604906732777746,
      "learning_rate": 1.3199669534571247e-07,
      "loss": 0.4183,
      "step": 14702
    },
    {
      "epoch": 1.8027219225110347,
      "grad_norm": 1.9418448760424813,
      "learning_rate": 1.3183423134110224e-07,
      "loss": 0.4004,
      "step": 14703
    },
    {
      "epoch": 1.8028445316331534,
      "grad_norm": 2.091637203806182,
      "learning_rate": 1.3167186467229725e-07,
      "loss": 0.396,
      "step": 14704
    },
    {
      "epoch": 1.802967140755272,
      "grad_norm": 1.8828339874889162,
      "learning_rate": 1.315095953459719e-07,
      "loss": 0.4022,
      "step": 14705
    },
    {
      "epoch": 1.8030897498773908,
      "grad_norm": 1.7968765167438114,
      "learning_rate": 1.3134742336879475e-07,
      "loss": 0.3925,
      "step": 14706
    },
    {
      "epoch": 1.8032123589995095,
      "grad_norm": 2.0819733780758334,
      "learning_rate": 1.311853487474321e-07,
      "loss": 0.4312,
      "step": 14707
    },
    {
      "epoch": 1.8033349681216282,
      "grad_norm": 2.0808902364786657,
      "learning_rate": 1.310233714885456e-07,
      "loss": 0.4231,
      "step": 14708
    },
    {
      "epoch": 1.803457577243747,
      "grad_norm": 1.9488478976091763,
      "learning_rate": 1.3086149159879187e-07,
      "loss": 0.4117,
      "step": 14709
    },
    {
      "epoch": 1.8035801863658656,
      "grad_norm": 1.8440155117930663,
      "learning_rate": 1.3069970908482527e-07,
      "loss": 0.4559,
      "step": 14710
    },
    {
      "epoch": 1.8037027954879843,
      "grad_norm": 1.956309703394989,
      "learning_rate": 1.3053802395329525e-07,
      "loss": 0.4332,
      "step": 14711
    },
    {
      "epoch": 1.803825404610103,
      "grad_norm": 1.744480970338407,
      "learning_rate": 1.3037643621084757e-07,
      "loss": 0.4125,
      "step": 14712
    },
    {
      "epoch": 1.8039480137322217,
      "grad_norm": 1.943107235072198,
      "learning_rate": 1.3021494586412275e-07,
      "loss": 0.4322,
      "step": 14713
    },
    {
      "epoch": 1.8040706228543404,
      "grad_norm": 1.9633473377864992,
      "learning_rate": 1.300535529197594e-07,
      "loss": 0.3691,
      "step": 14714
    },
    {
      "epoch": 1.804193231976459,
      "grad_norm": 1.8939505881386822,
      "learning_rate": 1.2989225738439082e-07,
      "loss": 0.4367,
      "step": 14715
    },
    {
      "epoch": 1.8043158410985778,
      "grad_norm": 2.1059605648360047,
      "learning_rate": 1.297310592646464e-07,
      "loss": 0.4034,
      "step": 14716
    },
    {
      "epoch": 1.8044384502206965,
      "grad_norm": 2.1235894105461566,
      "learning_rate": 1.2956995856715115e-07,
      "loss": 0.4235,
      "step": 14717
    },
    {
      "epoch": 1.8045610593428152,
      "grad_norm": 1.7594786440513541,
      "learning_rate": 1.2940895529852783e-07,
      "loss": 0.3996,
      "step": 14718
    },
    {
      "epoch": 1.8046836684649339,
      "grad_norm": 1.932997041065551,
      "learning_rate": 1.2924804946539276e-07,
      "loss": 0.4439,
      "step": 14719
    },
    {
      "epoch": 1.8048062775870526,
      "grad_norm": 1.9494001538929746,
      "learning_rate": 1.2908724107436015e-07,
      "loss": 0.4233,
      "step": 14720
    },
    {
      "epoch": 1.8049288867091713,
      "grad_norm": 1.9114547906618997,
      "learning_rate": 1.2892653013203916e-07,
      "loss": 0.4594,
      "step": 14721
    },
    {
      "epoch": 1.80505149583129,
      "grad_norm": 1.8925955661030973,
      "learning_rate": 1.2876591664503556e-07,
      "loss": 0.387,
      "step": 14722
    },
    {
      "epoch": 1.8051741049534087,
      "grad_norm": 1.7894987198548402,
      "learning_rate": 1.2860540061995054e-07,
      "loss": 0.4335,
      "step": 14723
    },
    {
      "epoch": 1.8052967140755272,
      "grad_norm": 2.0404025961131236,
      "learning_rate": 1.2844498206338207e-07,
      "loss": 0.4019,
      "step": 14724
    },
    {
      "epoch": 1.8054193231976459,
      "grad_norm": 1.862685033052001,
      "learning_rate": 1.2828466098192355e-07,
      "loss": 0.418,
      "step": 14725
    },
    {
      "epoch": 1.8055419323197646,
      "grad_norm": 1.9575564931105658,
      "learning_rate": 1.2812443738216357e-07,
      "loss": 0.4672,
      "step": 14726
    },
    {
      "epoch": 1.8056645414418833,
      "grad_norm": 2.0264147866122855,
      "learning_rate": 1.2796431127068853e-07,
      "loss": 0.4195,
      "step": 14727
    },
    {
      "epoch": 1.805787150564002,
      "grad_norm": 2.079199446736772,
      "learning_rate": 1.2780428265408013e-07,
      "loss": 0.408,
      "step": 14728
    },
    {
      "epoch": 1.8059097596861207,
      "grad_norm": 1.927833549835366,
      "learning_rate": 1.2764435153891503e-07,
      "loss": 0.3922,
      "step": 14729
    },
    {
      "epoch": 1.8060323688082394,
      "grad_norm": 1.99069278876686,
      "learning_rate": 1.2748451793176658e-07,
      "loss": 0.4347,
      "step": 14730
    },
    {
      "epoch": 1.806154977930358,
      "grad_norm": 1.8573304522021121,
      "learning_rate": 1.273247818392054e-07,
      "loss": 0.3794,
      "step": 14731
    },
    {
      "epoch": 1.8062775870524765,
      "grad_norm": 2.1458087911472976,
      "learning_rate": 1.2716514326779588e-07,
      "loss": 0.4186,
      "step": 14732
    },
    {
      "epoch": 1.8064001961745952,
      "grad_norm": 2.0375586997472954,
      "learning_rate": 1.270056022240995e-07,
      "loss": 0.4164,
      "step": 14733
    },
    {
      "epoch": 1.806522805296714,
      "grad_norm": 1.7710987988329827,
      "learning_rate": 1.2684615871467382e-07,
      "loss": 0.3545,
      "step": 14734
    },
    {
      "epoch": 1.8066454144188326,
      "grad_norm": 2.048652797840363,
      "learning_rate": 1.2668681274607298e-07,
      "loss": 0.4348,
      "step": 14735
    },
    {
      "epoch": 1.8067680235409513,
      "grad_norm": 1.9449106629882795,
      "learning_rate": 1.265275643248451e-07,
      "loss": 0.4317,
      "step": 14736
    },
    {
      "epoch": 1.80689063266307,
      "grad_norm": 2.075486431374806,
      "learning_rate": 1.2636841345753686e-07,
      "loss": 0.4179,
      "step": 14737
    },
    {
      "epoch": 1.8070132417851887,
      "grad_norm": 1.7703792013372248,
      "learning_rate": 1.2620936015068864e-07,
      "loss": 0.3885,
      "step": 14738
    },
    {
      "epoch": 1.8071358509073074,
      "grad_norm": 2.1740007905363834,
      "learning_rate": 1.2605040441083822e-07,
      "loss": 0.4141,
      "step": 14739
    },
    {
      "epoch": 1.8072584600294261,
      "grad_norm": 2.0368063872192987,
      "learning_rate": 1.258915462445187e-07,
      "loss": 0.4608,
      "step": 14740
    },
    {
      "epoch": 1.8073810691515448,
      "grad_norm": 2.0607888179545863,
      "learning_rate": 1.2573278565825992e-07,
      "loss": 0.429,
      "step": 14741
    },
    {
      "epoch": 1.8075036782736635,
      "grad_norm": 1.888348850203955,
      "learning_rate": 1.2557412265858715e-07,
      "loss": 0.3993,
      "step": 14742
    },
    {
      "epoch": 1.8076262873957822,
      "grad_norm": 2.025724049352865,
      "learning_rate": 1.254155572520213e-07,
      "loss": 0.4342,
      "step": 14743
    },
    {
      "epoch": 1.807748896517901,
      "grad_norm": 2.13708370543278,
      "learning_rate": 1.2525708944507997e-07,
      "loss": 0.4573,
      "step": 14744
    },
    {
      "epoch": 1.8078715056400196,
      "grad_norm": 2.031798137237039,
      "learning_rate": 1.2509871924427653e-07,
      "loss": 0.4097,
      "step": 14745
    },
    {
      "epoch": 1.8079941147621383,
      "grad_norm": 1.9402468940961153,
      "learning_rate": 1.2494044665611994e-07,
      "loss": 0.3965,
      "step": 14746
    },
    {
      "epoch": 1.808116723884257,
      "grad_norm": 2.0431377580308263,
      "learning_rate": 1.2478227168711555e-07,
      "loss": 0.3822,
      "step": 14747
    },
    {
      "epoch": 1.8082393330063757,
      "grad_norm": 1.950344312140837,
      "learning_rate": 1.246241943437651e-07,
      "loss": 0.3802,
      "step": 14748
    },
    {
      "epoch": 1.8083619421284944,
      "grad_norm": 2.0240437709842705,
      "learning_rate": 1.2446621463256564e-07,
      "loss": 0.4034,
      "step": 14749
    },
    {
      "epoch": 1.8084845512506131,
      "grad_norm": 2.053952848711765,
      "learning_rate": 1.2430833256001003e-07,
      "loss": 0.4531,
      "step": 14750
    },
    {
      "epoch": 1.8086071603727318,
      "grad_norm": 2.0234391047850626,
      "learning_rate": 1.2415054813258804e-07,
      "loss": 0.4497,
      "step": 14751
    },
    {
      "epoch": 1.8087297694948505,
      "grad_norm": 1.8487638314546775,
      "learning_rate": 1.2399286135678424e-07,
      "loss": 0.4343,
      "step": 14752
    },
    {
      "epoch": 1.8088523786169692,
      "grad_norm": 1.925395698216762,
      "learning_rate": 1.2383527223908064e-07,
      "loss": 0.4097,
      "step": 14753
    },
    {
      "epoch": 1.808974987739088,
      "grad_norm": 1.7583979454460712,
      "learning_rate": 1.2367778078595348e-07,
      "loss": 0.3999,
      "step": 14754
    },
    {
      "epoch": 1.8090975968612066,
      "grad_norm": 1.9566091658107108,
      "learning_rate": 1.2352038700387704e-07,
      "loss": 0.3736,
      "step": 14755
    },
    {
      "epoch": 1.8092202059833251,
      "grad_norm": 2.010990855786023,
      "learning_rate": 1.2336309089931968e-07,
      "loss": 0.3829,
      "step": 14756
    },
    {
      "epoch": 1.8093428151054438,
      "grad_norm": 2.17031290249379,
      "learning_rate": 1.2320589247874686e-07,
      "loss": 0.4776,
      "step": 14757
    },
    {
      "epoch": 1.8094654242275625,
      "grad_norm": 2.047817311377644,
      "learning_rate": 1.2304879174861978e-07,
      "loss": 0.4412,
      "step": 14758
    },
    {
      "epoch": 1.8095880333496812,
      "grad_norm": 1.9204562767948912,
      "learning_rate": 1.228917887153952e-07,
      "loss": 0.396,
      "step": 14759
    },
    {
      "epoch": 1.8097106424718,
      "grad_norm": 2.1392281089705025,
      "learning_rate": 1.227348833855263e-07,
      "loss": 0.4129,
      "step": 14760
    },
    {
      "epoch": 1.8098332515939186,
      "grad_norm": 1.8430665947153893,
      "learning_rate": 1.2257807576546265e-07,
      "loss": 0.3968,
      "step": 14761
    },
    {
      "epoch": 1.8099558607160373,
      "grad_norm": 2.163522653782432,
      "learning_rate": 1.2242136586164883e-07,
      "loss": 0.46,
      "step": 14762
    },
    {
      "epoch": 1.810078469838156,
      "grad_norm": 1.9635304404468283,
      "learning_rate": 1.2226475368052604e-07,
      "loss": 0.407,
      "step": 14763
    },
    {
      "epoch": 1.8102010789602745,
      "grad_norm": 1.7504025154755347,
      "learning_rate": 1.2210823922853139e-07,
      "loss": 0.3965,
      "step": 14764
    },
    {
      "epoch": 1.8103236880823932,
      "grad_norm": 2.03484986218567,
      "learning_rate": 1.2195182251209748e-07,
      "loss": 0.4303,
      "step": 14765
    },
    {
      "epoch": 1.810446297204512,
      "grad_norm": 1.948939855873702,
      "learning_rate": 1.2179550353765417e-07,
      "loss": 0.4568,
      "step": 14766
    },
    {
      "epoch": 1.8105689063266306,
      "grad_norm": 1.8708722373095865,
      "learning_rate": 1.2163928231162552e-07,
      "loss": 0.4489,
      "step": 14767
    },
    {
      "epoch": 1.8106915154487493,
      "grad_norm": 1.8430883909134477,
      "learning_rate": 1.2148315884043334e-07,
      "loss": 0.4909,
      "step": 14768
    },
    {
      "epoch": 1.810814124570868,
      "grad_norm": 2.1113146759058146,
      "learning_rate": 1.2132713313049383e-07,
      "loss": 0.404,
      "step": 14769
    },
    {
      "epoch": 1.8109367336929867,
      "grad_norm": 1.9215843570964246,
      "learning_rate": 1.211712051882205e-07,
      "loss": 0.4423,
      "step": 14770
    },
    {
      "epoch": 1.8110593428151054,
      "grad_norm": 1.8936310205910083,
      "learning_rate": 1.2101537502002215e-07,
      "loss": 0.417,
      "step": 14771
    },
    {
      "epoch": 1.811181951937224,
      "grad_norm": 2.009370298772833,
      "learning_rate": 1.2085964263230304e-07,
      "loss": 0.4343,
      "step": 14772
    },
    {
      "epoch": 1.8113045610593428,
      "grad_norm": 1.8778853201106573,
      "learning_rate": 1.2070400803146476e-07,
      "loss": 0.4218,
      "step": 14773
    },
    {
      "epoch": 1.8114271701814615,
      "grad_norm": 1.960260661890882,
      "learning_rate": 1.2054847122390412e-07,
      "loss": 0.4114,
      "step": 14774
    },
    {
      "epoch": 1.8115497793035802,
      "grad_norm": 1.8082695706223497,
      "learning_rate": 1.2039303221601378e-07,
      "loss": 0.393,
      "step": 14775
    },
    {
      "epoch": 1.8116723884256989,
      "grad_norm": 1.9462831611429996,
      "learning_rate": 1.2023769101418225e-07,
      "loss": 0.4529,
      "step": 14776
    },
    {
      "epoch": 1.8117949975478176,
      "grad_norm": 2.099909556082297,
      "learning_rate": 1.20082447624795e-07,
      "loss": 0.4448,
      "step": 14777
    },
    {
      "epoch": 1.8119176066699363,
      "grad_norm": 1.9127827910848842,
      "learning_rate": 1.199273020542327e-07,
      "loss": 0.3963,
      "step": 14778
    },
    {
      "epoch": 1.812040215792055,
      "grad_norm": 1.9983843270191946,
      "learning_rate": 1.197722543088714e-07,
      "loss": 0.4565,
      "step": 14779
    },
    {
      "epoch": 1.8121628249141737,
      "grad_norm": 1.8781090555399855,
      "learning_rate": 1.1961730439508435e-07,
      "loss": 0.359,
      "step": 14780
    },
    {
      "epoch": 1.8122854340362924,
      "grad_norm": 2.047668725057885,
      "learning_rate": 1.194624523192403e-07,
      "loss": 0.4183,
      "step": 14781
    },
    {
      "epoch": 1.812408043158411,
      "grad_norm": 1.8959764382283715,
      "learning_rate": 1.1930769808770392e-07,
      "loss": 0.3827,
      "step": 14782
    },
    {
      "epoch": 1.8125306522805298,
      "grad_norm": 1.9347160321795696,
      "learning_rate": 1.1915304170683594e-07,
      "loss": 0.4254,
      "step": 14783
    },
    {
      "epoch": 1.8126532614026485,
      "grad_norm": 1.8798097845310586,
      "learning_rate": 1.1899848318299295e-07,
      "loss": 0.4077,
      "step": 14784
    },
    {
      "epoch": 1.8127758705247672,
      "grad_norm": 2.1764236447137097,
      "learning_rate": 1.1884402252252708e-07,
      "loss": 0.3943,
      "step": 14785
    },
    {
      "epoch": 1.8128984796468859,
      "grad_norm": 2.0372743576382333,
      "learning_rate": 1.186896597317877e-07,
      "loss": 0.4445,
      "step": 14786
    },
    {
      "epoch": 1.8130210887690044,
      "grad_norm": 2.028504730410392,
      "learning_rate": 1.1853539481711918e-07,
      "loss": 0.4327,
      "step": 14787
    },
    {
      "epoch": 1.813143697891123,
      "grad_norm": 1.8775789995056777,
      "learning_rate": 1.18381227784862e-07,
      "loss": 0.448,
      "step": 14788
    },
    {
      "epoch": 1.8132663070132418,
      "grad_norm": 1.8691238730785273,
      "learning_rate": 1.182271586413522e-07,
      "loss": 0.4561,
      "step": 14789
    },
    {
      "epoch": 1.8133889161353605,
      "grad_norm": 2.097976036008394,
      "learning_rate": 1.1807318739292306e-07,
      "loss": 0.4556,
      "step": 14790
    },
    {
      "epoch": 1.8135115252574792,
      "grad_norm": 2.1721461597601492,
      "learning_rate": 1.179193140459034e-07,
      "loss": 0.4743,
      "step": 14791
    },
    {
      "epoch": 1.8136341343795979,
      "grad_norm": 1.9883700233611796,
      "learning_rate": 1.1776553860661621e-07,
      "loss": 0.4025,
      "step": 14792
    },
    {
      "epoch": 1.8137567435017166,
      "grad_norm": 1.785044124257831,
      "learning_rate": 1.1761186108138312e-07,
      "loss": 0.392,
      "step": 14793
    },
    {
      "epoch": 1.8138793526238353,
      "grad_norm": 1.7317006816646752,
      "learning_rate": 1.1745828147652016e-07,
      "loss": 0.4223,
      "step": 14794
    },
    {
      "epoch": 1.8140019617459537,
      "grad_norm": 1.8895913197356695,
      "learning_rate": 1.1730479979834008e-07,
      "loss": 0.4073,
      "step": 14795
    },
    {
      "epoch": 1.8141245708680724,
      "grad_norm": 1.7901543483051374,
      "learning_rate": 1.1715141605315062e-07,
      "loss": 0.4003,
      "step": 14796
    },
    {
      "epoch": 1.8142471799901911,
      "grad_norm": 2.092530901321626,
      "learning_rate": 1.1699813024725643e-07,
      "loss": 0.4109,
      "step": 14797
    },
    {
      "epoch": 1.8143697891123098,
      "grad_norm": 2.03412191080601,
      "learning_rate": 1.1684494238695804e-07,
      "loss": 0.4376,
      "step": 14798
    },
    {
      "epoch": 1.8144923982344285,
      "grad_norm": 1.9881895231451538,
      "learning_rate": 1.1669185247855181e-07,
      "loss": 0.4317,
      "step": 14799
    },
    {
      "epoch": 1.8146150073565472,
      "grad_norm": 1.8705299687220294,
      "learning_rate": 1.1653886052832908e-07,
      "loss": 0.4058,
      "step": 14800
    },
    {
      "epoch": 1.814737616478666,
      "grad_norm": 1.9542100455127198,
      "learning_rate": 1.1638596654257928e-07,
      "loss": 0.4594,
      "step": 14801
    },
    {
      "epoch": 1.8148602256007846,
      "grad_norm": 1.9616382659708487,
      "learning_rate": 1.1623317052758599e-07,
      "loss": 0.4501,
      "step": 14802
    },
    {
      "epoch": 1.8149828347229033,
      "grad_norm": 2.0357773145416616,
      "learning_rate": 1.1608047248962972e-07,
      "loss": 0.4296,
      "step": 14803
    },
    {
      "epoch": 1.815105443845022,
      "grad_norm": 1.7643248037701853,
      "learning_rate": 1.1592787243498632e-07,
      "loss": 0.406,
      "step": 14804
    },
    {
      "epoch": 1.8152280529671407,
      "grad_norm": 1.8073305273278168,
      "learning_rate": 1.157753703699277e-07,
      "loss": 0.4392,
      "step": 14805
    },
    {
      "epoch": 1.8153506620892594,
      "grad_norm": 2.019876523136426,
      "learning_rate": 1.1562296630072245e-07,
      "loss": 0.4598,
      "step": 14806
    },
    {
      "epoch": 1.8154732712113781,
      "grad_norm": 1.9995324310414482,
      "learning_rate": 1.1547066023363473e-07,
      "loss": 0.4355,
      "step": 14807
    },
    {
      "epoch": 1.8155958803334968,
      "grad_norm": 1.9636061304329535,
      "learning_rate": 1.1531845217492427e-07,
      "loss": 0.3747,
      "step": 14808
    },
    {
      "epoch": 1.8157184894556155,
      "grad_norm": 1.9797098433280378,
      "learning_rate": 1.1516634213084688e-07,
      "loss": 0.3751,
      "step": 14809
    },
    {
      "epoch": 1.8158410985777342,
      "grad_norm": 1.8228370285688784,
      "learning_rate": 1.1501433010765506e-07,
      "loss": 0.4212,
      "step": 14810
    },
    {
      "epoch": 1.815963707699853,
      "grad_norm": 2.1125414064280363,
      "learning_rate": 1.1486241611159659e-07,
      "loss": 0.4599,
      "step": 14811
    },
    {
      "epoch": 1.8160863168219716,
      "grad_norm": 1.7884179665578488,
      "learning_rate": 1.1471060014891539e-07,
      "loss": 0.4366,
      "step": 14812
    },
    {
      "epoch": 1.8162089259440903,
      "grad_norm": 1.8227982891552823,
      "learning_rate": 1.1455888222585115e-07,
      "loss": 0.4613,
      "step": 14813
    },
    {
      "epoch": 1.816331535066209,
      "grad_norm": 1.9228471273883951,
      "learning_rate": 1.1440726234864031e-07,
      "loss": 0.3872,
      "step": 14814
    },
    {
      "epoch": 1.8164541441883277,
      "grad_norm": 1.9441868014345673,
      "learning_rate": 1.1425574052351396e-07,
      "loss": 0.4367,
      "step": 14815
    },
    {
      "epoch": 1.8165767533104464,
      "grad_norm": 1.9656223646181454,
      "learning_rate": 1.1410431675670076e-07,
      "loss": 0.4248,
      "step": 14816
    },
    {
      "epoch": 1.8166993624325651,
      "grad_norm": 1.8222170968070146,
      "learning_rate": 1.1395299105442348e-07,
      "loss": 0.4397,
      "step": 14817
    },
    {
      "epoch": 1.8168219715546838,
      "grad_norm": 2.0134547723722687,
      "learning_rate": 1.1380176342290272e-07,
      "loss": 0.4391,
      "step": 14818
    },
    {
      "epoch": 1.8169445806768023,
      "grad_norm": 1.9398642150492622,
      "learning_rate": 1.136506338683538e-07,
      "loss": 0.4008,
      "step": 14819
    },
    {
      "epoch": 1.817067189798921,
      "grad_norm": 2.065331222487374,
      "learning_rate": 1.1349960239698893e-07,
      "loss": 0.4053,
      "step": 14820
    },
    {
      "epoch": 1.8171897989210397,
      "grad_norm": 1.7244175363617862,
      "learning_rate": 1.1334866901501512e-07,
      "loss": 0.4103,
      "step": 14821
    },
    {
      "epoch": 1.8173124080431584,
      "grad_norm": 1.8143205965380378,
      "learning_rate": 1.1319783372863601e-07,
      "loss": 0.4256,
      "step": 14822
    },
    {
      "epoch": 1.8174350171652771,
      "grad_norm": 1.8813622456567967,
      "learning_rate": 1.1304709654405138e-07,
      "loss": 0.385,
      "step": 14823
    },
    {
      "epoch": 1.8175576262873958,
      "grad_norm": 1.8344498049212399,
      "learning_rate": 1.1289645746745764e-07,
      "loss": 0.4013,
      "step": 14824
    },
    {
      "epoch": 1.8176802354095145,
      "grad_norm": 2.0282221756499585,
      "learning_rate": 1.1274591650504485e-07,
      "loss": 0.4245,
      "step": 14825
    },
    {
      "epoch": 1.817802844531633,
      "grad_norm": 2.045696719268038,
      "learning_rate": 1.1259547366300111e-07,
      "loss": 0.4264,
      "step": 14826
    },
    {
      "epoch": 1.8179254536537517,
      "grad_norm": 1.9062080087366662,
      "learning_rate": 1.1244512894751036e-07,
      "loss": 0.3933,
      "step": 14827
    },
    {
      "epoch": 1.8180480627758704,
      "grad_norm": 2.0825612440392445,
      "learning_rate": 1.1229488236475156e-07,
      "loss": 0.3933,
      "step": 14828
    },
    {
      "epoch": 1.818170671897989,
      "grad_norm": 1.9319948896771295,
      "learning_rate": 1.1214473392090003e-07,
      "loss": 0.4118,
      "step": 14829
    },
    {
      "epoch": 1.8182932810201078,
      "grad_norm": 1.6842796069257122,
      "learning_rate": 1.1199468362212695e-07,
      "loss": 0.4114,
      "step": 14830
    },
    {
      "epoch": 1.8184158901422265,
      "grad_norm": 1.9156812047894463,
      "learning_rate": 1.1184473147460074e-07,
      "loss": 0.3854,
      "step": 14831
    },
    {
      "epoch": 1.8185384992643452,
      "grad_norm": 1.8326224009506387,
      "learning_rate": 1.1169487748448338e-07,
      "loss": 0.3683,
      "step": 14832
    },
    {
      "epoch": 1.8186611083864639,
      "grad_norm": 1.9768604608552762,
      "learning_rate": 1.1154512165793524e-07,
      "loss": 0.4112,
      "step": 14833
    },
    {
      "epoch": 1.8187837175085826,
      "grad_norm": 2.0097122534173413,
      "learning_rate": 1.1139546400111085e-07,
      "loss": 0.4576,
      "step": 14834
    },
    {
      "epoch": 1.8189063266307013,
      "grad_norm": 2.1103761210208263,
      "learning_rate": 1.1124590452016137e-07,
      "loss": 0.4355,
      "step": 14835
    },
    {
      "epoch": 1.81902893575282,
      "grad_norm": 1.8757790083288783,
      "learning_rate": 1.1109644322123414e-07,
      "loss": 0.4523,
      "step": 14836
    },
    {
      "epoch": 1.8191515448749387,
      "grad_norm": 2.0682139342926376,
      "learning_rate": 1.1094708011047256e-07,
      "loss": 0.4311,
      "step": 14837
    },
    {
      "epoch": 1.8192741539970574,
      "grad_norm": 1.8684538883885322,
      "learning_rate": 1.1079781519401561e-07,
      "loss": 0.4189,
      "step": 14838
    },
    {
      "epoch": 1.819396763119176,
      "grad_norm": 1.9829693035283893,
      "learning_rate": 1.1064864847799784e-07,
      "loss": 0.4415,
      "step": 14839
    },
    {
      "epoch": 1.8195193722412948,
      "grad_norm": 1.9616099655531174,
      "learning_rate": 1.1049957996855098e-07,
      "loss": 0.415,
      "step": 14840
    },
    {
      "epoch": 1.8196419813634135,
      "grad_norm": 1.7458235045822335,
      "learning_rate": 1.1035060967180155e-07,
      "loss": 0.3777,
      "step": 14841
    },
    {
      "epoch": 1.8197645904855322,
      "grad_norm": 2.0914107280374403,
      "learning_rate": 1.1020173759387242e-07,
      "loss": 0.4621,
      "step": 14842
    },
    {
      "epoch": 1.8198871996076509,
      "grad_norm": 1.9124563130345453,
      "learning_rate": 1.1005296374088259e-07,
      "loss": 0.4373,
      "step": 14843
    },
    {
      "epoch": 1.8200098087297696,
      "grad_norm": 2.090637346011749,
      "learning_rate": 1.0990428811894716e-07,
      "loss": 0.3939,
      "step": 14844
    },
    {
      "epoch": 1.8201324178518883,
      "grad_norm": 1.825448730008161,
      "learning_rate": 1.0975571073417707e-07,
      "loss": 0.3878,
      "step": 14845
    },
    {
      "epoch": 1.820255026974007,
      "grad_norm": 1.960965810611557,
      "learning_rate": 1.0960723159267855e-07,
      "loss": 0.4249,
      "step": 14846
    },
    {
      "epoch": 1.8203776360961257,
      "grad_norm": 2.1469021841126246,
      "learning_rate": 1.0945885070055478e-07,
      "loss": 0.4222,
      "step": 14847
    },
    {
      "epoch": 1.8205002452182444,
      "grad_norm": 1.8879310940246794,
      "learning_rate": 1.0931056806390422e-07,
      "loss": 0.4211,
      "step": 14848
    },
    {
      "epoch": 1.820622854340363,
      "grad_norm": 2.0154795805246186,
      "learning_rate": 1.0916238368882198e-07,
      "loss": 0.4142,
      "step": 14849
    },
    {
      "epoch": 1.8207454634624816,
      "grad_norm": 2.220525577147792,
      "learning_rate": 1.090142975813982e-07,
      "loss": 0.4109,
      "step": 14850
    },
    {
      "epoch": 1.8208680725846003,
      "grad_norm": 2.08943754274193,
      "learning_rate": 1.0886630974771995e-07,
      "loss": 0.4391,
      "step": 14851
    },
    {
      "epoch": 1.820990681706719,
      "grad_norm": 2.004207037613819,
      "learning_rate": 1.0871842019386903e-07,
      "loss": 0.4135,
      "step": 14852
    },
    {
      "epoch": 1.8211132908288377,
      "grad_norm": 1.9543359705295502,
      "learning_rate": 1.0857062892592501e-07,
      "loss": 0.3985,
      "step": 14853
    },
    {
      "epoch": 1.8212358999509564,
      "grad_norm": 1.7658028462253954,
      "learning_rate": 1.0842293594996167e-07,
      "loss": 0.3883,
      "step": 14854
    },
    {
      "epoch": 1.821358509073075,
      "grad_norm": 1.7806037389345195,
      "learning_rate": 1.0827534127204942e-07,
      "loss": 0.3932,
      "step": 14855
    },
    {
      "epoch": 1.8214811181951938,
      "grad_norm": 1.7292638519005077,
      "learning_rate": 1.0812784489825506e-07,
      "loss": 0.3804,
      "step": 14856
    },
    {
      "epoch": 1.8216037273173125,
      "grad_norm": 1.9954783902485353,
      "learning_rate": 1.079804468346407e-07,
      "loss": 0.437,
      "step": 14857
    },
    {
      "epoch": 1.821726336439431,
      "grad_norm": 2.2827000973335245,
      "learning_rate": 1.0783314708726483e-07,
      "loss": 0.4482,
      "step": 14858
    },
    {
      "epoch": 1.8218489455615496,
      "grad_norm": 1.9758839166298576,
      "learning_rate": 1.076859456621815e-07,
      "loss": 0.3931,
      "step": 14859
    },
    {
      "epoch": 1.8219715546836683,
      "grad_norm": 1.9551796063428295,
      "learning_rate": 1.0753884256544111e-07,
      "loss": 0.3732,
      "step": 14860
    },
    {
      "epoch": 1.822094163805787,
      "grad_norm": 2.025519443474946,
      "learning_rate": 1.073918378030897e-07,
      "loss": 0.3977,
      "step": 14861
    },
    {
      "epoch": 1.8222167729279057,
      "grad_norm": 2.0338360322004254,
      "learning_rate": 1.0724493138116965e-07,
      "loss": 0.4104,
      "step": 14862
    },
    {
      "epoch": 1.8223393820500244,
      "grad_norm": 2.035813083267078,
      "learning_rate": 1.0709812330571862e-07,
      "loss": 0.3953,
      "step": 14863
    },
    {
      "epoch": 1.8224619911721431,
      "grad_norm": 2.2496379497612966,
      "learning_rate": 1.069514135827715e-07,
      "loss": 0.422,
      "step": 14864
    },
    {
      "epoch": 1.8225846002942618,
      "grad_norm": 1.874651324697116,
      "learning_rate": 1.0680480221835765e-07,
      "loss": 0.4285,
      "step": 14865
    },
    {
      "epoch": 1.8227072094163805,
      "grad_norm": 2.04041121374576,
      "learning_rate": 1.0665828921850335e-07,
      "loss": 0.4243,
      "step": 14866
    },
    {
      "epoch": 1.8228298185384992,
      "grad_norm": 2.119018053445437,
      "learning_rate": 1.0651187458923046e-07,
      "loss": 0.3833,
      "step": 14867
    },
    {
      "epoch": 1.822952427660618,
      "grad_norm": 2.0962040652747307,
      "learning_rate": 1.0636555833655638e-07,
      "loss": 0.4024,
      "step": 14868
    },
    {
      "epoch": 1.8230750367827366,
      "grad_norm": 2.04091702105372,
      "learning_rate": 1.0621934046649574e-07,
      "loss": 0.4321,
      "step": 14869
    },
    {
      "epoch": 1.8231976459048553,
      "grad_norm": 1.8267047385340478,
      "learning_rate": 1.0607322098505817e-07,
      "loss": 0.4304,
      "step": 14870
    },
    {
      "epoch": 1.823320255026974,
      "grad_norm": 2.051012804352658,
      "learning_rate": 1.0592719989824946e-07,
      "loss": 0.4332,
      "step": 14871
    },
    {
      "epoch": 1.8234428641490927,
      "grad_norm": 2.128511632704548,
      "learning_rate": 1.0578127721207088e-07,
      "loss": 0.4193,
      "step": 14872
    },
    {
      "epoch": 1.8235654732712114,
      "grad_norm": 2.0042842756653894,
      "learning_rate": 1.0563545293252043e-07,
      "loss": 0.4398,
      "step": 14873
    },
    {
      "epoch": 1.8236880823933301,
      "grad_norm": 1.8050070621516812,
      "learning_rate": 1.0548972706559246e-07,
      "loss": 0.3727,
      "step": 14874
    },
    {
      "epoch": 1.8238106915154488,
      "grad_norm": 1.9702233458201284,
      "learning_rate": 1.0534409961727527e-07,
      "loss": 0.4367,
      "step": 14875
    },
    {
      "epoch": 1.8239333006375675,
      "grad_norm": 1.929748329644873,
      "learning_rate": 1.0519857059355487e-07,
      "loss": 0.4563,
      "step": 14876
    },
    {
      "epoch": 1.8240559097596862,
      "grad_norm": 2.1740277809612616,
      "learning_rate": 1.0505314000041317e-07,
      "loss": 0.4252,
      "step": 14877
    },
    {
      "epoch": 1.824178518881805,
      "grad_norm": 1.850407489651386,
      "learning_rate": 1.0490780784382704e-07,
      "loss": 0.4368,
      "step": 14878
    },
    {
      "epoch": 1.8243011280039236,
      "grad_norm": 2.1194594806421008,
      "learning_rate": 1.0476257412977032e-07,
      "loss": 0.4074,
      "step": 14879
    },
    {
      "epoch": 1.8244237371260423,
      "grad_norm": 1.9342017715171669,
      "learning_rate": 1.0461743886421238e-07,
      "loss": 0.3862,
      "step": 14880
    },
    {
      "epoch": 1.8245463462481608,
      "grad_norm": 1.954934474156522,
      "learning_rate": 1.044724020531182e-07,
      "loss": 0.4345,
      "step": 14881
    },
    {
      "epoch": 1.8246689553702795,
      "grad_norm": 2.003096834008612,
      "learning_rate": 1.0432746370244911e-07,
      "loss": 0.4737,
      "step": 14882
    },
    {
      "epoch": 1.8247915644923982,
      "grad_norm": 2.0321233322778096,
      "learning_rate": 1.0418262381816286e-07,
      "loss": 0.446,
      "step": 14883
    },
    {
      "epoch": 1.824914173614517,
      "grad_norm": 1.8486833312241728,
      "learning_rate": 1.0403788240621215e-07,
      "loss": 0.4071,
      "step": 14884
    },
    {
      "epoch": 1.8250367827366356,
      "grad_norm": 1.7593895856864632,
      "learning_rate": 1.0389323947254615e-07,
      "loss": 0.4045,
      "step": 14885
    },
    {
      "epoch": 1.8251593918587543,
      "grad_norm": 1.971568163227869,
      "learning_rate": 1.037486950231098e-07,
      "loss": 0.409,
      "step": 14886
    },
    {
      "epoch": 1.825282000980873,
      "grad_norm": 1.8592206690459614,
      "learning_rate": 1.0360424906384475e-07,
      "loss": 0.4113,
      "step": 14887
    },
    {
      "epoch": 1.8254046101029917,
      "grad_norm": 1.8396783990778312,
      "learning_rate": 1.0345990160068709e-07,
      "loss": 0.4303,
      "step": 14888
    },
    {
      "epoch": 1.8255272192251102,
      "grad_norm": 2.083144282399697,
      "learning_rate": 1.0331565263957011e-07,
      "loss": 0.4504,
      "step": 14889
    },
    {
      "epoch": 1.8256498283472289,
      "grad_norm": 2.028883069339949,
      "learning_rate": 1.0317150218642324e-07,
      "loss": 0.4177,
      "step": 14890
    },
    {
      "epoch": 1.8257724374693476,
      "grad_norm": 1.9642488526605995,
      "learning_rate": 1.0302745024717093e-07,
      "loss": 0.4241,
      "step": 14891
    },
    {
      "epoch": 1.8258950465914663,
      "grad_norm": 2.0120691021240202,
      "learning_rate": 1.0288349682773342e-07,
      "loss": 0.4493,
      "step": 14892
    },
    {
      "epoch": 1.826017655713585,
      "grad_norm": 1.9230054848227018,
      "learning_rate": 1.027396419340282e-07,
      "loss": 0.4198,
      "step": 14893
    },
    {
      "epoch": 1.8261402648357037,
      "grad_norm": 1.871175357627855,
      "learning_rate": 1.0259588557196775e-07,
      "loss": 0.4101,
      "step": 14894
    },
    {
      "epoch": 1.8262628739578224,
      "grad_norm": 2.0465262781588596,
      "learning_rate": 1.02452227747461e-07,
      "loss": 0.4358,
      "step": 14895
    },
    {
      "epoch": 1.826385483079941,
      "grad_norm": 1.8114534903822956,
      "learning_rate": 1.0230866846641153e-07,
      "loss": 0.3835,
      "step": 14896
    },
    {
      "epoch": 1.8265080922020598,
      "grad_norm": 1.9214633354051966,
      "learning_rate": 1.0216520773472127e-07,
      "loss": 0.4379,
      "step": 14897
    },
    {
      "epoch": 1.8266307013241785,
      "grad_norm": 2.148971835522387,
      "learning_rate": 1.0202184555828526e-07,
      "loss": 0.4371,
      "step": 14898
    },
    {
      "epoch": 1.8267533104462972,
      "grad_norm": 2.037767228616144,
      "learning_rate": 1.0187858194299738e-07,
      "loss": 0.4506,
      "step": 14899
    },
    {
      "epoch": 1.8268759195684159,
      "grad_norm": 1.901346188305205,
      "learning_rate": 1.0173541689474514e-07,
      "loss": 0.4161,
      "step": 14900
    },
    {
      "epoch": 1.8269985286905346,
      "grad_norm": 1.9105653015331923,
      "learning_rate": 1.0159235041941273e-07,
      "loss": 0.4713,
      "step": 14901
    },
    {
      "epoch": 1.8271211378126533,
      "grad_norm": 2.0280848323862366,
      "learning_rate": 1.0144938252288072e-07,
      "loss": 0.4431,
      "step": 14902
    },
    {
      "epoch": 1.827243746934772,
      "grad_norm": 1.9315310354199524,
      "learning_rate": 1.0130651321102581e-07,
      "loss": 0.4249,
      "step": 14903
    },
    {
      "epoch": 1.8273663560568907,
      "grad_norm": 1.9350666801543521,
      "learning_rate": 1.0116374248971966e-07,
      "loss": 0.4378,
      "step": 14904
    },
    {
      "epoch": 1.8274889651790094,
      "grad_norm": 1.8644282962376746,
      "learning_rate": 1.0102107036483038e-07,
      "loss": 0.4243,
      "step": 14905
    },
    {
      "epoch": 1.827611574301128,
      "grad_norm": 1.958818631010041,
      "learning_rate": 1.0087849684222189e-07,
      "loss": 0.4262,
      "step": 14906
    },
    {
      "epoch": 1.8277341834232468,
      "grad_norm": 1.9795252464484518,
      "learning_rate": 1.0073602192775506e-07,
      "loss": 0.4316,
      "step": 14907
    },
    {
      "epoch": 1.8278567925453655,
      "grad_norm": 1.9954744252848478,
      "learning_rate": 1.0059364562728518e-07,
      "loss": 0.431,
      "step": 14908
    },
    {
      "epoch": 1.8279794016674842,
      "grad_norm": 1.9537523641664467,
      "learning_rate": 1.0045136794666399e-07,
      "loss": 0.4138,
      "step": 14909
    },
    {
      "epoch": 1.8281020107896029,
      "grad_norm": 2.0229122278518963,
      "learning_rate": 1.0030918889173984e-07,
      "loss": 0.4563,
      "step": 14910
    },
    {
      "epoch": 1.8282246199117216,
      "grad_norm": 1.9369326675273655,
      "learning_rate": 1.0016710846835614e-07,
      "loss": 0.434,
      "step": 14911
    },
    {
      "epoch": 1.8283472290338403,
      "grad_norm": 1.9938413059264302,
      "learning_rate": 1.000251266823532e-07,
      "loss": 0.431,
      "step": 14912
    },
    {
      "epoch": 1.8284698381559588,
      "grad_norm": 1.8999558881657177,
      "learning_rate": 9.988324353956608e-08,
      "loss": 0.434,
      "step": 14913
    },
    {
      "epoch": 1.8285924472780775,
      "grad_norm": 1.9881957035784856,
      "learning_rate": 9.974145904582705e-08,
      "loss": 0.4173,
      "step": 14914
    },
    {
      "epoch": 1.8287150564001962,
      "grad_norm": 1.8357725968284613,
      "learning_rate": 9.959977320696284e-08,
      "loss": 0.3873,
      "step": 14915
    },
    {
      "epoch": 1.8288376655223149,
      "grad_norm": 1.982024142702977,
      "learning_rate": 9.945818602879797e-08,
      "loss": 0.4324,
      "step": 14916
    },
    {
      "epoch": 1.8289602746444336,
      "grad_norm": 2.0040294680888655,
      "learning_rate": 9.931669751715167e-08,
      "loss": 0.4515,
      "step": 14917
    },
    {
      "epoch": 1.8290828837665523,
      "grad_norm": 2.060680788325295,
      "learning_rate": 9.917530767783873e-08,
      "loss": 0.4097,
      "step": 14918
    },
    {
      "epoch": 1.829205492888671,
      "grad_norm": 2.0307209619026727,
      "learning_rate": 9.903401651667088e-08,
      "loss": 0.4499,
      "step": 14919
    },
    {
      "epoch": 1.8293281020107894,
      "grad_norm": 1.9228645729099163,
      "learning_rate": 9.889282403945627e-08,
      "loss": 0.4525,
      "step": 14920
    },
    {
      "epoch": 1.8294507111329081,
      "grad_norm": 1.8258066882433435,
      "learning_rate": 9.875173025199664e-08,
      "loss": 0.4229,
      "step": 14921
    },
    {
      "epoch": 1.8295733202550268,
      "grad_norm": 1.9221960041323978,
      "learning_rate": 9.861073516009206e-08,
      "loss": 0.4328,
      "step": 14922
    },
    {
      "epoch": 1.8296959293771455,
      "grad_norm": 2.0340958421741284,
      "learning_rate": 9.846983876953792e-08,
      "loss": 0.3998,
      "step": 14923
    },
    {
      "epoch": 1.8298185384992642,
      "grad_norm": 2.027001529030324,
      "learning_rate": 9.832904108612484e-08,
      "loss": 0.4527,
      "step": 14924
    },
    {
      "epoch": 1.829941147621383,
      "grad_norm": 1.9712336137026867,
      "learning_rate": 9.818834211563988e-08,
      "loss": 0.4354,
      "step": 14925
    },
    {
      "epoch": 1.8300637567435016,
      "grad_norm": 2.1676465218572876,
      "learning_rate": 9.804774186386617e-08,
      "loss": 0.4471,
      "step": 14926
    },
    {
      "epoch": 1.8301863658656203,
      "grad_norm": 2.074339648059079,
      "learning_rate": 9.790724033658272e-08,
      "loss": 0.4305,
      "step": 14927
    },
    {
      "epoch": 1.830308974987739,
      "grad_norm": 1.7917973466934662,
      "learning_rate": 9.776683753956406e-08,
      "loss": 0.4027,
      "step": 14928
    },
    {
      "epoch": 1.8304315841098577,
      "grad_norm": 2.018384786698347,
      "learning_rate": 9.762653347858142e-08,
      "loss": 0.4166,
      "step": 14929
    },
    {
      "epoch": 1.8305541932319764,
      "grad_norm": 2.0799837533021996,
      "learning_rate": 9.748632815940129e-08,
      "loss": 0.3997,
      "step": 14930
    },
    {
      "epoch": 1.8306768023540951,
      "grad_norm": 1.9321634657431084,
      "learning_rate": 9.734622158778628e-08,
      "loss": 0.4469,
      "step": 14931
    },
    {
      "epoch": 1.8307994114762138,
      "grad_norm": 1.997433223438738,
      "learning_rate": 9.720621376949485e-08,
      "loss": 0.4441,
      "step": 14932
    },
    {
      "epoch": 1.8309220205983325,
      "grad_norm": 2.2232522815712077,
      "learning_rate": 9.706630471028238e-08,
      "loss": 0.4279,
      "step": 14933
    },
    {
      "epoch": 1.8310446297204512,
      "grad_norm": 1.990943848861094,
      "learning_rate": 9.692649441589874e-08,
      "loss": 0.4102,
      "step": 14934
    },
    {
      "epoch": 1.83116723884257,
      "grad_norm": 2.1183871698752315,
      "learning_rate": 9.678678289209042e-08,
      "loss": 0.4561,
      "step": 14935
    },
    {
      "epoch": 1.8312898479646886,
      "grad_norm": 2.0348324483797198,
      "learning_rate": 9.664717014459979e-08,
      "loss": 0.4028,
      "step": 14936
    },
    {
      "epoch": 1.8314124570868073,
      "grad_norm": 1.8942721559458613,
      "learning_rate": 9.650765617916558e-08,
      "loss": 0.4312,
      "step": 14937
    },
    {
      "epoch": 1.831535066208926,
      "grad_norm": 1.7302377435767389,
      "learning_rate": 9.636824100152125e-08,
      "loss": 0.3876,
      "step": 14938
    },
    {
      "epoch": 1.8316576753310447,
      "grad_norm": 2.1528982343123455,
      "learning_rate": 9.622892461739752e-08,
      "loss": 0.4549,
      "step": 14939
    },
    {
      "epoch": 1.8317802844531634,
      "grad_norm": 1.911656266844741,
      "learning_rate": 9.608970703252091e-08,
      "loss": 0.3985,
      "step": 14940
    },
    {
      "epoch": 1.8319028935752821,
      "grad_norm": 2.1005169365872876,
      "learning_rate": 9.595058825261321e-08,
      "loss": 0.4507,
      "step": 14941
    },
    {
      "epoch": 1.8320255026974008,
      "grad_norm": 1.8317617916593925,
      "learning_rate": 9.581156828339211e-08,
      "loss": 0.4267,
      "step": 14942
    },
    {
      "epoch": 1.8321481118195195,
      "grad_norm": 1.9235890928394974,
      "learning_rate": 9.567264713057189e-08,
      "loss": 0.4347,
      "step": 14943
    },
    {
      "epoch": 1.832270720941638,
      "grad_norm": 1.8623514774167111,
      "learning_rate": 9.553382479986217e-08,
      "loss": 0.4162,
      "step": 14944
    },
    {
      "epoch": 1.8323933300637567,
      "grad_norm": 1.9258838935674782,
      "learning_rate": 9.53951012969695e-08,
      "loss": 0.4502,
      "step": 14945
    },
    {
      "epoch": 1.8325159391858754,
      "grad_norm": 1.926063159959677,
      "learning_rate": 9.525647662759458e-08,
      "loss": 0.3937,
      "step": 14946
    },
    {
      "epoch": 1.832638548307994,
      "grad_norm": 1.9628985734574513,
      "learning_rate": 9.511795079743619e-08,
      "loss": 0.4457,
      "step": 14947
    },
    {
      "epoch": 1.8327611574301128,
      "grad_norm": 1.8856001908062363,
      "learning_rate": 9.497952381218728e-08,
      "loss": 0.4141,
      "step": 14948
    },
    {
      "epoch": 1.8328837665522315,
      "grad_norm": 1.9932372885373641,
      "learning_rate": 9.484119567753775e-08,
      "loss": 0.4293,
      "step": 14949
    },
    {
      "epoch": 1.8330063756743502,
      "grad_norm": 2.213758670598817,
      "learning_rate": 9.470296639917359e-08,
      "loss": 0.4114,
      "step": 14950
    },
    {
      "epoch": 1.833128984796469,
      "grad_norm": 1.8213744921944648,
      "learning_rate": 9.456483598277499e-08,
      "loss": 0.3868,
      "step": 14951
    },
    {
      "epoch": 1.8332515939185874,
      "grad_norm": 1.83359117161204,
      "learning_rate": 9.442680443402047e-08,
      "loss": 0.4039,
      "step": 14952
    },
    {
      "epoch": 1.833374203040706,
      "grad_norm": 1.6992140304264394,
      "learning_rate": 9.428887175858297e-08,
      "loss": 0.4118,
      "step": 14953
    },
    {
      "epoch": 1.8334968121628248,
      "grad_norm": 1.9483428261431406,
      "learning_rate": 9.415103796213215e-08,
      "loss": 0.4497,
      "step": 14954
    },
    {
      "epoch": 1.8336194212849435,
      "grad_norm": 2.0462947245970216,
      "learning_rate": 9.401330305033235e-08,
      "loss": 0.4004,
      "step": 14955
    },
    {
      "epoch": 1.8337420304070622,
      "grad_norm": 2.0888395374594766,
      "learning_rate": 9.38756670288457e-08,
      "loss": 0.405,
      "step": 14956
    },
    {
      "epoch": 1.8338646395291809,
      "grad_norm": 2.0550569912729792,
      "learning_rate": 9.373812990332854e-08,
      "loss": 0.4145,
      "step": 14957
    },
    {
      "epoch": 1.8339872486512996,
      "grad_norm": 1.845075789823678,
      "learning_rate": 9.360069167943436e-08,
      "loss": 0.4104,
      "step": 14958
    },
    {
      "epoch": 1.8341098577734183,
      "grad_norm": 2.3615230820506454,
      "learning_rate": 9.346335236281173e-08,
      "loss": 0.431,
      "step": 14959
    },
    {
      "epoch": 1.834232466895537,
      "grad_norm": 1.996868002652253,
      "learning_rate": 9.332611195910585e-08,
      "loss": 0.4345,
      "step": 14960
    },
    {
      "epoch": 1.8343550760176557,
      "grad_norm": 2.0383783198173386,
      "learning_rate": 9.318897047395747e-08,
      "loss": 0.4246,
      "step": 14961
    },
    {
      "epoch": 1.8344776851397744,
      "grad_norm": 1.9067500015875591,
      "learning_rate": 9.305192791300349e-08,
      "loss": 0.443,
      "step": 14962
    },
    {
      "epoch": 1.834600294261893,
      "grad_norm": 2.226059081610543,
      "learning_rate": 9.291498428187634e-08,
      "loss": 0.433,
      "step": 14963
    },
    {
      "epoch": 1.8347229033840118,
      "grad_norm": 2.015321134253491,
      "learning_rate": 9.277813958620457e-08,
      "loss": 0.4336,
      "step": 14964
    },
    {
      "epoch": 1.8348455125061305,
      "grad_norm": 1.9088185997381943,
      "learning_rate": 9.264139383161286e-08,
      "loss": 0.3993,
      "step": 14965
    },
    {
      "epoch": 1.8349681216282492,
      "grad_norm": 1.8060061042369147,
      "learning_rate": 9.250474702372226e-08,
      "loss": 0.3664,
      "step": 14966
    },
    {
      "epoch": 1.8350907307503679,
      "grad_norm": 1.9673227400704838,
      "learning_rate": 9.236819916814854e-08,
      "loss": 0.4219,
      "step": 14967
    },
    {
      "epoch": 1.8352133398724866,
      "grad_norm": 1.8946947454792642,
      "learning_rate": 9.223175027050391e-08,
      "loss": 0.4071,
      "step": 14968
    },
    {
      "epoch": 1.8353359489946053,
      "grad_norm": 2.044361253121563,
      "learning_rate": 9.209540033639691e-08,
      "loss": 0.454,
      "step": 14969
    },
    {
      "epoch": 1.835458558116724,
      "grad_norm": 2.0598862452656674,
      "learning_rate": 9.195914937143224e-08,
      "loss": 0.4129,
      "step": 14970
    },
    {
      "epoch": 1.8355811672388427,
      "grad_norm": 1.8370001882278861,
      "learning_rate": 9.182299738120931e-08,
      "loss": 0.4436,
      "step": 14971
    },
    {
      "epoch": 1.8357037763609614,
      "grad_norm": 2.1516478827809857,
      "learning_rate": 9.168694437132447e-08,
      "loss": 0.4566,
      "step": 14972
    },
    {
      "epoch": 1.83582638548308,
      "grad_norm": 1.9042540679988338,
      "learning_rate": 9.15509903473702e-08,
      "loss": 0.3785,
      "step": 14973
    },
    {
      "epoch": 1.8359489946051988,
      "grad_norm": 2.0116818236261444,
      "learning_rate": 9.141513531493368e-08,
      "loss": 0.4044,
      "step": 14974
    },
    {
      "epoch": 1.8360716037273173,
      "grad_norm": 2.0123777812848553,
      "learning_rate": 9.127937927959935e-08,
      "loss": 0.4427,
      "step": 14975
    },
    {
      "epoch": 1.836194212849436,
      "grad_norm": 2.041352586851608,
      "learning_rate": 9.114372224694689e-08,
      "loss": 0.4383,
      "step": 14976
    },
    {
      "epoch": 1.8363168219715547,
      "grad_norm": 2.308475904066034,
      "learning_rate": 9.10081642225516e-08,
      "loss": 0.4078,
      "step": 14977
    },
    {
      "epoch": 1.8364394310936734,
      "grad_norm": 2.094613418830702,
      "learning_rate": 9.087270521198566e-08,
      "loss": 0.4351,
      "step": 14978
    },
    {
      "epoch": 1.836562040215792,
      "grad_norm": 2.003414299661676,
      "learning_rate": 9.073734522081684e-08,
      "loss": 0.4169,
      "step": 14979
    },
    {
      "epoch": 1.8366846493379108,
      "grad_norm": 1.8946618363412995,
      "learning_rate": 9.060208425460848e-08,
      "loss": 0.4283,
      "step": 14980
    },
    {
      "epoch": 1.8368072584600295,
      "grad_norm": 1.9274994655321138,
      "learning_rate": 9.046692231891973e-08,
      "loss": 0.4257,
      "step": 14981
    },
    {
      "epoch": 1.8369298675821482,
      "grad_norm": 2.075802926292608,
      "learning_rate": 9.03318594193059e-08,
      "loss": 0.3879,
      "step": 14982
    },
    {
      "epoch": 1.8370524767042666,
      "grad_norm": 1.7518381098529916,
      "learning_rate": 9.019689556131944e-08,
      "loss": 0.4121,
      "step": 14983
    },
    {
      "epoch": 1.8371750858263853,
      "grad_norm": 1.8128771106277275,
      "learning_rate": 9.006203075050623e-08,
      "loss": 0.3803,
      "step": 14984
    },
    {
      "epoch": 1.837297694948504,
      "grad_norm": 1.8958803923936622,
      "learning_rate": 8.992726499240989e-08,
      "loss": 0.3984,
      "step": 14985
    },
    {
      "epoch": 1.8374203040706227,
      "grad_norm": 1.8881630309645647,
      "learning_rate": 8.979259829257014e-08,
      "loss": 0.4554,
      "step": 14986
    },
    {
      "epoch": 1.8375429131927414,
      "grad_norm": 1.9801073628410548,
      "learning_rate": 8.965803065652146e-08,
      "loss": 0.4157,
      "step": 14987
    },
    {
      "epoch": 1.8376655223148601,
      "grad_norm": 2.1205084157156096,
      "learning_rate": 8.95235620897944e-08,
      "loss": 0.4045,
      "step": 14988
    },
    {
      "epoch": 1.8377881314369788,
      "grad_norm": 1.8277038152279617,
      "learning_rate": 8.938919259791651e-08,
      "loss": 0.4058,
      "step": 14989
    },
    {
      "epoch": 1.8379107405590975,
      "grad_norm": 1.894591561457332,
      "learning_rate": 8.925492218641085e-08,
      "loss": 0.4397,
      "step": 14990
    },
    {
      "epoch": 1.8380333496812162,
      "grad_norm": 1.9590499067368699,
      "learning_rate": 8.912075086079553e-08,
      "loss": 0.4344,
      "step": 14991
    },
    {
      "epoch": 1.838155958803335,
      "grad_norm": 1.9126230444928751,
      "learning_rate": 8.898667862658528e-08,
      "loss": 0.4754,
      "step": 14992
    },
    {
      "epoch": 1.8382785679254536,
      "grad_norm": 1.909734678597626,
      "learning_rate": 8.885270548929125e-08,
      "loss": 0.447,
      "step": 14993
    },
    {
      "epoch": 1.8384011770475723,
      "grad_norm": 1.8441234066502157,
      "learning_rate": 8.871883145441934e-08,
      "loss": 0.4543,
      "step": 14994
    },
    {
      "epoch": 1.838523786169691,
      "grad_norm": 2.1227101509732047,
      "learning_rate": 8.858505652747263e-08,
      "loss": 0.4301,
      "step": 14995
    },
    {
      "epoch": 1.8386463952918097,
      "grad_norm": 2.075656275449968,
      "learning_rate": 8.845138071394893e-08,
      "loss": 0.4451,
      "step": 14996
    },
    {
      "epoch": 1.8387690044139284,
      "grad_norm": 1.9850318705711993,
      "learning_rate": 8.831780401934275e-08,
      "loss": 0.4503,
      "step": 14997
    },
    {
      "epoch": 1.8388916135360471,
      "grad_norm": 1.8231192261755003,
      "learning_rate": 8.818432644914443e-08,
      "loss": 0.3862,
      "step": 14998
    },
    {
      "epoch": 1.8390142226581658,
      "grad_norm": 1.92765088162752,
      "learning_rate": 8.805094800884012e-08,
      "loss": 0.4117,
      "step": 14999
    },
    {
      "epoch": 1.8391368317802845,
      "grad_norm": 1.8210866812465012,
      "learning_rate": 8.791766870391211e-08,
      "loss": 0.3919,
      "step": 15000
    },
    {
      "epoch": 1.8392594409024032,
      "grad_norm": 1.9322718158412864,
      "learning_rate": 8.778448853983768e-08,
      "loss": 0.4131,
      "step": 15001
    },
    {
      "epoch": 1.839382050024522,
      "grad_norm": 1.8897580437771508,
      "learning_rate": 8.765140752209134e-08,
      "loss": 0.4473,
      "step": 15002
    },
    {
      "epoch": 1.8395046591466406,
      "grad_norm": 2.001677596647142,
      "learning_rate": 8.751842565614316e-08,
      "loss": 0.4437,
      "step": 15003
    },
    {
      "epoch": 1.8396272682687593,
      "grad_norm": 2.0892235800661263,
      "learning_rate": 8.738554294745877e-08,
      "loss": 0.3869,
      "step": 15004
    },
    {
      "epoch": 1.839749877390878,
      "grad_norm": 1.9304641493414392,
      "learning_rate": 8.725275940149935e-08,
      "loss": 0.4016,
      "step": 15005
    },
    {
      "epoch": 1.8398724865129967,
      "grad_norm": 1.8420335564882115,
      "learning_rate": 8.712007502372332e-08,
      "loss": 0.3995,
      "step": 15006
    },
    {
      "epoch": 1.8399950956351152,
      "grad_norm": 1.7713982546447613,
      "learning_rate": 8.698748981958327e-08,
      "loss": 0.4463,
      "step": 15007
    },
    {
      "epoch": 1.840117704757234,
      "grad_norm": 1.8592874264033024,
      "learning_rate": 8.685500379452983e-08,
      "loss": 0.4134,
      "step": 15008
    },
    {
      "epoch": 1.8402403138793526,
      "grad_norm": 1.8867926297367443,
      "learning_rate": 8.67226169540078e-08,
      "loss": 0.4128,
      "step": 15009
    },
    {
      "epoch": 1.8403629230014713,
      "grad_norm": 1.8830840349817342,
      "learning_rate": 8.659032930345867e-08,
      "loss": 0.4428,
      "step": 15010
    },
    {
      "epoch": 1.84048553212359,
      "grad_norm": 2.071663445772499,
      "learning_rate": 8.645814084831922e-08,
      "loss": 0.4596,
      "step": 15011
    },
    {
      "epoch": 1.8406081412457087,
      "grad_norm": 2.0622815536887487,
      "learning_rate": 8.632605159402341e-08,
      "loss": 0.4365,
      "step": 15012
    },
    {
      "epoch": 1.8407307503678274,
      "grad_norm": 1.744403262364278,
      "learning_rate": 8.619406154599996e-08,
      "loss": 0.4023,
      "step": 15013
    },
    {
      "epoch": 1.840853359489946,
      "grad_norm": 1.8543687717901707,
      "learning_rate": 8.606217070967343e-08,
      "loss": 0.3918,
      "step": 15014
    },
    {
      "epoch": 1.8409759686120646,
      "grad_norm": 1.9895090992803566,
      "learning_rate": 8.593037909046558e-08,
      "loss": 0.4637,
      "step": 15015
    },
    {
      "epoch": 1.8410985777341833,
      "grad_norm": 1.8849698103324635,
      "learning_rate": 8.579868669379293e-08,
      "loss": 0.427,
      "step": 15016
    },
    {
      "epoch": 1.841221186856302,
      "grad_norm": 1.9700470712318325,
      "learning_rate": 8.566709352506835e-08,
      "loss": 0.3777,
      "step": 15017
    },
    {
      "epoch": 1.8413437959784207,
      "grad_norm": 1.9456364597898028,
      "learning_rate": 8.553559958970003e-08,
      "loss": 0.4537,
      "step": 15018
    },
    {
      "epoch": 1.8414664051005394,
      "grad_norm": 2.0860851408012513,
      "learning_rate": 8.540420489309365e-08,
      "loss": 0.4192,
      "step": 15019
    },
    {
      "epoch": 1.841589014222658,
      "grad_norm": 1.958456909520959,
      "learning_rate": 8.527290944064876e-08,
      "loss": 0.4306,
      "step": 15020
    },
    {
      "epoch": 1.8417116233447768,
      "grad_norm": 1.8776096225786132,
      "learning_rate": 8.514171323776216e-08,
      "loss": 0.4096,
      "step": 15021
    },
    {
      "epoch": 1.8418342324668955,
      "grad_norm": 1.9257080742026162,
      "learning_rate": 8.501061628982649e-08,
      "loss": 0.4629,
      "step": 15022
    },
    {
      "epoch": 1.8419568415890142,
      "grad_norm": 2.086259810088841,
      "learning_rate": 8.487961860222992e-08,
      "loss": 0.4461,
      "step": 15023
    },
    {
      "epoch": 1.8420794507111329,
      "grad_norm": 2.0650423588492206,
      "learning_rate": 8.474872018035652e-08,
      "loss": 0.3826,
      "step": 15024
    },
    {
      "epoch": 1.8422020598332516,
      "grad_norm": 2.0016747197721836,
      "learning_rate": 8.461792102958666e-08,
      "loss": 0.4333,
      "step": 15025
    },
    {
      "epoch": 1.8423246689553703,
      "grad_norm": 2.0334916649093846,
      "learning_rate": 8.448722115529661e-08,
      "loss": 0.4096,
      "step": 15026
    },
    {
      "epoch": 1.842447278077489,
      "grad_norm": 2.0564741102108135,
      "learning_rate": 8.435662056285765e-08,
      "loss": 0.4188,
      "step": 15027
    },
    {
      "epoch": 1.8425698871996077,
      "grad_norm": 1.7081440901726235,
      "learning_rate": 8.422611925763824e-08,
      "loss": 0.4328,
      "step": 15028
    },
    {
      "epoch": 1.8426924963217264,
      "grad_norm": 1.998914242767102,
      "learning_rate": 8.409571724500216e-08,
      "loss": 0.4222,
      "step": 15029
    },
    {
      "epoch": 1.842815105443845,
      "grad_norm": 1.9874243578936546,
      "learning_rate": 8.396541453030954e-08,
      "loss": 0.4255,
      "step": 15030
    },
    {
      "epoch": 1.8429377145659638,
      "grad_norm": 1.7862850390860654,
      "learning_rate": 8.383521111891502e-08,
      "loss": 0.4208,
      "step": 15031
    },
    {
      "epoch": 1.8430603236880825,
      "grad_norm": 2.044060055027158,
      "learning_rate": 8.370510701617123e-08,
      "loss": 0.4087,
      "step": 15032
    },
    {
      "epoch": 1.8431829328102012,
      "grad_norm": 2.0311331338241954,
      "learning_rate": 8.357510222742532e-08,
      "loss": 0.4489,
      "step": 15033
    },
    {
      "epoch": 1.8433055419323199,
      "grad_norm": 1.9348698944378413,
      "learning_rate": 8.344519675802021e-08,
      "loss": 0.4568,
      "step": 15034
    },
    {
      "epoch": 1.8434281510544386,
      "grad_norm": 1.9480987440145576,
      "learning_rate": 8.33153906132958e-08,
      "loss": 0.4662,
      "step": 15035
    },
    {
      "epoch": 1.8435507601765573,
      "grad_norm": 1.854108470185618,
      "learning_rate": 8.318568379858727e-08,
      "loss": 0.3996,
      "step": 15036
    },
    {
      "epoch": 1.843673369298676,
      "grad_norm": 1.947090876220115,
      "learning_rate": 8.305607631922619e-08,
      "loss": 0.4881,
      "step": 15037
    },
    {
      "epoch": 1.8437959784207945,
      "grad_norm": 1.91001531911506,
      "learning_rate": 8.292656818053857e-08,
      "loss": 0.4557,
      "step": 15038
    },
    {
      "epoch": 1.8439185875429132,
      "grad_norm": 2.13935023887449,
      "learning_rate": 8.279715938784849e-08,
      "loss": 0.4411,
      "step": 15039
    },
    {
      "epoch": 1.8440411966650319,
      "grad_norm": 1.8298352167580043,
      "learning_rate": 8.26678499464742e-08,
      "loss": 0.3852,
      "step": 15040
    },
    {
      "epoch": 1.8441638057871506,
      "grad_norm": 1.842957508527586,
      "learning_rate": 8.253863986173116e-08,
      "loss": 0.4542,
      "step": 15041
    },
    {
      "epoch": 1.8442864149092693,
      "grad_norm": 1.8989021453601715,
      "learning_rate": 8.240952913892958e-08,
      "loss": 0.4198,
      "step": 15042
    },
    {
      "epoch": 1.844409024031388,
      "grad_norm": 1.7732453974948934,
      "learning_rate": 8.228051778337658e-08,
      "loss": 0.3872,
      "step": 15043
    },
    {
      "epoch": 1.8445316331535067,
      "grad_norm": 2.1141226238358137,
      "learning_rate": 8.215160580037434e-08,
      "loss": 0.4213,
      "step": 15044
    },
    {
      "epoch": 1.8446542422756254,
      "grad_norm": 2.073022104468673,
      "learning_rate": 8.202279319522138e-08,
      "loss": 0.4355,
      "step": 15045
    },
    {
      "epoch": 1.8447768513977438,
      "grad_norm": 1.9227018942842167,
      "learning_rate": 8.189407997321292e-08,
      "loss": 0.4297,
      "step": 15046
    },
    {
      "epoch": 1.8448994605198625,
      "grad_norm": 2.0129498359616362,
      "learning_rate": 8.176546613963832e-08,
      "loss": 0.4065,
      "step": 15047
    },
    {
      "epoch": 1.8450220696419812,
      "grad_norm": 1.741372335296403,
      "learning_rate": 8.163695169978392e-08,
      "loss": 0.4062,
      "step": 15048
    },
    {
      "epoch": 1.8451446787641,
      "grad_norm": 1.8312001388426433,
      "learning_rate": 8.150853665893271e-08,
      "loss": 0.4281,
      "step": 15049
    },
    {
      "epoch": 1.8452672878862186,
      "grad_norm": 2.004631399747369,
      "learning_rate": 8.138022102236215e-08,
      "loss": 0.4333,
      "step": 15050
    },
    {
      "epoch": 1.8453898970083373,
      "grad_norm": 1.938292341735587,
      "learning_rate": 8.125200479534606e-08,
      "loss": 0.4182,
      "step": 15051
    },
    {
      "epoch": 1.845512506130456,
      "grad_norm": 2.07669406886528,
      "learning_rate": 8.112388798315496e-08,
      "loss": 0.4188,
      "step": 15052
    },
    {
      "epoch": 1.8456351152525747,
      "grad_norm": 1.9115735831078882,
      "learning_rate": 8.099587059105379e-08,
      "loss": 0.4334,
      "step": 15053
    },
    {
      "epoch": 1.8457577243746934,
      "grad_norm": 1.7969316191339608,
      "learning_rate": 8.086795262430559e-08,
      "loss": 0.4249,
      "step": 15054
    },
    {
      "epoch": 1.8458803334968121,
      "grad_norm": 2.043760943388248,
      "learning_rate": 8.074013408816667e-08,
      "loss": 0.4112,
      "step": 15055
    },
    {
      "epoch": 1.8460029426189308,
      "grad_norm": 2.09473262172498,
      "learning_rate": 8.061241498789147e-08,
      "loss": 0.4619,
      "step": 15056
    },
    {
      "epoch": 1.8461255517410495,
      "grad_norm": 1.9278808849512934,
      "learning_rate": 8.04847953287291e-08,
      "loss": 0.4423,
      "step": 15057
    },
    {
      "epoch": 1.8462481608631682,
      "grad_norm": 1.9859182656002212,
      "learning_rate": 8.035727511592512e-08,
      "loss": 0.4306,
      "step": 15058
    },
    {
      "epoch": 1.846370769985287,
      "grad_norm": 1.9229023567081784,
      "learning_rate": 8.022985435472087e-08,
      "loss": 0.4425,
      "step": 15059
    },
    {
      "epoch": 1.8464933791074056,
      "grad_norm": 1.8620243415552582,
      "learning_rate": 8.010253305035326e-08,
      "loss": 0.3969,
      "step": 15060
    },
    {
      "epoch": 1.8466159882295243,
      "grad_norm": 1.9657681164470144,
      "learning_rate": 7.997531120805563e-08,
      "loss": 0.4393,
      "step": 15061
    },
    {
      "epoch": 1.846738597351643,
      "grad_norm": 1.9623015348657475,
      "learning_rate": 7.984818883305739e-08,
      "loss": 0.4583,
      "step": 15062
    },
    {
      "epoch": 1.8468612064737617,
      "grad_norm": 1.9492469325815807,
      "learning_rate": 7.972116593058299e-08,
      "loss": 0.4456,
      "step": 15063
    },
    {
      "epoch": 1.8469838155958804,
      "grad_norm": 1.7794797883733706,
      "learning_rate": 7.959424250585323e-08,
      "loss": 0.4379,
      "step": 15064
    },
    {
      "epoch": 1.8471064247179991,
      "grad_norm": 1.4786712124943593,
      "learning_rate": 7.946741856408507e-08,
      "loss": 0.3892,
      "step": 15065
    },
    {
      "epoch": 1.8472290338401178,
      "grad_norm": 2.123211634234834,
      "learning_rate": 7.934069411049155e-08,
      "loss": 0.3951,
      "step": 15066
    },
    {
      "epoch": 1.8473516429622365,
      "grad_norm": 1.9506456180175629,
      "learning_rate": 7.921406915028074e-08,
      "loss": 0.4425,
      "step": 15067
    },
    {
      "epoch": 1.8474742520843552,
      "grad_norm": 2.003532687514416,
      "learning_rate": 7.908754368865707e-08,
      "loss": 0.4214,
      "step": 15068
    },
    {
      "epoch": 1.8475968612064737,
      "grad_norm": 1.789113132460293,
      "learning_rate": 7.896111773082166e-08,
      "loss": 0.3886,
      "step": 15069
    },
    {
      "epoch": 1.8477194703285924,
      "grad_norm": 1.9808903471508763,
      "learning_rate": 7.883479128197036e-08,
      "loss": 0.447,
      "step": 15070
    },
    {
      "epoch": 1.847842079450711,
      "grad_norm": 1.9576595318035361,
      "learning_rate": 7.870856434729541e-08,
      "loss": 0.4046,
      "step": 15071
    },
    {
      "epoch": 1.8479646885728298,
      "grad_norm": 2.006545501726884,
      "learning_rate": 7.858243693198486e-08,
      "loss": 0.4025,
      "step": 15072
    },
    {
      "epoch": 1.8480872976949485,
      "grad_norm": 2.0166278235485526,
      "learning_rate": 7.84564090412232e-08,
      "loss": 0.3881,
      "step": 15073
    },
    {
      "epoch": 1.8482099068170672,
      "grad_norm": 1.9653761541888781,
      "learning_rate": 7.833048068018988e-08,
      "loss": 0.4371,
      "step": 15074
    },
    {
      "epoch": 1.848332515939186,
      "grad_norm": 1.958897705639357,
      "learning_rate": 7.820465185406134e-08,
      "loss": 0.4321,
      "step": 15075
    },
    {
      "epoch": 1.8484551250613046,
      "grad_norm": 2.1954104564506753,
      "learning_rate": 7.807892256800898e-08,
      "loss": 0.4105,
      "step": 15076
    },
    {
      "epoch": 1.848577734183423,
      "grad_norm": 2.0662330881843274,
      "learning_rate": 7.795329282720032e-08,
      "loss": 0.4322,
      "step": 15077
    },
    {
      "epoch": 1.8487003433055418,
      "grad_norm": 1.8927172656963882,
      "learning_rate": 7.782776263679931e-08,
      "loss": 0.4021,
      "step": 15078
    },
    {
      "epoch": 1.8488229524276605,
      "grad_norm": 2.009046738201017,
      "learning_rate": 7.770233200196598e-08,
      "loss": 0.4091,
      "step": 15079
    },
    {
      "epoch": 1.8489455615497792,
      "grad_norm": 1.8292619690782232,
      "learning_rate": 7.757700092785454e-08,
      "loss": 0.4453,
      "step": 15080
    },
    {
      "epoch": 1.8490681706718979,
      "grad_norm": 2.0943395733671757,
      "learning_rate": 7.745176941961669e-08,
      "loss": 0.4075,
      "step": 15081
    },
    {
      "epoch": 1.8491907797940166,
      "grad_norm": 2.0262374730292403,
      "learning_rate": 7.732663748240027e-08,
      "loss": 0.4195,
      "step": 15082
    },
    {
      "epoch": 1.8493133889161353,
      "grad_norm": 2.072409686584502,
      "learning_rate": 7.720160512134839e-08,
      "loss": 0.415,
      "step": 15083
    },
    {
      "epoch": 1.849435998038254,
      "grad_norm": 2.068776106452551,
      "learning_rate": 7.707667234159916e-08,
      "loss": 0.428,
      "step": 15084
    },
    {
      "epoch": 1.8495586071603727,
      "grad_norm": 1.931839482801653,
      "learning_rate": 7.695183914828819e-08,
      "loss": 0.4418,
      "step": 15085
    },
    {
      "epoch": 1.8496812162824914,
      "grad_norm": 2.146722200513109,
      "learning_rate": 7.682710554654666e-08,
      "loss": 0.4206,
      "step": 15086
    },
    {
      "epoch": 1.84980382540461,
      "grad_norm": 2.1924827355615695,
      "learning_rate": 7.670247154150073e-08,
      "loss": 0.4642,
      "step": 15087
    },
    {
      "epoch": 1.8499264345267288,
      "grad_norm": 2.0089779431738672,
      "learning_rate": 7.657793713827327e-08,
      "loss": 0.4806,
      "step": 15088
    },
    {
      "epoch": 1.8500490436488475,
      "grad_norm": 2.0230547013574807,
      "learning_rate": 7.645350234198324e-08,
      "loss": 0.422,
      "step": 15089
    },
    {
      "epoch": 1.8501716527709662,
      "grad_norm": 2.119130351511439,
      "learning_rate": 7.632916715774458e-08,
      "loss": 0.4007,
      "step": 15090
    },
    {
      "epoch": 1.8502942618930849,
      "grad_norm": 2.0111381230998586,
      "learning_rate": 7.620493159066822e-08,
      "loss": 0.448,
      "step": 15091
    },
    {
      "epoch": 1.8504168710152036,
      "grad_norm": 2.0122167725856035,
      "learning_rate": 7.608079564585951e-08,
      "loss": 0.4389,
      "step": 15092
    },
    {
      "epoch": 1.8505394801373223,
      "grad_norm": 1.9918451201095546,
      "learning_rate": 7.59567593284219e-08,
      "loss": 0.4414,
      "step": 15093
    },
    {
      "epoch": 1.850662089259441,
      "grad_norm": 1.9568213943680741,
      "learning_rate": 7.583282264345266e-08,
      "loss": 0.3696,
      "step": 15094
    },
    {
      "epoch": 1.8507846983815597,
      "grad_norm": 1.9401577488585473,
      "learning_rate": 7.570898559604633e-08,
      "loss": 0.4049,
      "step": 15095
    },
    {
      "epoch": 1.8509073075036784,
      "grad_norm": 1.9038762366270516,
      "learning_rate": 7.558524819129248e-08,
      "loss": 0.423,
      "step": 15096
    },
    {
      "epoch": 1.851029916625797,
      "grad_norm": 2.0017746261165317,
      "learning_rate": 7.546161043427675e-08,
      "loss": 0.4373,
      "step": 15097
    },
    {
      "epoch": 1.8511525257479158,
      "grad_norm": 1.8105129509600786,
      "learning_rate": 7.533807233008117e-08,
      "loss": 0.4402,
      "step": 15098
    },
    {
      "epoch": 1.8512751348700345,
      "grad_norm": 2.1268987990796444,
      "learning_rate": 7.521463388378364e-08,
      "loss": 0.4101,
      "step": 15099
    },
    {
      "epoch": 1.8513977439921532,
      "grad_norm": 1.9852489555544572,
      "learning_rate": 7.509129510045732e-08,
      "loss": 0.4756,
      "step": 15100
    },
    {
      "epoch": 1.8515203531142717,
      "grad_norm": 2.147710817246233,
      "learning_rate": 7.496805598517149e-08,
      "loss": 0.4383,
      "step": 15101
    },
    {
      "epoch": 1.8516429622363904,
      "grad_norm": 1.8704689314295555,
      "learning_rate": 7.484491654299209e-08,
      "loss": 0.3817,
      "step": 15102
    },
    {
      "epoch": 1.851765571358509,
      "grad_norm": 1.9486452763575575,
      "learning_rate": 7.472187677897951e-08,
      "loss": 0.4477,
      "step": 15103
    },
    {
      "epoch": 1.8518881804806278,
      "grad_norm": 1.7647236543169507,
      "learning_rate": 7.459893669819196e-08,
      "loss": 0.4414,
      "step": 15104
    },
    {
      "epoch": 1.8520107896027465,
      "grad_norm": 2.011182426582372,
      "learning_rate": 7.447609630568147e-08,
      "loss": 0.4021,
      "step": 15105
    },
    {
      "epoch": 1.8521333987248652,
      "grad_norm": 1.8332473610318092,
      "learning_rate": 7.435335560649793e-08,
      "loss": 0.3969,
      "step": 15106
    },
    {
      "epoch": 1.8522560078469839,
      "grad_norm": 1.8754120161447965,
      "learning_rate": 7.423071460568531e-08,
      "loss": 0.3777,
      "step": 15107
    },
    {
      "epoch": 1.8523786169691026,
      "grad_norm": 2.0311278818292138,
      "learning_rate": 7.410817330828518e-08,
      "loss": 0.4346,
      "step": 15108
    },
    {
      "epoch": 1.852501226091221,
      "grad_norm": 2.0978238924485217,
      "learning_rate": 7.398573171933376e-08,
      "loss": 0.4276,
      "step": 15109
    },
    {
      "epoch": 1.8526238352133397,
      "grad_norm": 2.094800751874953,
      "learning_rate": 7.386338984386343e-08,
      "loss": 0.4095,
      "step": 15110
    },
    {
      "epoch": 1.8527464443354584,
      "grad_norm": 1.9657666036052892,
      "learning_rate": 7.374114768690293e-08,
      "loss": 0.5004,
      "step": 15111
    },
    {
      "epoch": 1.8528690534575771,
      "grad_norm": 2.0902458035153137,
      "learning_rate": 7.361900525347687e-08,
      "loss": 0.4159,
      "step": 15112
    },
    {
      "epoch": 1.8529916625796958,
      "grad_norm": 1.8965704797379912,
      "learning_rate": 7.34969625486054e-08,
      "loss": 0.4269,
      "step": 15113
    },
    {
      "epoch": 1.8531142717018145,
      "grad_norm": 1.9599082510140187,
      "learning_rate": 7.33750195773042e-08,
      "loss": 0.3755,
      "step": 15114
    },
    {
      "epoch": 1.8532368808239332,
      "grad_norm": 1.9188037719412783,
      "learning_rate": 7.325317634458595e-08,
      "loss": 0.4521,
      "step": 15115
    },
    {
      "epoch": 1.853359489946052,
      "grad_norm": 1.989458176466408,
      "learning_rate": 7.313143285545832e-08,
      "loss": 0.4339,
      "step": 15116
    },
    {
      "epoch": 1.8534820990681706,
      "grad_norm": 1.9992379934858324,
      "learning_rate": 7.300978911492507e-08,
      "loss": 0.3583,
      "step": 15117
    },
    {
      "epoch": 1.8536047081902893,
      "grad_norm": 2.1093070309886284,
      "learning_rate": 7.28882451279861e-08,
      "loss": 0.4318,
      "step": 15118
    },
    {
      "epoch": 1.853727317312408,
      "grad_norm": 2.067720524020679,
      "learning_rate": 7.27668008996374e-08,
      "loss": 0.4555,
      "step": 15119
    },
    {
      "epoch": 1.8538499264345267,
      "grad_norm": 1.9285106220196095,
      "learning_rate": 7.264545643486997e-08,
      "loss": 0.4299,
      "step": 15120
    },
    {
      "epoch": 1.8539725355566454,
      "grad_norm": 1.8782301193231603,
      "learning_rate": 7.25242117386718e-08,
      "loss": 0.4822,
      "step": 15121
    },
    {
      "epoch": 1.8540951446787641,
      "grad_norm": 2.0048547351487285,
      "learning_rate": 7.24030668160261e-08,
      "loss": 0.4213,
      "step": 15122
    },
    {
      "epoch": 1.8542177538008828,
      "grad_norm": 1.9356403047026423,
      "learning_rate": 7.228202167191167e-08,
      "loss": 0.4222,
      "step": 15123
    },
    {
      "epoch": 1.8543403629230015,
      "grad_norm": 1.980133503638339,
      "learning_rate": 7.216107631130425e-08,
      "loss": 0.4671,
      "step": 15124
    },
    {
      "epoch": 1.8544629720451202,
      "grad_norm": 1.9041657928469427,
      "learning_rate": 7.204023073917487e-08,
      "loss": 0.393,
      "step": 15125
    },
    {
      "epoch": 1.854585581167239,
      "grad_norm": 1.9423080856518056,
      "learning_rate": 7.19194849604904e-08,
      "loss": 0.3908,
      "step": 15126
    },
    {
      "epoch": 1.8547081902893576,
      "grad_norm": 1.7900503839300022,
      "learning_rate": 7.17988389802135e-08,
      "loss": 0.4172,
      "step": 15127
    },
    {
      "epoch": 1.8548307994114763,
      "grad_norm": 1.983899744038735,
      "learning_rate": 7.167829280330302e-08,
      "loss": 0.4382,
      "step": 15128
    },
    {
      "epoch": 1.854953408533595,
      "grad_norm": 2.0514942848809827,
      "learning_rate": 7.155784643471442e-08,
      "loss": 0.4628,
      "step": 15129
    },
    {
      "epoch": 1.8550760176557137,
      "grad_norm": 1.9170463686148278,
      "learning_rate": 7.14374998793968e-08,
      "loss": 0.4159,
      "step": 15130
    },
    {
      "epoch": 1.8551986267778324,
      "grad_norm": 1.7758956485401156,
      "learning_rate": 7.13172531422976e-08,
      "loss": 0.4517,
      "step": 15131
    },
    {
      "epoch": 1.855321235899951,
      "grad_norm": 1.9542117261330658,
      "learning_rate": 7.119710622835896e-08,
      "loss": 0.3964,
      "step": 15132
    },
    {
      "epoch": 1.8554438450220696,
      "grad_norm": 1.8852685636797275,
      "learning_rate": 7.107705914251917e-08,
      "loss": 0.3906,
      "step": 15133
    },
    {
      "epoch": 1.8555664541441883,
      "grad_norm": 2.1783205341290435,
      "learning_rate": 7.095711188971205e-08,
      "loss": 0.4459,
      "step": 15134
    },
    {
      "epoch": 1.855689063266307,
      "grad_norm": 1.910752701950405,
      "learning_rate": 7.083726447486838e-08,
      "loss": 0.4475,
      "step": 15135
    },
    {
      "epoch": 1.8558116723884257,
      "grad_norm": 1.9761324598664227,
      "learning_rate": 7.071751690291312e-08,
      "loss": 0.401,
      "step": 15136
    },
    {
      "epoch": 1.8559342815105444,
      "grad_norm": 2.0785267292018745,
      "learning_rate": 7.059786917876899e-08,
      "loss": 0.4021,
      "step": 15137
    },
    {
      "epoch": 1.856056890632663,
      "grad_norm": 1.955030370626808,
      "learning_rate": 7.047832130735316e-08,
      "loss": 0.4073,
      "step": 15138
    },
    {
      "epoch": 1.8561794997547818,
      "grad_norm": 2.048594283762255,
      "learning_rate": 7.035887329357949e-08,
      "loss": 0.4421,
      "step": 15139
    },
    {
      "epoch": 1.8563021088769003,
      "grad_norm": 1.8799738871053293,
      "learning_rate": 7.023952514235738e-08,
      "loss": 0.4234,
      "step": 15140
    },
    {
      "epoch": 1.856424717999019,
      "grad_norm": 2.0065929364022552,
      "learning_rate": 7.012027685859236e-08,
      "loss": 0.4489,
      "step": 15141
    },
    {
      "epoch": 1.8565473271211377,
      "grad_norm": 1.9584429774492746,
      "learning_rate": 7.000112844718631e-08,
      "loss": 0.4225,
      "step": 15142
    },
    {
      "epoch": 1.8566699362432564,
      "grad_norm": 2.06292366350452,
      "learning_rate": 6.988207991303509e-08,
      "loss": 0.4331,
      "step": 15143
    },
    {
      "epoch": 1.856792545365375,
      "grad_norm": 1.8654376505887915,
      "learning_rate": 6.976313126103279e-08,
      "loss": 0.4191,
      "step": 15144
    },
    {
      "epoch": 1.8569151544874938,
      "grad_norm": 1.7193520082077212,
      "learning_rate": 6.96442824960683e-08,
      "loss": 0.3641,
      "step": 15145
    },
    {
      "epoch": 1.8570377636096125,
      "grad_norm": 2.0298876122481695,
      "learning_rate": 6.952553362302661e-08,
      "loss": 0.487,
      "step": 15146
    },
    {
      "epoch": 1.8571603727317312,
      "grad_norm": 1.8659484212003035,
      "learning_rate": 6.940688464678796e-08,
      "loss": 0.4183,
      "step": 15147
    },
    {
      "epoch": 1.8572829818538499,
      "grad_norm": 1.9185432006620746,
      "learning_rate": 6.928833557222931e-08,
      "loss": 0.4233,
      "step": 15148
    },
    {
      "epoch": 1.8574055909759686,
      "grad_norm": 2.089960834210372,
      "learning_rate": 6.916988640422367e-08,
      "loss": 0.4349,
      "step": 15149
    },
    {
      "epoch": 1.8575282000980873,
      "grad_norm": 2.0356027610195166,
      "learning_rate": 6.905153714763885e-08,
      "loss": 0.4878,
      "step": 15150
    },
    {
      "epoch": 1.857650809220206,
      "grad_norm": 1.7805691786401783,
      "learning_rate": 6.893328780733955e-08,
      "loss": 0.4511,
      "step": 15151
    },
    {
      "epoch": 1.8577734183423247,
      "grad_norm": 1.874480782226458,
      "learning_rate": 6.881513838818604e-08,
      "loss": 0.3919,
      "step": 15152
    },
    {
      "epoch": 1.8578960274644434,
      "grad_norm": 1.8984985273396702,
      "learning_rate": 6.869708889503446e-08,
      "loss": 0.4157,
      "step": 15153
    },
    {
      "epoch": 1.858018636586562,
      "grad_norm": 2.2052656150940697,
      "learning_rate": 6.857913933273675e-08,
      "loss": 0.4384,
      "step": 15154
    },
    {
      "epoch": 1.8581412457086808,
      "grad_norm": 2.1061500970167297,
      "learning_rate": 6.846128970614096e-08,
      "loss": 0.4228,
      "step": 15155
    },
    {
      "epoch": 1.8582638548307995,
      "grad_norm": 2.231448077772046,
      "learning_rate": 6.834354002009075e-08,
      "loss": 0.4755,
      "step": 15156
    },
    {
      "epoch": 1.8583864639529182,
      "grad_norm": 1.8794102590881363,
      "learning_rate": 6.822589027942584e-08,
      "loss": 0.3858,
      "step": 15157
    },
    {
      "epoch": 1.8585090730750369,
      "grad_norm": 2.2008714908534857,
      "learning_rate": 6.810834048898235e-08,
      "loss": 0.4264,
      "step": 15158
    },
    {
      "epoch": 1.8586316821971556,
      "grad_norm": 1.9988009477130744,
      "learning_rate": 6.799089065359143e-08,
      "loss": 0.4015,
      "step": 15159
    },
    {
      "epoch": 1.8587542913192743,
      "grad_norm": 1.7710298186328752,
      "learning_rate": 6.787354077808006e-08,
      "loss": 0.4608,
      "step": 15160
    },
    {
      "epoch": 1.858876900441393,
      "grad_norm": 1.777230678109293,
      "learning_rate": 6.775629086727187e-08,
      "loss": 0.4608,
      "step": 15161
    },
    {
      "epoch": 1.8589995095635117,
      "grad_norm": 1.8425316190202206,
      "learning_rate": 6.763914092598661e-08,
      "loss": 0.4192,
      "step": 15162
    },
    {
      "epoch": 1.8591221186856304,
      "grad_norm": 2.040930101555769,
      "learning_rate": 6.752209095903823e-08,
      "loss": 0.4625,
      "step": 15163
    },
    {
      "epoch": 1.8592447278077489,
      "grad_norm": 1.9050615320108473,
      "learning_rate": 6.740514097123841e-08,
      "loss": 0.3992,
      "step": 15164
    },
    {
      "epoch": 1.8593673369298676,
      "grad_norm": 2.0046071114775663,
      "learning_rate": 6.728829096739414e-08,
      "loss": 0.4323,
      "step": 15165
    },
    {
      "epoch": 1.8594899460519863,
      "grad_norm": 1.902148459431184,
      "learning_rate": 6.717154095230743e-08,
      "loss": 0.4247,
      "step": 15166
    },
    {
      "epoch": 1.859612555174105,
      "grad_norm": 2.0926766426624184,
      "learning_rate": 6.705489093077777e-08,
      "loss": 0.4161,
      "step": 15167
    },
    {
      "epoch": 1.8597351642962237,
      "grad_norm": 1.8987734412624013,
      "learning_rate": 6.69383409075991e-08,
      "loss": 0.4044,
      "step": 15168
    },
    {
      "epoch": 1.8598577734183424,
      "grad_norm": 1.8022764990517643,
      "learning_rate": 6.682189088756203e-08,
      "loss": 0.4501,
      "step": 15169
    },
    {
      "epoch": 1.859980382540461,
      "grad_norm": 1.9327025273293588,
      "learning_rate": 6.670554087545273e-08,
      "loss": 0.4318,
      "step": 15170
    },
    {
      "epoch": 1.8601029916625795,
      "grad_norm": 1.9316184310060618,
      "learning_rate": 6.658929087605376e-08,
      "loss": 0.4501,
      "step": 15171
    },
    {
      "epoch": 1.8602256007846982,
      "grad_norm": 1.7095528441732124,
      "learning_rate": 6.647314089414297e-08,
      "loss": 0.4368,
      "step": 15172
    },
    {
      "epoch": 1.860348209906817,
      "grad_norm": 1.869966300253277,
      "learning_rate": 6.635709093449433e-08,
      "loss": 0.4049,
      "step": 15173
    },
    {
      "epoch": 1.8604708190289356,
      "grad_norm": 2.029919630887511,
      "learning_rate": 6.624114100187734e-08,
      "loss": 0.4195,
      "step": 15174
    },
    {
      "epoch": 1.8605934281510543,
      "grad_norm": 1.7645082197697293,
      "learning_rate": 6.612529110105847e-08,
      "loss": 0.4188,
      "step": 15175
    },
    {
      "epoch": 1.860716037273173,
      "grad_norm": 1.9103294818860106,
      "learning_rate": 6.600954123679864e-08,
      "loss": 0.4411,
      "step": 15176
    },
    {
      "epoch": 1.8608386463952917,
      "grad_norm": 1.9334196472964986,
      "learning_rate": 6.589389141385599e-08,
      "loss": 0.422,
      "step": 15177
    },
    {
      "epoch": 1.8609612555174104,
      "grad_norm": 2.02992845274192,
      "learning_rate": 6.577834163698338e-08,
      "loss": 0.4048,
      "step": 15178
    },
    {
      "epoch": 1.8610838646395291,
      "grad_norm": 1.9290550133586761,
      "learning_rate": 6.566289191093061e-08,
      "loss": 0.426,
      "step": 15179
    },
    {
      "epoch": 1.8612064737616478,
      "grad_norm": 1.946418563096974,
      "learning_rate": 6.554754224044252e-08,
      "loss": 0.4514,
      "step": 15180
    },
    {
      "epoch": 1.8613290828837665,
      "grad_norm": 2.0729364283210376,
      "learning_rate": 6.54322926302603e-08,
      "loss": 0.4195,
      "step": 15181
    },
    {
      "epoch": 1.8614516920058852,
      "grad_norm": 2.071734451118988,
      "learning_rate": 6.531714308512099e-08,
      "loss": 0.4119,
      "step": 15182
    },
    {
      "epoch": 1.861574301128004,
      "grad_norm": 1.9306098430234866,
      "learning_rate": 6.520209360975748e-08,
      "loss": 0.4135,
      "step": 15183
    },
    {
      "epoch": 1.8616969102501226,
      "grad_norm": 1.9818679081990886,
      "learning_rate": 6.508714420889795e-08,
      "loss": 0.4681,
      "step": 15184
    },
    {
      "epoch": 1.8618195193722413,
      "grad_norm": 2.0148052798143783,
      "learning_rate": 6.497229488726803e-08,
      "loss": 0.3914,
      "step": 15185
    },
    {
      "epoch": 1.86194212849436,
      "grad_norm": 1.959261033878946,
      "learning_rate": 6.485754564958729e-08,
      "loss": 0.3926,
      "step": 15186
    },
    {
      "epoch": 1.8620647376164787,
      "grad_norm": 1.881754695277905,
      "learning_rate": 6.474289650057252e-08,
      "loss": 0.4801,
      "step": 15187
    },
    {
      "epoch": 1.8621873467385974,
      "grad_norm": 1.8518403533857513,
      "learning_rate": 6.462834744493607e-08,
      "loss": 0.3786,
      "step": 15188
    },
    {
      "epoch": 1.8623099558607161,
      "grad_norm": 1.829515947882015,
      "learning_rate": 6.45138984873861e-08,
      "loss": 0.4244,
      "step": 15189
    },
    {
      "epoch": 1.8624325649828348,
      "grad_norm": 1.7678137897740895,
      "learning_rate": 6.439954963262634e-08,
      "loss": 0.3952,
      "step": 15190
    },
    {
      "epoch": 1.8625551741049535,
      "grad_norm": 2.0032362286303678,
      "learning_rate": 6.428530088535723e-08,
      "loss": 0.4246,
      "step": 15191
    },
    {
      "epoch": 1.8626777832270722,
      "grad_norm": 2.0802886607063646,
      "learning_rate": 6.417115225027442e-08,
      "loss": 0.4276,
      "step": 15192
    },
    {
      "epoch": 1.862800392349191,
      "grad_norm": 2.0713028343484123,
      "learning_rate": 6.40571037320692e-08,
      "loss": 0.4557,
      "step": 15193
    },
    {
      "epoch": 1.8629230014713096,
      "grad_norm": 2.1000351161671253,
      "learning_rate": 6.394315533542972e-08,
      "loss": 0.3964,
      "step": 15194
    },
    {
      "epoch": 1.863045610593428,
      "grad_norm": 1.8864465415886786,
      "learning_rate": 6.382930706503948e-08,
      "loss": 0.4147,
      "step": 15195
    },
    {
      "epoch": 1.8631682197155468,
      "grad_norm": 1.8708768752509153,
      "learning_rate": 6.371555892557752e-08,
      "loss": 0.42,
      "step": 15196
    },
    {
      "epoch": 1.8632908288376655,
      "grad_norm": 2.1501883479037396,
      "learning_rate": 6.36019109217187e-08,
      "loss": 0.4217,
      "step": 15197
    },
    {
      "epoch": 1.8634134379597842,
      "grad_norm": 1.843090385874795,
      "learning_rate": 6.34883630581351e-08,
      "loss": 0.3721,
      "step": 15198
    },
    {
      "epoch": 1.863536047081903,
      "grad_norm": 1.921197537163828,
      "learning_rate": 6.33749153394933e-08,
      "loss": 0.4214,
      "step": 15199
    },
    {
      "epoch": 1.8636586562040216,
      "grad_norm": 1.9839515059464239,
      "learning_rate": 6.326156777045595e-08,
      "loss": 0.4288,
      "step": 15200
    },
    {
      "epoch": 1.8637812653261403,
      "grad_norm": 1.9891863451899097,
      "learning_rate": 6.314832035568208e-08,
      "loss": 0.4043,
      "step": 15201
    },
    {
      "epoch": 1.863903874448259,
      "grad_norm": 1.8427235414606395,
      "learning_rate": 6.303517309982659e-08,
      "loss": 0.3752,
      "step": 15202
    },
    {
      "epoch": 1.8640264835703775,
      "grad_norm": 1.9613207810221218,
      "learning_rate": 6.292212600753966e-08,
      "loss": 0.4052,
      "step": 15203
    },
    {
      "epoch": 1.8641490926924962,
      "grad_norm": 1.9667388928803922,
      "learning_rate": 6.280917908346784e-08,
      "loss": 0.4169,
      "step": 15204
    },
    {
      "epoch": 1.8642717018146149,
      "grad_norm": 2.0231702697014926,
      "learning_rate": 6.26963323322538e-08,
      "loss": 0.4605,
      "step": 15205
    },
    {
      "epoch": 1.8643943109367336,
      "grad_norm": 1.9865523062546264,
      "learning_rate": 6.258358575853496e-08,
      "loss": 0.3925,
      "step": 15206
    },
    {
      "epoch": 1.8645169200588523,
      "grad_norm": 1.8315494853096332,
      "learning_rate": 6.247093936694593e-08,
      "loss": 0.4654,
      "step": 15207
    },
    {
      "epoch": 1.864639529180971,
      "grad_norm": 1.817607428205607,
      "learning_rate": 6.235839316211662e-08,
      "loss": 0.4246,
      "step": 15208
    },
    {
      "epoch": 1.8647621383030897,
      "grad_norm": 1.9181388231018748,
      "learning_rate": 6.224594714867304e-08,
      "loss": 0.3731,
      "step": 15209
    },
    {
      "epoch": 1.8648847474252084,
      "grad_norm": 1.7606673743634726,
      "learning_rate": 6.213360133123652e-08,
      "loss": 0.3992,
      "step": 15210
    },
    {
      "epoch": 1.865007356547327,
      "grad_norm": 2.0538411386266264,
      "learning_rate": 6.202135571442503e-08,
      "loss": 0.4046,
      "step": 15211
    },
    {
      "epoch": 1.8651299656694458,
      "grad_norm": 1.9387026838662864,
      "learning_rate": 6.19092103028518e-08,
      "loss": 0.445,
      "step": 15212
    },
    {
      "epoch": 1.8652525747915645,
      "grad_norm": 1.956748088850032,
      "learning_rate": 6.179716510112649e-08,
      "loss": 0.4476,
      "step": 15213
    },
    {
      "epoch": 1.8653751839136832,
      "grad_norm": 1.9496198784039735,
      "learning_rate": 6.168522011385403e-08,
      "loss": 0.4727,
      "step": 15214
    },
    {
      "epoch": 1.8654977930358019,
      "grad_norm": 2.0731793430206396,
      "learning_rate": 6.157337534563573e-08,
      "loss": 0.4641,
      "step": 15215
    },
    {
      "epoch": 1.8656204021579206,
      "grad_norm": 2.106009627496005,
      "learning_rate": 6.146163080106849e-08,
      "loss": 0.3804,
      "step": 15216
    },
    {
      "epoch": 1.8657430112800393,
      "grad_norm": 1.9697628459622452,
      "learning_rate": 6.134998648474555e-08,
      "loss": 0.4362,
      "step": 15217
    },
    {
      "epoch": 1.865865620402158,
      "grad_norm": 1.9451996832319691,
      "learning_rate": 6.123844240125548e-08,
      "loss": 0.4569,
      "step": 15218
    },
    {
      "epoch": 1.8659882295242767,
      "grad_norm": 1.9532131941692092,
      "learning_rate": 6.112699855518267e-08,
      "loss": 0.4444,
      "step": 15219
    },
    {
      "epoch": 1.8661108386463954,
      "grad_norm": 2.192692707333248,
      "learning_rate": 6.101565495110817e-08,
      "loss": 0.424,
      "step": 15220
    },
    {
      "epoch": 1.866233447768514,
      "grad_norm": 1.9426979175497796,
      "learning_rate": 6.090441159360804e-08,
      "loss": 0.4172,
      "step": 15221
    },
    {
      "epoch": 1.8663560568906328,
      "grad_norm": 2.0409078681366823,
      "learning_rate": 6.079326848725475e-08,
      "loss": 0.4203,
      "step": 15222
    },
    {
      "epoch": 1.8664786660127515,
      "grad_norm": 1.978061224482444,
      "learning_rate": 6.068222563661602e-08,
      "loss": 0.4596,
      "step": 15223
    },
    {
      "epoch": 1.8666012751348702,
      "grad_norm": 1.9648367058218432,
      "learning_rate": 6.057128304625654e-08,
      "loss": 0.4237,
      "step": 15224
    },
    {
      "epoch": 1.8667238842569889,
      "grad_norm": 1.7997104070090395,
      "learning_rate": 6.046044072073625e-08,
      "loss": 0.427,
      "step": 15225
    },
    {
      "epoch": 1.8668464933791074,
      "grad_norm": 2.047737101762293,
      "learning_rate": 6.034969866461044e-08,
      "loss": 0.4128,
      "step": 15226
    },
    {
      "epoch": 1.866969102501226,
      "grad_norm": 1.8657438365035954,
      "learning_rate": 6.023905688243098e-08,
      "loss": 0.4146,
      "step": 15227
    },
    {
      "epoch": 1.8670917116233448,
      "grad_norm": 1.9017050432959322,
      "learning_rate": 6.012851537874592e-08,
      "loss": 0.4722,
      "step": 15228
    },
    {
      "epoch": 1.8672143207454635,
      "grad_norm": 1.9181543697073489,
      "learning_rate": 6.001807415809802e-08,
      "loss": 0.4184,
      "step": 15229
    },
    {
      "epoch": 1.8673369298675822,
      "grad_norm": 1.9292094237461903,
      "learning_rate": 5.990773322502696e-08,
      "loss": 0.4584,
      "step": 15230
    },
    {
      "epoch": 1.8674595389897009,
      "grad_norm": 1.664692981551434,
      "learning_rate": 5.979749258406802e-08,
      "loss": 0.447,
      "step": 15231
    },
    {
      "epoch": 1.8675821481118196,
      "grad_norm": 1.9061269183279772,
      "learning_rate": 5.968735223975203e-08,
      "loss": 0.3787,
      "step": 15232
    },
    {
      "epoch": 1.8677047572339383,
      "grad_norm": 1.9512283024204888,
      "learning_rate": 5.957731219660617e-08,
      "loss": 0.4422,
      "step": 15233
    },
    {
      "epoch": 1.8678273663560567,
      "grad_norm": 1.8695228517392704,
      "learning_rate": 5.946737245915324e-08,
      "loss": 0.4128,
      "step": 15234
    },
    {
      "epoch": 1.8679499754781754,
      "grad_norm": 1.8147197725961246,
      "learning_rate": 5.935753303191183e-08,
      "loss": 0.4551,
      "step": 15235
    },
    {
      "epoch": 1.8680725846002941,
      "grad_norm": 1.9353217518985495,
      "learning_rate": 5.9247793919396665e-08,
      "loss": 0.434,
      "step": 15236
    },
    {
      "epoch": 1.8681951937224128,
      "grad_norm": 1.9355340052274042,
      "learning_rate": 5.9138155126118034e-08,
      "loss": 0.4622,
      "step": 15237
    },
    {
      "epoch": 1.8683178028445315,
      "grad_norm": 1.9444242931729028,
      "learning_rate": 5.9028616656582885e-08,
      "loss": 0.4093,
      "step": 15238
    },
    {
      "epoch": 1.8684404119666502,
      "grad_norm": 2.0137677035452586,
      "learning_rate": 5.891917851529261e-08,
      "loss": 0.4463,
      "step": 15239
    },
    {
      "epoch": 1.868563021088769,
      "grad_norm": 1.9314014798613819,
      "learning_rate": 5.880984070674556e-08,
      "loss": 0.425,
      "step": 15240
    },
    {
      "epoch": 1.8686856302108876,
      "grad_norm": 2.0416973541726247,
      "learning_rate": 5.8700603235436196e-08,
      "loss": 0.4175,
      "step": 15241
    },
    {
      "epoch": 1.8688082393330063,
      "grad_norm": 2.0097599654020186,
      "learning_rate": 5.859146610585398e-08,
      "loss": 0.4337,
      "step": 15242
    },
    {
      "epoch": 1.868930848455125,
      "grad_norm": 1.994603383998473,
      "learning_rate": 5.848242932248449e-08,
      "loss": 0.4528,
      "step": 15243
    },
    {
      "epoch": 1.8690534575772437,
      "grad_norm": 1.9366784767157128,
      "learning_rate": 5.83734928898097e-08,
      "loss": 0.3994,
      "step": 15244
    },
    {
      "epoch": 1.8691760666993624,
      "grad_norm": 1.8698365439328881,
      "learning_rate": 5.826465681230686e-08,
      "loss": 0.3995,
      "step": 15245
    },
    {
      "epoch": 1.8692986758214811,
      "grad_norm": 2.121605733574572,
      "learning_rate": 5.815592109444934e-08,
      "loss": 0.4282,
      "step": 15246
    },
    {
      "epoch": 1.8694212849435998,
      "grad_norm": 1.6939342911336719,
      "learning_rate": 5.8047285740706336e-08,
      "loss": 0.4204,
      "step": 15247
    },
    {
      "epoch": 1.8695438940657185,
      "grad_norm": 1.967937905869139,
      "learning_rate": 5.793875075554317e-08,
      "loss": 0.4247,
      "step": 15248
    },
    {
      "epoch": 1.8696665031878372,
      "grad_norm": 2.013325767161132,
      "learning_rate": 5.7830316143420714e-08,
      "loss": 0.4204,
      "step": 15249
    },
    {
      "epoch": 1.869789112309956,
      "grad_norm": 1.905223664695622,
      "learning_rate": 5.772198190879569e-08,
      "loss": 0.4,
      "step": 15250
    },
    {
      "epoch": 1.8699117214320746,
      "grad_norm": 1.9699118021481798,
      "learning_rate": 5.761374805612091e-08,
      "loss": 0.4399,
      "step": 15251
    },
    {
      "epoch": 1.8700343305541933,
      "grad_norm": 2.100770798753289,
      "learning_rate": 5.7505614589845046e-08,
      "loss": 0.4531,
      "step": 15252
    },
    {
      "epoch": 1.870156939676312,
      "grad_norm": 1.9248449861472445,
      "learning_rate": 5.7397581514412325e-08,
      "loss": 0.3991,
      "step": 15253
    },
    {
      "epoch": 1.8702795487984307,
      "grad_norm": 1.7453275305127576,
      "learning_rate": 5.7289648834263634e-08,
      "loss": 0.4002,
      "step": 15254
    },
    {
      "epoch": 1.8704021579205494,
      "grad_norm": 2.1694460348645515,
      "learning_rate": 5.718181655383459e-08,
      "loss": 0.4467,
      "step": 15255
    },
    {
      "epoch": 1.8705247670426681,
      "grad_norm": 2.144797151770683,
      "learning_rate": 5.707408467755748e-08,
      "loss": 0.4199,
      "step": 15256
    },
    {
      "epoch": 1.8706473761647868,
      "grad_norm": 1.9885596643678098,
      "learning_rate": 5.696645320986044e-08,
      "loss": 0.3736,
      "step": 15257
    },
    {
      "epoch": 1.8707699852869053,
      "grad_norm": 1.9328819252084424,
      "learning_rate": 5.6858922155167696e-08,
      "loss": 0.4493,
      "step": 15258
    },
    {
      "epoch": 1.870892594409024,
      "grad_norm": 2.014874497947069,
      "learning_rate": 5.675149151789794e-08,
      "loss": 0.4211,
      "step": 15259
    },
    {
      "epoch": 1.8710152035311427,
      "grad_norm": 1.8794341006249369,
      "learning_rate": 5.664416130246736e-08,
      "loss": 0.3801,
      "step": 15260
    },
    {
      "epoch": 1.8711378126532614,
      "grad_norm": 1.9203987533999836,
      "learning_rate": 5.653693151328743e-08,
      "loss": 0.3871,
      "step": 15261
    },
    {
      "epoch": 1.87126042177538,
      "grad_norm": 2.0210922688852806,
      "learning_rate": 5.642980215476546e-08,
      "loss": 0.457,
      "step": 15262
    },
    {
      "epoch": 1.8713830308974988,
      "grad_norm": 1.9700910347709724,
      "learning_rate": 5.6322773231304585e-08,
      "loss": 0.4036,
      "step": 15263
    },
    {
      "epoch": 1.8715056400196175,
      "grad_norm": 1.8590955012286114,
      "learning_rate": 5.621584474730407e-08,
      "loss": 0.416,
      "step": 15264
    },
    {
      "epoch": 1.8716282491417362,
      "grad_norm": 1.9776793611017192,
      "learning_rate": 5.610901670715846e-08,
      "loss": 0.4327,
      "step": 15265
    },
    {
      "epoch": 1.8717508582638547,
      "grad_norm": 1.9924654408065305,
      "learning_rate": 5.600228911525896e-08,
      "loss": 0.4315,
      "step": 15266
    },
    {
      "epoch": 1.8718734673859734,
      "grad_norm": 1.8643261833438123,
      "learning_rate": 5.589566197599233e-08,
      "loss": 0.4356,
      "step": 15267
    },
    {
      "epoch": 1.871996076508092,
      "grad_norm": 2.0661372479023927,
      "learning_rate": 5.578913529374091e-08,
      "loss": 0.4282,
      "step": 15268
    },
    {
      "epoch": 1.8721186856302108,
      "grad_norm": 1.7812712926126175,
      "learning_rate": 5.568270907288287e-08,
      "loss": 0.4515,
      "step": 15269
    },
    {
      "epoch": 1.8722412947523295,
      "grad_norm": 1.9711066935358819,
      "learning_rate": 5.5576383317792747e-08,
      "loss": 0.4231,
      "step": 15270
    },
    {
      "epoch": 1.8723639038744482,
      "grad_norm": 1.9441068876994467,
      "learning_rate": 5.5470158032841225e-08,
      "loss": 0.4052,
      "step": 15271
    },
    {
      "epoch": 1.8724865129965669,
      "grad_norm": 1.8384694485510753,
      "learning_rate": 5.53640332223937e-08,
      "loss": 0.3867,
      "step": 15272
    },
    {
      "epoch": 1.8726091221186856,
      "grad_norm": 2.2469623593071386,
      "learning_rate": 5.525800889081223e-08,
      "loss": 0.4266,
      "step": 15273
    },
    {
      "epoch": 1.8727317312408043,
      "grad_norm": 2.014915928771862,
      "learning_rate": 5.515208504245473e-08,
      "loss": 0.4222,
      "step": 15274
    },
    {
      "epoch": 1.872854340362923,
      "grad_norm": 2.127172777783786,
      "learning_rate": 5.5046261681674926e-08,
      "loss": 0.4329,
      "step": 15275
    },
    {
      "epoch": 1.8729769494850417,
      "grad_norm": 1.9419756042032854,
      "learning_rate": 5.4940538812821856e-08,
      "loss": 0.4309,
      "step": 15276
    },
    {
      "epoch": 1.8730995586071604,
      "grad_norm": 2.2458164538567447,
      "learning_rate": 5.483491644024147e-08,
      "loss": 0.4353,
      "step": 15277
    },
    {
      "epoch": 1.873222167729279,
      "grad_norm": 1.9006318273332188,
      "learning_rate": 5.4729394568274755e-08,
      "loss": 0.3954,
      "step": 15278
    },
    {
      "epoch": 1.8733447768513978,
      "grad_norm": 1.967537576571401,
      "learning_rate": 5.462397320125934e-08,
      "loss": 0.3871,
      "step": 15279
    },
    {
      "epoch": 1.8734673859735165,
      "grad_norm": 1.7950221802813433,
      "learning_rate": 5.451865234352732e-08,
      "loss": 0.4043,
      "step": 15280
    },
    {
      "epoch": 1.8735899950956352,
      "grad_norm": 1.9642002307857298,
      "learning_rate": 5.441343199940829e-08,
      "loss": 0.3786,
      "step": 15281
    },
    {
      "epoch": 1.8737126042177539,
      "grad_norm": 1.9498193605666452,
      "learning_rate": 5.430831217322685e-08,
      "loss": 0.4142,
      "step": 15282
    },
    {
      "epoch": 1.8738352133398726,
      "grad_norm": 2.016991596278454,
      "learning_rate": 5.420329286930343e-08,
      "loss": 0.4248,
      "step": 15283
    },
    {
      "epoch": 1.8739578224619913,
      "grad_norm": 1.8865777374800943,
      "learning_rate": 5.4098374091954564e-08,
      "loss": 0.3946,
      "step": 15284
    },
    {
      "epoch": 1.87408043158411,
      "grad_norm": 1.9746793658276491,
      "learning_rate": 5.399355584549293e-08,
      "loss": 0.3982,
      "step": 15285
    },
    {
      "epoch": 1.8742030407062287,
      "grad_norm": 2.016680454169681,
      "learning_rate": 5.3888838134225905e-08,
      "loss": 0.4419,
      "step": 15286
    },
    {
      "epoch": 1.8743256498283474,
      "grad_norm": 1.7460748446332655,
      "learning_rate": 5.378422096245867e-08,
      "loss": 0.4593,
      "step": 15287
    },
    {
      "epoch": 1.874448258950466,
      "grad_norm": 1.6777160146493522,
      "learning_rate": 5.3679704334490545e-08,
      "loss": 0.4086,
      "step": 15288
    },
    {
      "epoch": 1.8745708680725846,
      "grad_norm": 1.8692443996782098,
      "learning_rate": 5.357528825461728e-08,
      "loss": 0.4059,
      "step": 15289
    },
    {
      "epoch": 1.8746934771947033,
      "grad_norm": 1.869051240971666,
      "learning_rate": 5.3470972727130696e-08,
      "loss": 0.4208,
      "step": 15290
    },
    {
      "epoch": 1.874816086316822,
      "grad_norm": 1.8252108357631336,
      "learning_rate": 5.33667577563185e-08,
      "loss": 0.3908,
      "step": 15291
    },
    {
      "epoch": 1.8749386954389407,
      "grad_norm": 2.07390208871038,
      "learning_rate": 5.3262643346464196e-08,
      "loss": 0.4531,
      "step": 15292
    },
    {
      "epoch": 1.8750613045610593,
      "grad_norm": 1.9174401576954725,
      "learning_rate": 5.3158629501846306e-08,
      "loss": 0.3848,
      "step": 15293
    },
    {
      "epoch": 1.875183913683178,
      "grad_norm": 1.9744437863190887,
      "learning_rate": 5.3054716226741134e-08,
      "loss": 0.4113,
      "step": 15294
    },
    {
      "epoch": 1.8753065228052967,
      "grad_norm": 1.891728127587635,
      "learning_rate": 5.2950903525418595e-08,
      "loss": 0.4277,
      "step": 15295
    },
    {
      "epoch": 1.8754291319274154,
      "grad_norm": 1.9768210136577746,
      "learning_rate": 5.284719140214639e-08,
      "loss": 0.3671,
      "step": 15296
    },
    {
      "epoch": 1.875551741049534,
      "grad_norm": 1.934056712224119,
      "learning_rate": 5.274357986118667e-08,
      "loss": 0.3927,
      "step": 15297
    },
    {
      "epoch": 1.8756743501716526,
      "grad_norm": 1.974326360116601,
      "learning_rate": 5.2640068906798793e-08,
      "loss": 0.3956,
      "step": 15298
    },
    {
      "epoch": 1.8757969592937713,
      "grad_norm": 1.9013777713436835,
      "learning_rate": 5.253665854323631e-08,
      "loss": 0.428,
      "step": 15299
    },
    {
      "epoch": 1.87591956841589,
      "grad_norm": 2.0035119222299356,
      "learning_rate": 5.243334877475054e-08,
      "loss": 0.4349,
      "step": 15300
    },
    {
      "epoch": 1.8760421775380087,
      "grad_norm": 1.8793983483397552,
      "learning_rate": 5.2330139605586974e-08,
      "loss": 0.417,
      "step": 15301
    },
    {
      "epoch": 1.8761647866601274,
      "grad_norm": 1.9290649076473556,
      "learning_rate": 5.222703103998777e-08,
      "loss": 0.3618,
      "step": 15302
    },
    {
      "epoch": 1.8762873957822461,
      "grad_norm": 2.0168155691714897,
      "learning_rate": 5.212402308219122e-08,
      "loss": 0.4526,
      "step": 15303
    },
    {
      "epoch": 1.8764100049043648,
      "grad_norm": 1.9703450412176269,
      "learning_rate": 5.202111573643087e-08,
      "loss": 0.4341,
      "step": 15304
    },
    {
      "epoch": 1.8765326140264835,
      "grad_norm": 1.836530782026063,
      "learning_rate": 5.1918309006936675e-08,
      "loss": 0.3692,
      "step": 15305
    },
    {
      "epoch": 1.8766552231486022,
      "grad_norm": 1.8952638396840948,
      "learning_rate": 5.1815602897933583e-08,
      "loss": 0.4142,
      "step": 15306
    },
    {
      "epoch": 1.876777832270721,
      "grad_norm": 1.9787690721543276,
      "learning_rate": 5.171299741364377e-08,
      "loss": 0.4119,
      "step": 15307
    },
    {
      "epoch": 1.8769004413928396,
      "grad_norm": 1.7748762176375394,
      "learning_rate": 5.161049255828388e-08,
      "loss": 0.4471,
      "step": 15308
    },
    {
      "epoch": 1.8770230505149583,
      "grad_norm": 2.1872744045061387,
      "learning_rate": 5.1508088336067464e-08,
      "loss": 0.4247,
      "step": 15309
    },
    {
      "epoch": 1.877145659637077,
      "grad_norm": 1.954189670445555,
      "learning_rate": 5.1405784751203115e-08,
      "loss": 0.4208,
      "step": 15310
    },
    {
      "epoch": 1.8772682687591957,
      "grad_norm": 1.7200340465950483,
      "learning_rate": 5.130358180789608e-08,
      "loss": 0.4389,
      "step": 15311
    },
    {
      "epoch": 1.8773908778813144,
      "grad_norm": 2.0259814803637153,
      "learning_rate": 5.1201479510346595e-08,
      "loss": 0.4228,
      "step": 15312
    },
    {
      "epoch": 1.8775134870034331,
      "grad_norm": 2.004055315003893,
      "learning_rate": 5.1099477862751877e-08,
      "loss": 0.4042,
      "step": 15313
    },
    {
      "epoch": 1.8776360961255518,
      "grad_norm": 1.743118757489751,
      "learning_rate": 5.099757686930412e-08,
      "loss": 0.4096,
      "step": 15314
    },
    {
      "epoch": 1.8777587052476705,
      "grad_norm": 1.8205125949679728,
      "learning_rate": 5.089577653419136e-08,
      "loss": 0.4063,
      "step": 15315
    },
    {
      "epoch": 1.8778813143697892,
      "grad_norm": 1.9188224363586102,
      "learning_rate": 5.079407686159776e-08,
      "loss": 0.3926,
      "step": 15316
    },
    {
      "epoch": 1.878003923491908,
      "grad_norm": 1.927217063148141,
      "learning_rate": 5.0692477855704134e-08,
      "loss": 0.4136,
      "step": 15317
    },
    {
      "epoch": 1.8781265326140266,
      "grad_norm": 2.0287450203814457,
      "learning_rate": 5.059097952068548e-08,
      "loss": 0.4604,
      "step": 15318
    },
    {
      "epoch": 1.8782491417361453,
      "grad_norm": 2.224554375562871,
      "learning_rate": 5.048958186071401e-08,
      "loss": 0.4292,
      "step": 15319
    },
    {
      "epoch": 1.8783717508582638,
      "grad_norm": 2.0317992590474265,
      "learning_rate": 5.038828487995695e-08,
      "loss": 0.4112,
      "step": 15320
    },
    {
      "epoch": 1.8784943599803825,
      "grad_norm": 1.9352872732590716,
      "learning_rate": 5.0287088582578747e-08,
      "loss": 0.4354,
      "step": 15321
    },
    {
      "epoch": 1.8786169691025012,
      "grad_norm": 1.8978389154641628,
      "learning_rate": 5.018599297273746e-08,
      "loss": 0.4231,
      "step": 15322
    },
    {
      "epoch": 1.87873957822462,
      "grad_norm": 2.035369544547407,
      "learning_rate": 5.0084998054589216e-08,
      "loss": 0.401,
      "step": 15323
    },
    {
      "epoch": 1.8788621873467386,
      "grad_norm": 2.053453159140253,
      "learning_rate": 4.998410383228458e-08,
      "loss": 0.4189,
      "step": 15324
    },
    {
      "epoch": 1.8789847964688573,
      "grad_norm": 1.7885554228698732,
      "learning_rate": 4.988331030997107e-08,
      "loss": 0.4152,
      "step": 15325
    },
    {
      "epoch": 1.879107405590976,
      "grad_norm": 1.9274543089061036,
      "learning_rate": 4.978261749179064e-08,
      "loss": 0.4321,
      "step": 15326
    },
    {
      "epoch": 1.8792300147130947,
      "grad_norm": 1.9510876668767478,
      "learning_rate": 4.968202538188277e-08,
      "loss": 0.3904,
      "step": 15327
    },
    {
      "epoch": 1.8793526238352132,
      "grad_norm": 1.8679980295733547,
      "learning_rate": 4.958153398438137e-08,
      "loss": 0.4302,
      "step": 15328
    },
    {
      "epoch": 1.8794752329573319,
      "grad_norm": 1.9862280417692952,
      "learning_rate": 4.948114330341702e-08,
      "loss": 0.4452,
      "step": 15329
    },
    {
      "epoch": 1.8795978420794506,
      "grad_norm": 2.04844163445922,
      "learning_rate": 4.938085334311615e-08,
      "loss": 0.416,
      "step": 15330
    },
    {
      "epoch": 1.8797204512015693,
      "grad_norm": 1.8556407883028987,
      "learning_rate": 4.928066410760046e-08,
      "loss": 0.3739,
      "step": 15331
    },
    {
      "epoch": 1.879843060323688,
      "grad_norm": 2.084883920772386,
      "learning_rate": 4.918057560098832e-08,
      "loss": 0.4656,
      "step": 15332
    },
    {
      "epoch": 1.8799656694458067,
      "grad_norm": 1.8658373920040858,
      "learning_rate": 4.908058782739311e-08,
      "loss": 0.4164,
      "step": 15333
    },
    {
      "epoch": 1.8800882785679254,
      "grad_norm": 1.7002260406090448,
      "learning_rate": 4.898070079092543e-08,
      "loss": 0.4282,
      "step": 15334
    },
    {
      "epoch": 1.880210887690044,
      "grad_norm": 2.1224463920921623,
      "learning_rate": 4.888091449568949e-08,
      "loss": 0.456,
      "step": 15335
    },
    {
      "epoch": 1.8803334968121628,
      "grad_norm": 1.9079761348067195,
      "learning_rate": 4.878122894578729e-08,
      "loss": 0.4137,
      "step": 15336
    },
    {
      "epoch": 1.8804561059342815,
      "grad_norm": 2.1871469589333707,
      "learning_rate": 4.86816441453164e-08,
      "loss": 0.4233,
      "step": 15337
    },
    {
      "epoch": 1.8805787150564002,
      "grad_norm": 1.8185013738122382,
      "learning_rate": 4.858216009836964e-08,
      "loss": 0.4039,
      "step": 15338
    },
    {
      "epoch": 1.8807013241785189,
      "grad_norm": 1.7244739392013007,
      "learning_rate": 4.8482776809035955e-08,
      "loss": 0.4407,
      "step": 15339
    },
    {
      "epoch": 1.8808239333006376,
      "grad_norm": 1.9545790445754017,
      "learning_rate": 4.8383494281400157e-08,
      "loss": 0.3817,
      "step": 15340
    },
    {
      "epoch": 1.8809465424227563,
      "grad_norm": 2.100418065219917,
      "learning_rate": 4.828431251954313e-08,
      "loss": 0.4087,
      "step": 15341
    },
    {
      "epoch": 1.881069151544875,
      "grad_norm": 1.8511805162227777,
      "learning_rate": 4.8185231527541346e-08,
      "loss": 0.429,
      "step": 15342
    },
    {
      "epoch": 1.8811917606669937,
      "grad_norm": 1.8235608352540744,
      "learning_rate": 4.808625130946681e-08,
      "loss": 0.398,
      "step": 15343
    },
    {
      "epoch": 1.8813143697891124,
      "grad_norm": 2.090585740018165,
      "learning_rate": 4.798737186938823e-08,
      "loss": 0.4649,
      "step": 15344
    },
    {
      "epoch": 1.881436978911231,
      "grad_norm": 2.110511841683813,
      "learning_rate": 4.788859321136957e-08,
      "loss": 0.3841,
      "step": 15345
    },
    {
      "epoch": 1.8815595880333498,
      "grad_norm": 1.7286923063895658,
      "learning_rate": 4.7789915339471194e-08,
      "loss": 0.4196,
      "step": 15346
    },
    {
      "epoch": 1.8816821971554685,
      "grad_norm": 1.9334055028350894,
      "learning_rate": 4.769133825774847e-08,
      "loss": 0.4329,
      "step": 15347
    },
    {
      "epoch": 1.8818048062775872,
      "grad_norm": 1.7614634197652115,
      "learning_rate": 4.759286197025287e-08,
      "loss": 0.3817,
      "step": 15348
    },
    {
      "epoch": 1.8819274153997059,
      "grad_norm": 2.003399616061461,
      "learning_rate": 4.749448648103228e-08,
      "loss": 0.4365,
      "step": 15349
    },
    {
      "epoch": 1.8820500245218246,
      "grad_norm": 1.7264199524906532,
      "learning_rate": 4.739621179413068e-08,
      "loss": 0.4307,
      "step": 15350
    },
    {
      "epoch": 1.8821726336439433,
      "grad_norm": 1.9318785955276663,
      "learning_rate": 4.72980379135865e-08,
      "loss": 0.457,
      "step": 15351
    },
    {
      "epoch": 1.8822952427660617,
      "grad_norm": 1.9100035544118266,
      "learning_rate": 4.7199964843434865e-08,
      "loss": 0.4026,
      "step": 15352
    },
    {
      "epoch": 1.8824178518881804,
      "grad_norm": 2.2325301238965976,
      "learning_rate": 4.710199258770726e-08,
      "loss": 0.466,
      "step": 15353
    },
    {
      "epoch": 1.8825404610102991,
      "grad_norm": 1.9244403025661636,
      "learning_rate": 4.700412115043046e-08,
      "loss": 0.4169,
      "step": 15354
    },
    {
      "epoch": 1.8826630701324178,
      "grad_norm": 1.8880190877586607,
      "learning_rate": 4.6906350535626534e-08,
      "loss": 0.3749,
      "step": 15355
    },
    {
      "epoch": 1.8827856792545365,
      "grad_norm": 2.083307160890541,
      "learning_rate": 4.680868074731448e-08,
      "loss": 0.435,
      "step": 15356
    },
    {
      "epoch": 1.8829082883766552,
      "grad_norm": 2.033281039416048,
      "learning_rate": 4.671111178950916e-08,
      "loss": 0.422,
      "step": 15357
    },
    {
      "epoch": 1.883030897498774,
      "grad_norm": 2.0334796949320975,
      "learning_rate": 4.661364366621984e-08,
      "loss": 0.459,
      "step": 15358
    },
    {
      "epoch": 1.8831535066208926,
      "grad_norm": 2.064331324286593,
      "learning_rate": 4.651627638145362e-08,
      "loss": 0.4291,
      "step": 15359
    },
    {
      "epoch": 1.8832761157430111,
      "grad_norm": 2.0484088168719317,
      "learning_rate": 4.6419009939211447e-08,
      "loss": 0.4204,
      "step": 15360
    },
    {
      "epoch": 1.8833987248651298,
      "grad_norm": 2.0263706886260664,
      "learning_rate": 4.6321844343492076e-08,
      "loss": 0.4313,
      "step": 15361
    },
    {
      "epoch": 1.8835213339872485,
      "grad_norm": 1.9360242760786954,
      "learning_rate": 4.62247795982887e-08,
      "loss": 0.402,
      "step": 15362
    },
    {
      "epoch": 1.8836439431093672,
      "grad_norm": 1.983889657939549,
      "learning_rate": 4.6127815707590895e-08,
      "loss": 0.4544,
      "step": 15363
    },
    {
      "epoch": 1.883766552231486,
      "grad_norm": 1.9438187981052406,
      "learning_rate": 4.6030952675384376e-08,
      "loss": 0.4276,
      "step": 15364
    },
    {
      "epoch": 1.8838891613536046,
      "grad_norm": 2.1799826920124232,
      "learning_rate": 4.5934190505649845e-08,
      "loss": 0.4033,
      "step": 15365
    },
    {
      "epoch": 1.8840117704757233,
      "grad_norm": 1.8628580563037944,
      "learning_rate": 4.583752920236467e-08,
      "loss": 0.4338,
      "step": 15366
    },
    {
      "epoch": 1.884134379597842,
      "grad_norm": 2.0290270080436232,
      "learning_rate": 4.574096876950207e-08,
      "loss": 0.4318,
      "step": 15367
    },
    {
      "epoch": 1.8842569887199607,
      "grad_norm": 2.072042527455531,
      "learning_rate": 4.5644509211030253e-08,
      "loss": 0.4461,
      "step": 15368
    },
    {
      "epoch": 1.8843795978420794,
      "grad_norm": 1.8467379931338184,
      "learning_rate": 4.5548150530914104e-08,
      "loss": 0.3928,
      "step": 15369
    },
    {
      "epoch": 1.8845022069641981,
      "grad_norm": 2.0747723471946813,
      "learning_rate": 4.545189273311462e-08,
      "loss": 0.419,
      "step": 15370
    },
    {
      "epoch": 1.8846248160863168,
      "grad_norm": 1.9818214642095748,
      "learning_rate": 4.535573582158753e-08,
      "loss": 0.4811,
      "step": 15371
    },
    {
      "epoch": 1.8847474252084355,
      "grad_norm": 1.9240929083046343,
      "learning_rate": 4.525967980028523e-08,
      "loss": 0.4262,
      "step": 15372
    },
    {
      "epoch": 1.8848700343305542,
      "grad_norm": 1.7698435991163632,
      "learning_rate": 4.516372467315594e-08,
      "loss": 0.4103,
      "step": 15373
    },
    {
      "epoch": 1.884992643452673,
      "grad_norm": 2.0520564713062703,
      "learning_rate": 4.506787044414373e-08,
      "loss": 0.3938,
      "step": 15374
    },
    {
      "epoch": 1.8851152525747916,
      "grad_norm": 1.8788090963849882,
      "learning_rate": 4.497211711718824e-08,
      "loss": 0.4134,
      "step": 15375
    },
    {
      "epoch": 1.8852378616969103,
      "grad_norm": 1.9443580457652851,
      "learning_rate": 4.487646469622464e-08,
      "loss": 0.4557,
      "step": 15376
    },
    {
      "epoch": 1.885360470819029,
      "grad_norm": 2.0493256064945404,
      "learning_rate": 4.478091318518507e-08,
      "loss": 0.4593,
      "step": 15377
    },
    {
      "epoch": 1.8854830799411477,
      "grad_norm": 1.9480775412584528,
      "learning_rate": 4.468546258799611e-08,
      "loss": 0.4199,
      "step": 15378
    },
    {
      "epoch": 1.8856056890632664,
      "grad_norm": 1.984256288151507,
      "learning_rate": 4.459011290858212e-08,
      "loss": 0.394,
      "step": 15379
    },
    {
      "epoch": 1.8857282981853851,
      "grad_norm": 1.995131937503731,
      "learning_rate": 4.4494864150860795e-08,
      "loss": 0.396,
      "step": 15380
    },
    {
      "epoch": 1.8858509073075038,
      "grad_norm": 1.855760847339838,
      "learning_rate": 4.4399716318748175e-08,
      "loss": 0.4327,
      "step": 15381
    },
    {
      "epoch": 1.8859735164296225,
      "grad_norm": 2.1449520179495867,
      "learning_rate": 4.430466941615419e-08,
      "loss": 0.3782,
      "step": 15382
    },
    {
      "epoch": 1.886096125551741,
      "grad_norm": 1.8969004965650182,
      "learning_rate": 4.420972344698599e-08,
      "loss": 0.4374,
      "step": 15383
    },
    {
      "epoch": 1.8862187346738597,
      "grad_norm": 1.8126599096272007,
      "learning_rate": 4.4114878415146004e-08,
      "loss": 0.4632,
      "step": 15384
    },
    {
      "epoch": 1.8863413437959784,
      "grad_norm": 2.081337691181376,
      "learning_rate": 4.402013432453167e-08,
      "loss": 0.4236,
      "step": 15385
    },
    {
      "epoch": 1.886463952918097,
      "grad_norm": 1.861876403242579,
      "learning_rate": 4.392549117903821e-08,
      "loss": 0.4363,
      "step": 15386
    },
    {
      "epoch": 1.8865865620402158,
      "grad_norm": 2.0402574077391447,
      "learning_rate": 4.38309489825553e-08,
      "loss": 0.4354,
      "step": 15387
    },
    {
      "epoch": 1.8867091711623345,
      "grad_norm": 1.9334029406732152,
      "learning_rate": 4.3736507738968426e-08,
      "loss": 0.4454,
      "step": 15388
    },
    {
      "epoch": 1.8868317802844532,
      "grad_norm": 2.0951206245594163,
      "learning_rate": 4.3642167452159487e-08,
      "loss": 0.4017,
      "step": 15389
    },
    {
      "epoch": 1.886954389406572,
      "grad_norm": 1.8402040941783937,
      "learning_rate": 4.35479281260065e-08,
      "loss": 0.3839,
      "step": 15390
    },
    {
      "epoch": 1.8870769985286904,
      "grad_norm": 2.0842931668870444,
      "learning_rate": 4.3453789764382194e-08,
      "loss": 0.3905,
      "step": 15391
    },
    {
      "epoch": 1.887199607650809,
      "grad_norm": 1.8339672093890524,
      "learning_rate": 4.335975237115625e-08,
      "loss": 0.4392,
      "step": 15392
    },
    {
      "epoch": 1.8873222167729278,
      "grad_norm": 1.8781993980011977,
      "learning_rate": 4.326581595019336e-08,
      "loss": 0.4195,
      "step": 15393
    },
    {
      "epoch": 1.8874448258950465,
      "grad_norm": 1.960640700691805,
      "learning_rate": 4.317198050535487e-08,
      "loss": 0.4463,
      "step": 15394
    },
    {
      "epoch": 1.8875674350171652,
      "grad_norm": 2.0572264511801666,
      "learning_rate": 4.307824604049743e-08,
      "loss": 0.3869,
      "step": 15395
    },
    {
      "epoch": 1.8876900441392839,
      "grad_norm": 1.9363607712677493,
      "learning_rate": 4.298461255947406e-08,
      "loss": 0.4322,
      "step": 15396
    },
    {
      "epoch": 1.8878126532614026,
      "grad_norm": 2.139134749926693,
      "learning_rate": 4.2891080066132517e-08,
      "loss": 0.4345,
      "step": 15397
    },
    {
      "epoch": 1.8879352623835213,
      "grad_norm": 1.8394040851975997,
      "learning_rate": 4.2797648564317785e-08,
      "loss": 0.4226,
      "step": 15398
    },
    {
      "epoch": 1.88805787150564,
      "grad_norm": 2.033125897986988,
      "learning_rate": 4.270431805786957e-08,
      "loss": 0.4454,
      "step": 15399
    },
    {
      "epoch": 1.8881804806277587,
      "grad_norm": 2.089495133086673,
      "learning_rate": 4.2611088550624235e-08,
      "loss": 0.4364,
      "step": 15400
    },
    {
      "epoch": 1.8883030897498774,
      "grad_norm": 1.8565640374603916,
      "learning_rate": 4.251796004641401e-08,
      "loss": 0.4195,
      "step": 15401
    },
    {
      "epoch": 1.888425698871996,
      "grad_norm": 1.8812350104034787,
      "learning_rate": 4.2424932549065824e-08,
      "loss": 0.4239,
      "step": 15402
    },
    {
      "epoch": 1.8885483079941148,
      "grad_norm": 1.9535967233511735,
      "learning_rate": 4.233200606240384e-08,
      "loss": 0.4129,
      "step": 15403
    },
    {
      "epoch": 1.8886709171162335,
      "grad_norm": 1.9311989778861771,
      "learning_rate": 4.223918059024723e-08,
      "loss": 0.3703,
      "step": 15404
    },
    {
      "epoch": 1.8887935262383522,
      "grad_norm": 1.9366472027785755,
      "learning_rate": 4.214645613641183e-08,
      "loss": 0.4625,
      "step": 15405
    },
    {
      "epoch": 1.8889161353604709,
      "grad_norm": 1.975726390271466,
      "learning_rate": 4.205383270470792e-08,
      "loss": 0.4121,
      "step": 15406
    },
    {
      "epoch": 1.8890387444825896,
      "grad_norm": 2.053073392015953,
      "learning_rate": 4.196131029894329e-08,
      "loss": 0.4213,
      "step": 15407
    },
    {
      "epoch": 1.8891613536047083,
      "grad_norm": 1.8937916386978677,
      "learning_rate": 4.1868888922920184e-08,
      "loss": 0.3932,
      "step": 15408
    },
    {
      "epoch": 1.889283962726827,
      "grad_norm": 1.9311497038865153,
      "learning_rate": 4.177656858043749e-08,
      "loss": 0.3802,
      "step": 15409
    },
    {
      "epoch": 1.8894065718489457,
      "grad_norm": 2.0747881287297982,
      "learning_rate": 4.168434927528997e-08,
      "loss": 0.4544,
      "step": 15410
    },
    {
      "epoch": 1.8895291809710644,
      "grad_norm": 1.9319735497712274,
      "learning_rate": 4.159223101126764e-08,
      "loss": 0.4254,
      "step": 15411
    },
    {
      "epoch": 1.889651790093183,
      "grad_norm": 1.8431657122099094,
      "learning_rate": 4.1500213792156905e-08,
      "loss": 0.3944,
      "step": 15412
    },
    {
      "epoch": 1.8897743992153018,
      "grad_norm": 1.8894371980996747,
      "learning_rate": 4.1408297621739754e-08,
      "loss": 0.4275,
      "step": 15413
    },
    {
      "epoch": 1.8898970083374205,
      "grad_norm": 1.7835282240877164,
      "learning_rate": 4.131648250379455e-08,
      "loss": 0.4095,
      "step": 15414
    },
    {
      "epoch": 1.890019617459539,
      "grad_norm": 1.9895564667524535,
      "learning_rate": 4.122476844209411e-08,
      "loss": 0.4603,
      "step": 15415
    },
    {
      "epoch": 1.8901422265816576,
      "grad_norm": 1.9778590375315876,
      "learning_rate": 4.1133155440409025e-08,
      "loss": 0.4165,
      "step": 15416
    },
    {
      "epoch": 1.8902648357037763,
      "grad_norm": 2.058370421160074,
      "learning_rate": 4.1041643502504334e-08,
      "loss": 0.4501,
      "step": 15417
    },
    {
      "epoch": 1.890387444825895,
      "grad_norm": 1.9496065814643644,
      "learning_rate": 4.0950232632141205e-08,
      "loss": 0.4102,
      "step": 15418
    },
    {
      "epoch": 1.8905100539480137,
      "grad_norm": 2.0502212189049143,
      "learning_rate": 4.0858922833076634e-08,
      "loss": 0.4385,
      "step": 15419
    },
    {
      "epoch": 1.8906326630701324,
      "grad_norm": 2.154574384364384,
      "learning_rate": 4.076771410906427e-08,
      "loss": 0.4456,
      "step": 15420
    },
    {
      "epoch": 1.8907552721922511,
      "grad_norm": 1.9358764940860367,
      "learning_rate": 4.06766064638528e-08,
      "loss": 0.4301,
      "step": 15421
    },
    {
      "epoch": 1.8908778813143696,
      "grad_norm": 1.9707981870601783,
      "learning_rate": 4.058559990118616e-08,
      "loss": 0.4309,
      "step": 15422
    },
    {
      "epoch": 1.8910004904364883,
      "grad_norm": 1.9649542900204764,
      "learning_rate": 4.0494694424805805e-08,
      "loss": 0.4451,
      "step": 15423
    },
    {
      "epoch": 1.891123099558607,
      "grad_norm": 1.8788380295707203,
      "learning_rate": 4.0403890038447355e-08,
      "loss": 0.4174,
      "step": 15424
    },
    {
      "epoch": 1.8912457086807257,
      "grad_norm": 1.8506779542995022,
      "learning_rate": 4.031318674584367e-08,
      "loss": 0.419,
      "step": 15425
    },
    {
      "epoch": 1.8913683178028444,
      "grad_norm": 1.9148527031809253,
      "learning_rate": 4.022258455072231e-08,
      "loss": 0.4097,
      "step": 15426
    },
    {
      "epoch": 1.8914909269249631,
      "grad_norm": 1.9880752039798923,
      "learning_rate": 4.01320834568078e-08,
      "loss": 0.4308,
      "step": 15427
    },
    {
      "epoch": 1.8916135360470818,
      "grad_norm": 1.728665120040995,
      "learning_rate": 4.004168346781911e-08,
      "loss": 0.4194,
      "step": 15428
    },
    {
      "epoch": 1.8917361451692005,
      "grad_norm": 1.9875812491619147,
      "learning_rate": 3.995138458747244e-08,
      "loss": 0.4373,
      "step": 15429
    },
    {
      "epoch": 1.8918587542913192,
      "grad_norm": 2.0205533887578757,
      "learning_rate": 3.986118681947926e-08,
      "loss": 0.438,
      "step": 15430
    },
    {
      "epoch": 1.891981363413438,
      "grad_norm": 1.9500077492069088,
      "learning_rate": 3.977109016754632e-08,
      "loss": 0.423,
      "step": 15431
    },
    {
      "epoch": 1.8921039725355566,
      "grad_norm": 1.775210102138401,
      "learning_rate": 3.968109463537706e-08,
      "loss": 0.4533,
      "step": 15432
    },
    {
      "epoch": 1.8922265816576753,
      "grad_norm": 1.881694486405672,
      "learning_rate": 3.959120022667073e-08,
      "loss": 0.4723,
      "step": 15433
    },
    {
      "epoch": 1.892349190779794,
      "grad_norm": 2.133300755749832,
      "learning_rate": 3.9501406945121614e-08,
      "loss": 0.4533,
      "step": 15434
    },
    {
      "epoch": 1.8924717999019127,
      "grad_norm": 2.0025435633524693,
      "learning_rate": 3.941171479442063e-08,
      "loss": 0.4283,
      "step": 15435
    },
    {
      "epoch": 1.8925944090240314,
      "grad_norm": 2.055059173498363,
      "learning_rate": 3.9322123778254285e-08,
      "loss": 0.3813,
      "step": 15436
    },
    {
      "epoch": 1.8927170181461501,
      "grad_norm": 2.004078225591249,
      "learning_rate": 3.923263390030518e-08,
      "loss": 0.437,
      "step": 15437
    },
    {
      "epoch": 1.8928396272682688,
      "grad_norm": 1.9332034895538637,
      "learning_rate": 3.914324516425122e-08,
      "loss": 0.4304,
      "step": 15438
    },
    {
      "epoch": 1.8929622363903875,
      "grad_norm": 1.9564338118432065,
      "learning_rate": 3.90539575737664e-08,
      "loss": 0.4262,
      "step": 15439
    },
    {
      "epoch": 1.8930848455125062,
      "grad_norm": 1.8505096335521465,
      "learning_rate": 3.896477113252084e-08,
      "loss": 0.4237,
      "step": 15440
    },
    {
      "epoch": 1.893207454634625,
      "grad_norm": 1.9135436570816937,
      "learning_rate": 3.887568584417967e-08,
      "loss": 0.4435,
      "step": 15441
    },
    {
      "epoch": 1.8933300637567436,
      "grad_norm": 2.09299789226325,
      "learning_rate": 3.878670171240523e-08,
      "loss": 0.4329,
      "step": 15442
    },
    {
      "epoch": 1.8934526728788623,
      "grad_norm": 1.9796575573421198,
      "learning_rate": 3.8697818740854874e-08,
      "loss": 0.4335,
      "step": 15443
    },
    {
      "epoch": 1.893575282000981,
      "grad_norm": 1.8964076821303246,
      "learning_rate": 3.8609036933180966e-08,
      "loss": 0.434,
      "step": 15444
    },
    {
      "epoch": 1.8936978911230997,
      "grad_norm": 1.8246014854434986,
      "learning_rate": 3.8520356293033355e-08,
      "loss": 0.4106,
      "step": 15445
    },
    {
      "epoch": 1.8938205002452182,
      "grad_norm": 1.9281048384322779,
      "learning_rate": 3.843177682405691e-08,
      "loss": 0.3802,
      "step": 15446
    },
    {
      "epoch": 1.893943109367337,
      "grad_norm": 1.9440822315531934,
      "learning_rate": 3.8343298529892605e-08,
      "loss": 0.4357,
      "step": 15447
    },
    {
      "epoch": 1.8940657184894556,
      "grad_norm": 2.0291545726918723,
      "learning_rate": 3.825492141417614e-08,
      "loss": 0.4666,
      "step": 15448
    },
    {
      "epoch": 1.8941883276115743,
      "grad_norm": 1.8771515828873384,
      "learning_rate": 3.8166645480541e-08,
      "loss": 0.4204,
      "step": 15449
    },
    {
      "epoch": 1.894310936733693,
      "grad_norm": 1.8856122246775062,
      "learning_rate": 3.807847073261511e-08,
      "loss": 0.403,
      "step": 15450
    },
    {
      "epoch": 1.8944335458558117,
      "grad_norm": 1.959334980388008,
      "learning_rate": 3.799039717402225e-08,
      "loss": 0.4227,
      "step": 15451
    },
    {
      "epoch": 1.8945561549779304,
      "grad_norm": 1.9942875452324829,
      "learning_rate": 3.7902424808382854e-08,
      "loss": 0.4088,
      "step": 15452
    },
    {
      "epoch": 1.894678764100049,
      "grad_norm": 1.9617470815860623,
      "learning_rate": 3.7814553639312634e-08,
      "loss": 0.4319,
      "step": 15453
    },
    {
      "epoch": 1.8948013732221676,
      "grad_norm": 2.025880375516547,
      "learning_rate": 3.772678367042315e-08,
      "loss": 0.4392,
      "step": 15454
    },
    {
      "epoch": 1.8949239823442863,
      "grad_norm": 1.8177558672303364,
      "learning_rate": 3.763911490532207e-08,
      "loss": 0.4105,
      "step": 15455
    },
    {
      "epoch": 1.895046591466405,
      "grad_norm": 1.825860195094307,
      "learning_rate": 3.755154734761235e-08,
      "loss": 0.4237,
      "step": 15456
    },
    {
      "epoch": 1.8951692005885237,
      "grad_norm": 1.9110999687492365,
      "learning_rate": 3.7464081000893884e-08,
      "loss": 0.4214,
      "step": 15457
    },
    {
      "epoch": 1.8952918097106424,
      "grad_norm": 2.0257265741800694,
      "learning_rate": 3.737671586876101e-08,
      "loss": 0.4403,
      "step": 15458
    },
    {
      "epoch": 1.895414418832761,
      "grad_norm": 1.9426351433184994,
      "learning_rate": 3.7289451954804766e-08,
      "loss": 0.427,
      "step": 15459
    },
    {
      "epoch": 1.8955370279548798,
      "grad_norm": 1.858594470197905,
      "learning_rate": 3.720228926261227e-08,
      "loss": 0.4481,
      "step": 15460
    },
    {
      "epoch": 1.8956596370769985,
      "grad_norm": 1.9588737963803124,
      "learning_rate": 3.711522779576537e-08,
      "loss": 0.4167,
      "step": 15461
    },
    {
      "epoch": 1.8957822461991172,
      "grad_norm": 2.0059925589599517,
      "learning_rate": 3.702826755784289e-08,
      "loss": 0.408,
      "step": 15462
    },
    {
      "epoch": 1.8959048553212359,
      "grad_norm": 1.8622503504171186,
      "learning_rate": 3.6941408552419176e-08,
      "loss": 0.3925,
      "step": 15463
    },
    {
      "epoch": 1.8960274644433546,
      "grad_norm": 1.9424415693587964,
      "learning_rate": 3.685465078306388e-08,
      "loss": 0.4467,
      "step": 15464
    },
    {
      "epoch": 1.8961500735654733,
      "grad_norm": 1.953010270758222,
      "learning_rate": 3.676799425334304e-08,
      "loss": 0.3917,
      "step": 15465
    },
    {
      "epoch": 1.896272682687592,
      "grad_norm": 1.8935612711548577,
      "learning_rate": 3.668143896681853e-08,
      "loss": 0.4088,
      "step": 15466
    },
    {
      "epoch": 1.8963952918097107,
      "grad_norm": 2.072901281463275,
      "learning_rate": 3.6594984927048046e-08,
      "loss": 0.449,
      "step": 15467
    },
    {
      "epoch": 1.8965179009318294,
      "grad_norm": 2.0815954876765193,
      "learning_rate": 3.65086321375846e-08,
      "loss": 0.4433,
      "step": 15468
    },
    {
      "epoch": 1.896640510053948,
      "grad_norm": 1.676740940402929,
      "learning_rate": 3.6422380601977556e-08,
      "loss": 0.4094,
      "step": 15469
    },
    {
      "epoch": 1.8967631191760668,
      "grad_norm": 1.9285243465484545,
      "learning_rate": 3.6336230323772146e-08,
      "loss": 0.4373,
      "step": 15470
    },
    {
      "epoch": 1.8968857282981855,
      "grad_norm": 1.998560164598604,
      "learning_rate": 3.6250181306509425e-08,
      "loss": 0.4065,
      "step": 15471
    },
    {
      "epoch": 1.8970083374203042,
      "grad_norm": 1.9060469436229701,
      "learning_rate": 3.6164233553725724e-08,
      "loss": 0.3983,
      "step": 15472
    },
    {
      "epoch": 1.8971309465424229,
      "grad_norm": 2.034362655716273,
      "learning_rate": 3.6078387068954055e-08,
      "loss": 0.4342,
      "step": 15473
    },
    {
      "epoch": 1.8972535556645416,
      "grad_norm": 2.085348912570049,
      "learning_rate": 3.599264185572243e-08,
      "loss": 0.4222,
      "step": 15474
    },
    {
      "epoch": 1.8973761647866603,
      "grad_norm": 1.9396504338255263,
      "learning_rate": 3.5906997917555806e-08,
      "loss": 0.4156,
      "step": 15475
    },
    {
      "epoch": 1.897498773908779,
      "grad_norm": 1.8365408336325786,
      "learning_rate": 3.5821455257973595e-08,
      "loss": 0.3987,
      "step": 15476
    },
    {
      "epoch": 1.8976213830308974,
      "grad_norm": 1.9900960668343297,
      "learning_rate": 3.5736013880492414e-08,
      "loss": 0.419,
      "step": 15477
    },
    {
      "epoch": 1.8977439921530161,
      "grad_norm": 2.092818162764749,
      "learning_rate": 3.565067378862336e-08,
      "loss": 0.4397,
      "step": 15478
    },
    {
      "epoch": 1.8978666012751348,
      "grad_norm": 2.002687872344558,
      "learning_rate": 3.5565434985874446e-08,
      "loss": 0.4163,
      "step": 15479
    },
    {
      "epoch": 1.8979892103972535,
      "grad_norm": 1.8446513749787696,
      "learning_rate": 3.548029747574927e-08,
      "loss": 0.4033,
      "step": 15480
    },
    {
      "epoch": 1.8981118195193722,
      "grad_norm": 1.9667737094748323,
      "learning_rate": 3.5395261261746694e-08,
      "loss": 0.4044,
      "step": 15481
    },
    {
      "epoch": 1.898234428641491,
      "grad_norm": 2.1008995633711622,
      "learning_rate": 3.531032634736226e-08,
      "loss": 0.4448,
      "step": 15482
    },
    {
      "epoch": 1.8983570377636096,
      "grad_norm": 1.963493472083406,
      "learning_rate": 3.5225492736087054e-08,
      "loss": 0.3853,
      "step": 15483
    },
    {
      "epoch": 1.8984796468857283,
      "grad_norm": 2.022363474203639,
      "learning_rate": 3.514076043140746e-08,
      "loss": 0.4221,
      "step": 15484
    },
    {
      "epoch": 1.8986022560078468,
      "grad_norm": 1.903571389922527,
      "learning_rate": 3.5056129436805975e-08,
      "loss": 0.401,
      "step": 15485
    },
    {
      "epoch": 1.8987248651299655,
      "grad_norm": 1.8723667362699479,
      "learning_rate": 3.497159975576175e-08,
      "loss": 0.4273,
      "step": 15486
    },
    {
      "epoch": 1.8988474742520842,
      "grad_norm": 1.8367908052976651,
      "learning_rate": 3.4887171391748686e-08,
      "loss": 0.4191,
      "step": 15487
    },
    {
      "epoch": 1.898970083374203,
      "grad_norm": 2.0506410683146896,
      "learning_rate": 3.480284434823705e-08,
      "loss": 0.4269,
      "step": 15488
    },
    {
      "epoch": 1.8990926924963216,
      "grad_norm": 1.9612943873075206,
      "learning_rate": 3.471861862869269e-08,
      "loss": 0.4056,
      "step": 15489
    },
    {
      "epoch": 1.8992153016184403,
      "grad_norm": 1.9675399084209084,
      "learning_rate": 3.4634494236577834e-08,
      "loss": 0.3939,
      "step": 15490
    },
    {
      "epoch": 1.899337910740559,
      "grad_norm": 2.0479407258217717,
      "learning_rate": 3.455047117534943e-08,
      "loss": 0.4841,
      "step": 15491
    },
    {
      "epoch": 1.8994605198626777,
      "grad_norm": 1.9736282459074341,
      "learning_rate": 3.446654944846167e-08,
      "loss": 0.4235,
      "step": 15492
    },
    {
      "epoch": 1.8995831289847964,
      "grad_norm": 1.8560955866762119,
      "learning_rate": 3.438272905936374e-08,
      "loss": 0.425,
      "step": 15493
    },
    {
      "epoch": 1.8997057381069151,
      "grad_norm": 1.990918171276612,
      "learning_rate": 3.4299010011500376e-08,
      "loss": 0.4086,
      "step": 15494
    },
    {
      "epoch": 1.8998283472290338,
      "grad_norm": 2.010129722182485,
      "learning_rate": 3.4215392308312725e-08,
      "loss": 0.4287,
      "step": 15495
    },
    {
      "epoch": 1.8999509563511525,
      "grad_norm": 1.9764935034629054,
      "learning_rate": 3.41318759532383e-08,
      "loss": 0.4469,
      "step": 15496
    },
    {
      "epoch": 1.9000735654732712,
      "grad_norm": 1.9123830804160489,
      "learning_rate": 3.404846094970909e-08,
      "loss": 0.4267,
      "step": 15497
    },
    {
      "epoch": 1.90019617459539,
      "grad_norm": 1.994778135340016,
      "learning_rate": 3.396514730115347e-08,
      "loss": 0.4485,
      "step": 15498
    },
    {
      "epoch": 1.9003187837175086,
      "grad_norm": 2.0717695659191473,
      "learning_rate": 3.388193501099646e-08,
      "loss": 0.4734,
      "step": 15499
    },
    {
      "epoch": 1.9004413928396273,
      "grad_norm": 2.198918414848554,
      "learning_rate": 3.379882408265756e-08,
      "loss": 0.4187,
      "step": 15500
    },
    {
      "epoch": 1.900564001961746,
      "grad_norm": 1.8422439171555915,
      "learning_rate": 3.371581451955319e-08,
      "loss": 0.4277,
      "step": 15501
    },
    {
      "epoch": 1.9006866110838647,
      "grad_norm": 1.7359211058175692,
      "learning_rate": 3.36329063250948e-08,
      "loss": 0.3917,
      "step": 15502
    },
    {
      "epoch": 1.9008092202059834,
      "grad_norm": 1.9938696011427808,
      "learning_rate": 3.355009950269078e-08,
      "loss": 0.4063,
      "step": 15503
    },
    {
      "epoch": 1.9009318293281021,
      "grad_norm": 1.924778312912112,
      "learning_rate": 3.346739405574367e-08,
      "loss": 0.4823,
      "step": 15504
    },
    {
      "epoch": 1.9010544384502208,
      "grad_norm": 1.9811870462721755,
      "learning_rate": 3.338478998765382e-08,
      "loss": 0.3969,
      "step": 15505
    },
    {
      "epoch": 1.9011770475723395,
      "grad_norm": 1.8999258861544641,
      "learning_rate": 3.330228730181573e-08,
      "loss": 0.3631,
      "step": 15506
    },
    {
      "epoch": 1.9012996566944582,
      "grad_norm": 1.9683115917603637,
      "learning_rate": 3.3219886001620294e-08,
      "loss": 0.4086,
      "step": 15507
    },
    {
      "epoch": 1.901422265816577,
      "grad_norm": 1.8456001127929282,
      "learning_rate": 3.313758609045481e-08,
      "loss": 0.3994,
      "step": 15508
    },
    {
      "epoch": 1.9015448749386954,
      "grad_norm": 1.9932780075763983,
      "learning_rate": 3.3055387571701845e-08,
      "loss": 0.3966,
      "step": 15509
    },
    {
      "epoch": 1.901667484060814,
      "grad_norm": 1.9227145250787578,
      "learning_rate": 3.297329044873981e-08,
      "loss": 0.3963,
      "step": 15510
    },
    {
      "epoch": 1.9017900931829328,
      "grad_norm": 1.9554291950501088,
      "learning_rate": 3.2891294724942956e-08,
      "loss": 0.4186,
      "step": 15511
    },
    {
      "epoch": 1.9019127023050515,
      "grad_norm": 2.0294854941685783,
      "learning_rate": 3.280940040368164e-08,
      "loss": 0.4168,
      "step": 15512
    },
    {
      "epoch": 1.9020353114271702,
      "grad_norm": 1.9536156156936006,
      "learning_rate": 3.272760748832204e-08,
      "loss": 0.4401,
      "step": 15513
    },
    {
      "epoch": 1.902157920549289,
      "grad_norm": 1.79944356784267,
      "learning_rate": 3.2645915982225384e-08,
      "loss": 0.423,
      "step": 15514
    },
    {
      "epoch": 1.9022805296714076,
      "grad_norm": 2.0504841593171155,
      "learning_rate": 3.2564325888749526e-08,
      "loss": 0.4377,
      "step": 15515
    },
    {
      "epoch": 1.9024031387935263,
      "grad_norm": 2.0288277138892234,
      "learning_rate": 3.2482837211248453e-08,
      "loss": 0.409,
      "step": 15516
    },
    {
      "epoch": 1.9025257479156448,
      "grad_norm": 2.1634984938037847,
      "learning_rate": 3.240144995307143e-08,
      "loss": 0.3971,
      "step": 15517
    },
    {
      "epoch": 1.9026483570377635,
      "grad_norm": 2.0692361199623632,
      "learning_rate": 3.232016411756273e-08,
      "loss": 0.4264,
      "step": 15518
    },
    {
      "epoch": 1.9027709661598822,
      "grad_norm": 2.2082079039684523,
      "learning_rate": 3.2238979708064386e-08,
      "loss": 0.406,
      "step": 15519
    },
    {
      "epoch": 1.9028935752820009,
      "grad_norm": 2.0814961598310537,
      "learning_rate": 3.215789672791264e-08,
      "loss": 0.4104,
      "step": 15520
    },
    {
      "epoch": 1.9030161844041196,
      "grad_norm": 1.9424616785133593,
      "learning_rate": 3.207691518044037e-08,
      "loss": 0.4527,
      "step": 15521
    },
    {
      "epoch": 1.9031387935262383,
      "grad_norm": 1.8390835158788252,
      "learning_rate": 3.199603506897603e-08,
      "loss": 0.3686,
      "step": 15522
    },
    {
      "epoch": 1.903261402648357,
      "grad_norm": 1.936757247609703,
      "learning_rate": 3.1915256396843896e-08,
      "loss": 0.4606,
      "step": 15523
    },
    {
      "epoch": 1.9033840117704757,
      "grad_norm": 1.8445582188433256,
      "learning_rate": 3.183457916736382e-08,
      "loss": 0.4336,
      "step": 15524
    },
    {
      "epoch": 1.9035066208925944,
      "grad_norm": 2.0435281007199286,
      "learning_rate": 3.1754003383852315e-08,
      "loss": 0.426,
      "step": 15525
    },
    {
      "epoch": 1.903629230014713,
      "grad_norm": 1.8239840560606644,
      "learning_rate": 3.167352904962118e-08,
      "loss": 0.4009,
      "step": 15526
    },
    {
      "epoch": 1.9037518391368318,
      "grad_norm": 2.0017748157935777,
      "learning_rate": 3.159315616797776e-08,
      "loss": 0.3866,
      "step": 15527
    },
    {
      "epoch": 1.9038744482589505,
      "grad_norm": 1.904578615065916,
      "learning_rate": 3.151288474222525e-08,
      "loss": 0.4546,
      "step": 15528
    },
    {
      "epoch": 1.9039970573810692,
      "grad_norm": 1.9283611073340072,
      "learning_rate": 3.143271477566351e-08,
      "loss": 0.3725,
      "step": 15529
    },
    {
      "epoch": 1.9041196665031879,
      "grad_norm": 2.030995893478539,
      "learning_rate": 3.1352646271587684e-08,
      "loss": 0.4169,
      "step": 15530
    },
    {
      "epoch": 1.9042422756253066,
      "grad_norm": 1.9947094272902817,
      "learning_rate": 3.127267923328819e-08,
      "loss": 0.4054,
      "step": 15531
    },
    {
      "epoch": 1.9043648847474253,
      "grad_norm": 1.974893408740796,
      "learning_rate": 3.1192813664052135e-08,
      "loss": 0.4572,
      "step": 15532
    },
    {
      "epoch": 1.904487493869544,
      "grad_norm": 1.8020019667516916,
      "learning_rate": 3.111304956716216e-08,
      "loss": 0.4144,
      "step": 15533
    },
    {
      "epoch": 1.9046101029916627,
      "grad_norm": 2.062178926251098,
      "learning_rate": 3.103338694589675e-08,
      "loss": 0.4214,
      "step": 15534
    },
    {
      "epoch": 1.9047327121137814,
      "grad_norm": 2.085310753185033,
      "learning_rate": 3.095382580352996e-08,
      "loss": 0.4181,
      "step": 15535
    },
    {
      "epoch": 1.9048553212359,
      "grad_norm": 1.9763085202718922,
      "learning_rate": 3.087436614333222e-08,
      "loss": 0.4185,
      "step": 15536
    },
    {
      "epoch": 1.9049779303580188,
      "grad_norm": 1.7284759849665154,
      "learning_rate": 3.079500796856899e-08,
      "loss": 0.4358,
      "step": 15537
    },
    {
      "epoch": 1.9051005394801375,
      "grad_norm": 1.909386743057433,
      "learning_rate": 3.0715751282502646e-08,
      "loss": 0.4392,
      "step": 15538
    },
    {
      "epoch": 1.9052231486022562,
      "grad_norm": 1.8895051345610614,
      "learning_rate": 3.063659608839031e-08,
      "loss": 0.4192,
      "step": 15539
    },
    {
      "epoch": 1.9053457577243746,
      "grad_norm": 1.8819726892178157,
      "learning_rate": 3.0557542389485497e-08,
      "loss": 0.4204,
      "step": 15540
    },
    {
      "epoch": 1.9054683668464933,
      "grad_norm": 1.8789578356744105,
      "learning_rate": 3.047859018903726e-08,
      "loss": 0.3998,
      "step": 15541
    },
    {
      "epoch": 1.905590975968612,
      "grad_norm": 2.1142113457676426,
      "learning_rate": 3.0399739490291344e-08,
      "loss": 0.4355,
      "step": 15542
    },
    {
      "epoch": 1.9057135850907307,
      "grad_norm": 1.9954543519944483,
      "learning_rate": 3.032099029648822e-08,
      "loss": 0.4032,
      "step": 15543
    },
    {
      "epoch": 1.9058361942128494,
      "grad_norm": 2.1822503510320614,
      "learning_rate": 3.024234261086417e-08,
      "loss": 0.4296,
      "step": 15544
    },
    {
      "epoch": 1.9059588033349681,
      "grad_norm": 2.1068169246022843,
      "learning_rate": 3.016379643665246e-08,
      "loss": 0.3691,
      "step": 15545
    },
    {
      "epoch": 1.9060814124570868,
      "grad_norm": 2.022166054800977,
      "learning_rate": 3.008535177708133e-08,
      "loss": 0.4654,
      "step": 15546
    },
    {
      "epoch": 1.9062040215792055,
      "grad_norm": 2.0409387522084037,
      "learning_rate": 3.0007008635374867e-08,
      "loss": 0.4654,
      "step": 15547
    },
    {
      "epoch": 1.906326630701324,
      "grad_norm": 1.883367511401196,
      "learning_rate": 2.9928767014752725e-08,
      "loss": 0.4044,
      "step": 15548
    },
    {
      "epoch": 1.9064492398234427,
      "grad_norm": 1.830315459633394,
      "learning_rate": 2.985062691843177e-08,
      "loss": 0.4013,
      "step": 15549
    },
    {
      "epoch": 1.9065718489455614,
      "grad_norm": 2.064760393825054,
      "learning_rate": 2.9772588349622768e-08,
      "loss": 0.4711,
      "step": 15550
    },
    {
      "epoch": 1.9066944580676801,
      "grad_norm": 1.8560918928860448,
      "learning_rate": 2.9694651311533705e-08,
      "loss": 0.3798,
      "step": 15551
    },
    {
      "epoch": 1.9068170671897988,
      "grad_norm": 1.974209676064101,
      "learning_rate": 2.9616815807367583e-08,
      "loss": 0.4329,
      "step": 15552
    },
    {
      "epoch": 1.9069396763119175,
      "grad_norm": 1.9772036697036302,
      "learning_rate": 2.953908184032406e-08,
      "loss": 0.4274,
      "step": 15553
    },
    {
      "epoch": 1.9070622854340362,
      "grad_norm": 1.9155576480612262,
      "learning_rate": 2.9461449413597522e-08,
      "loss": 0.402,
      "step": 15554
    },
    {
      "epoch": 1.907184894556155,
      "grad_norm": 1.9620875666814928,
      "learning_rate": 2.9383918530379584e-08,
      "loss": 0.3836,
      "step": 15555
    },
    {
      "epoch": 1.9073075036782736,
      "grad_norm": 2.0763051181656076,
      "learning_rate": 2.9306489193856313e-08,
      "loss": 0.4995,
      "step": 15556
    },
    {
      "epoch": 1.9074301128003923,
      "grad_norm": 1.7927165321947531,
      "learning_rate": 2.922916140721016e-08,
      "loss": 0.3868,
      "step": 15557
    },
    {
      "epoch": 1.907552721922511,
      "grad_norm": 1.9996235086641019,
      "learning_rate": 2.9151935173619693e-08,
      "loss": 0.4417,
      "step": 15558
    },
    {
      "epoch": 1.9076753310446297,
      "grad_norm": 2.0296911584540616,
      "learning_rate": 2.9074810496259597e-08,
      "loss": 0.4344,
      "step": 15559
    },
    {
      "epoch": 1.9077979401667484,
      "grad_norm": 1.800404499987006,
      "learning_rate": 2.8997787378298448e-08,
      "loss": 0.4285,
      "step": 15560
    },
    {
      "epoch": 1.9079205492888671,
      "grad_norm": 1.9921019259974577,
      "learning_rate": 2.8920865822903154e-08,
      "loss": 0.3876,
      "step": 15561
    },
    {
      "epoch": 1.9080431584109858,
      "grad_norm": 1.8473057263961579,
      "learning_rate": 2.88440458332348e-08,
      "loss": 0.3967,
      "step": 15562
    },
    {
      "epoch": 1.9081657675331045,
      "grad_norm": 2.0779736708680727,
      "learning_rate": 2.876732741245114e-08,
      "loss": 0.4179,
      "step": 15563
    },
    {
      "epoch": 1.9082883766552232,
      "grad_norm": 2.005163103972028,
      "learning_rate": 2.8690710563705203e-08,
      "loss": 0.4367,
      "step": 15564
    },
    {
      "epoch": 1.908410985777342,
      "grad_norm": 2.0882227574570456,
      "learning_rate": 2.861419529014614e-08,
      "loss": 0.441,
      "step": 15565
    },
    {
      "epoch": 1.9085335948994606,
      "grad_norm": 1.8525155856884972,
      "learning_rate": 2.8537781594919212e-08,
      "loss": 0.4039,
      "step": 15566
    },
    {
      "epoch": 1.9086562040215793,
      "grad_norm": 1.9598201652594505,
      "learning_rate": 2.8461469481164682e-08,
      "loss": 0.4398,
      "step": 15567
    },
    {
      "epoch": 1.908778813143698,
      "grad_norm": 2.035672798923114,
      "learning_rate": 2.8385258952018934e-08,
      "loss": 0.4205,
      "step": 15568
    },
    {
      "epoch": 1.9089014222658167,
      "grad_norm": 2.052241607495116,
      "learning_rate": 2.8309150010615017e-08,
      "loss": 0.4411,
      "step": 15569
    },
    {
      "epoch": 1.9090240313879354,
      "grad_norm": 2.0535699387163335,
      "learning_rate": 2.8233142660080703e-08,
      "loss": 0.4369,
      "step": 15570
    },
    {
      "epoch": 1.909146640510054,
      "grad_norm": 1.8568238575117828,
      "learning_rate": 2.8157236903540164e-08,
      "loss": 0.4262,
      "step": 15571
    },
    {
      "epoch": 1.9092692496321726,
      "grad_norm": 1.9641877277045807,
      "learning_rate": 2.8081432744113402e-08,
      "loss": 0.4327,
      "step": 15572
    },
    {
      "epoch": 1.9093918587542913,
      "grad_norm": 1.864216812458425,
      "learning_rate": 2.8005730184915703e-08,
      "loss": 0.4272,
      "step": 15573
    },
    {
      "epoch": 1.90951446787641,
      "grad_norm": 1.7565748776418249,
      "learning_rate": 2.793012922905902e-08,
      "loss": 0.4275,
      "step": 15574
    },
    {
      "epoch": 1.9096370769985287,
      "grad_norm": 2.014790486297349,
      "learning_rate": 2.7854629879650318e-08,
      "loss": 0.4226,
      "step": 15575
    },
    {
      "epoch": 1.9097596861206474,
      "grad_norm": 2.122965658923859,
      "learning_rate": 2.777923213979322e-08,
      "loss": 0.4464,
      "step": 15576
    },
    {
      "epoch": 1.909882295242766,
      "grad_norm": 1.9982088779709628,
      "learning_rate": 2.7703936012586086e-08,
      "loss": 0.4294,
      "step": 15577
    },
    {
      "epoch": 1.9100049043648848,
      "grad_norm": 1.9816364922554637,
      "learning_rate": 2.7628741501123934e-08,
      "loss": 0.4194,
      "step": 15578
    },
    {
      "epoch": 1.9101275134870033,
      "grad_norm": 1.9575735414411508,
      "learning_rate": 2.755364860849763e-08,
      "loss": 0.4505,
      "step": 15579
    },
    {
      "epoch": 1.910250122609122,
      "grad_norm": 1.7321104534246485,
      "learning_rate": 2.7478657337793868e-08,
      "loss": 0.4192,
      "step": 15580
    },
    {
      "epoch": 1.9103727317312407,
      "grad_norm": 1.914267652502476,
      "learning_rate": 2.7403767692094075e-08,
      "loss": 0.449,
      "step": 15581
    },
    {
      "epoch": 1.9104953408533594,
      "grad_norm": 1.9912115985760321,
      "learning_rate": 2.7328979674476897e-08,
      "loss": 0.431,
      "step": 15582
    },
    {
      "epoch": 1.910617949975478,
      "grad_norm": 1.9183577599644281,
      "learning_rate": 2.7254293288016263e-08,
      "loss": 0.4295,
      "step": 15583
    },
    {
      "epoch": 1.9107405590975968,
      "grad_norm": 2.2814405915398956,
      "learning_rate": 2.7179708535781945e-08,
      "loss": 0.4454,
      "step": 15584
    },
    {
      "epoch": 1.9108631682197155,
      "grad_norm": 2.0096045631154746,
      "learning_rate": 2.710522542083899e-08,
      "loss": 0.4202,
      "step": 15585
    },
    {
      "epoch": 1.9109857773418342,
      "grad_norm": 1.9176737592329525,
      "learning_rate": 2.7030843946249664e-08,
      "loss": 0.3809,
      "step": 15586
    },
    {
      "epoch": 1.9111083864639529,
      "grad_norm": 1.8692438176679689,
      "learning_rate": 2.6956564115070416e-08,
      "loss": 0.3855,
      "step": 15587
    },
    {
      "epoch": 1.9112309955860716,
      "grad_norm": 1.7910446520360015,
      "learning_rate": 2.6882385930354638e-08,
      "loss": 0.4085,
      "step": 15588
    },
    {
      "epoch": 1.9113536047081903,
      "grad_norm": 1.8842047232527024,
      "learning_rate": 2.6808309395151276e-08,
      "loss": 0.442,
      "step": 15589
    },
    {
      "epoch": 1.911476213830309,
      "grad_norm": 1.9465703425064451,
      "learning_rate": 2.6734334512504568e-08,
      "loss": 0.4121,
      "step": 15590
    },
    {
      "epoch": 1.9115988229524277,
      "grad_norm": 2.1245029105894395,
      "learning_rate": 2.666046128545513e-08,
      "loss": 0.451,
      "step": 15591
    },
    {
      "epoch": 1.9117214320745464,
      "grad_norm": 1.8658859617747392,
      "learning_rate": 2.6586689717039703e-08,
      "loss": 0.4433,
      "step": 15592
    },
    {
      "epoch": 1.911844041196665,
      "grad_norm": 2.047839731186438,
      "learning_rate": 2.651301981029031e-08,
      "loss": 0.4539,
      "step": 15593
    },
    {
      "epoch": 1.9119666503187838,
      "grad_norm": 1.748848671988062,
      "learning_rate": 2.643945156823452e-08,
      "loss": 0.4275,
      "step": 15594
    },
    {
      "epoch": 1.9120892594409025,
      "grad_norm": 1.9504899999137593,
      "learning_rate": 2.6365984993896586e-08,
      "loss": 0.4259,
      "step": 15595
    },
    {
      "epoch": 1.9122118685630212,
      "grad_norm": 1.9582362842947427,
      "learning_rate": 2.6292620090295483e-08,
      "loss": 0.4024,
      "step": 15596
    },
    {
      "epoch": 1.9123344776851399,
      "grad_norm": 1.831126327821065,
      "learning_rate": 2.621935686044741e-08,
      "loss": 0.4232,
      "step": 15597
    },
    {
      "epoch": 1.9124570868072586,
      "grad_norm": 2.0158298945639457,
      "learning_rate": 2.6146195307363286e-08,
      "loss": 0.4218,
      "step": 15598
    },
    {
      "epoch": 1.9125796959293773,
      "grad_norm": 1.9968205444239275,
      "learning_rate": 2.607313543404988e-08,
      "loss": 0.4197,
      "step": 15599
    },
    {
      "epoch": 1.912702305051496,
      "grad_norm": 1.9177739921024168,
      "learning_rate": 2.6000177243510615e-08,
      "loss": 0.4619,
      "step": 15600
    },
    {
      "epoch": 1.9128249141736147,
      "grad_norm": 1.797868624667788,
      "learning_rate": 2.5927320738743657e-08,
      "loss": 0.422,
      "step": 15601
    },
    {
      "epoch": 1.9129475232957334,
      "grad_norm": 1.926877373554165,
      "learning_rate": 2.5854565922744103e-08,
      "loss": 0.467,
      "step": 15602
    },
    {
      "epoch": 1.9130701324178518,
      "grad_norm": 1.858912971349977,
      "learning_rate": 2.5781912798501785e-08,
      "loss": 0.4305,
      "step": 15603
    },
    {
      "epoch": 1.9131927415399705,
      "grad_norm": 2.0866808458654638,
      "learning_rate": 2.5709361369003205e-08,
      "loss": 0.4323,
      "step": 15604
    },
    {
      "epoch": 1.9133153506620892,
      "grad_norm": 2.0280136022485746,
      "learning_rate": 2.563691163723042e-08,
      "loss": 0.4345,
      "step": 15605
    },
    {
      "epoch": 1.913437959784208,
      "grad_norm": 1.8956983332394481,
      "learning_rate": 2.5564563606161054e-08,
      "loss": 0.3983,
      "step": 15606
    },
    {
      "epoch": 1.9135605689063266,
      "grad_norm": 1.824336325116953,
      "learning_rate": 2.549231727876855e-08,
      "loss": 0.3961,
      "step": 15607
    },
    {
      "epoch": 1.9136831780284453,
      "grad_norm": 1.8245614321722998,
      "learning_rate": 2.5420172658022492e-08,
      "loss": 0.3986,
      "step": 15608
    },
    {
      "epoch": 1.913805787150564,
      "grad_norm": 1.9261085967878948,
      "learning_rate": 2.534812974688883e-08,
      "loss": 0.4106,
      "step": 15609
    },
    {
      "epoch": 1.9139283962726827,
      "grad_norm": 1.791923300146823,
      "learning_rate": 2.5276188548327418e-08,
      "loss": 0.4138,
      "step": 15610
    },
    {
      "epoch": 1.9140510053948012,
      "grad_norm": 1.884735110496215,
      "learning_rate": 2.5204349065295898e-08,
      "loss": 0.4157,
      "step": 15611
    },
    {
      "epoch": 1.91417361451692,
      "grad_norm": 2.051620619218435,
      "learning_rate": 2.513261130074718e-08,
      "loss": 0.4319,
      "step": 15612
    },
    {
      "epoch": 1.9142962236390386,
      "grad_norm": 1.8911875000645455,
      "learning_rate": 2.5060975257629462e-08,
      "loss": 0.4409,
      "step": 15613
    },
    {
      "epoch": 1.9144188327611573,
      "grad_norm": 1.9141568260237498,
      "learning_rate": 2.4989440938887054e-08,
      "loss": 0.3924,
      "step": 15614
    },
    {
      "epoch": 1.914541441883276,
      "grad_norm": 1.973847920410064,
      "learning_rate": 2.4918008347460665e-08,
      "loss": 0.3982,
      "step": 15615
    },
    {
      "epoch": 1.9146640510053947,
      "grad_norm": 2.1385927050446902,
      "learning_rate": 2.4846677486285444e-08,
      "loss": 0.419,
      "step": 15616
    },
    {
      "epoch": 1.9147866601275134,
      "grad_norm": 1.786948371962001,
      "learning_rate": 2.4775448358294042e-08,
      "loss": 0.4535,
      "step": 15617
    },
    {
      "epoch": 1.9149092692496321,
      "grad_norm": 2.0659272911122235,
      "learning_rate": 2.4704320966413563e-08,
      "loss": 0.4453,
      "step": 15618
    },
    {
      "epoch": 1.9150318783717508,
      "grad_norm": 1.8844304697824499,
      "learning_rate": 2.4633295313567783e-08,
      "loss": 0.4247,
      "step": 15619
    },
    {
      "epoch": 1.9151544874938695,
      "grad_norm": 1.743905065241281,
      "learning_rate": 2.4562371402675477e-08,
      "loss": 0.3921,
      "step": 15620
    },
    {
      "epoch": 1.9152770966159882,
      "grad_norm": 1.9301852277781106,
      "learning_rate": 2.4491549236652367e-08,
      "loss": 0.4054,
      "step": 15621
    },
    {
      "epoch": 1.915399705738107,
      "grad_norm": 2.088258937739457,
      "learning_rate": 2.442082881840946e-08,
      "loss": 0.3917,
      "step": 15622
    },
    {
      "epoch": 1.9155223148602256,
      "grad_norm": 2.1237812020188076,
      "learning_rate": 2.435021015085276e-08,
      "loss": 0.4446,
      "step": 15623
    },
    {
      "epoch": 1.9156449239823443,
      "grad_norm": 1.8073311043736302,
      "learning_rate": 2.4279693236885503e-08,
      "loss": 0.4183,
      "step": 15624
    },
    {
      "epoch": 1.915767533104463,
      "grad_norm": 1.8889010561707085,
      "learning_rate": 2.420927807940565e-08,
      "loss": 0.4287,
      "step": 15625
    },
    {
      "epoch": 1.9158901422265817,
      "grad_norm": 1.8496727436067402,
      "learning_rate": 2.4138964681307553e-08,
      "loss": 0.3821,
      "step": 15626
    },
    {
      "epoch": 1.9160127513487004,
      "grad_norm": 1.9101330730071766,
      "learning_rate": 2.406875304548084e-08,
      "loss": 0.4301,
      "step": 15627
    },
    {
      "epoch": 1.9161353604708191,
      "grad_norm": 1.8306648863599175,
      "learning_rate": 2.399864317481182e-08,
      "loss": 0.3989,
      "step": 15628
    },
    {
      "epoch": 1.9162579695929378,
      "grad_norm": 2.048587256879982,
      "learning_rate": 2.392863507218235e-08,
      "loss": 0.454,
      "step": 15629
    },
    {
      "epoch": 1.9163805787150565,
      "grad_norm": 1.8644221205847324,
      "learning_rate": 2.3858728740469295e-08,
      "loss": 0.4037,
      "step": 15630
    },
    {
      "epoch": 1.9165031878371752,
      "grad_norm": 2.0476514656885674,
      "learning_rate": 2.3788924182545914e-08,
      "loss": 0.449,
      "step": 15631
    },
    {
      "epoch": 1.916625796959294,
      "grad_norm": 2.0041100959890104,
      "learning_rate": 2.3719221401281855e-08,
      "loss": 0.3822,
      "step": 15632
    },
    {
      "epoch": 1.9167484060814126,
      "grad_norm": 1.9643224180580263,
      "learning_rate": 2.3649620399541495e-08,
      "loss": 0.4471,
      "step": 15633
    },
    {
      "epoch": 1.916871015203531,
      "grad_norm": 1.8554783535060018,
      "learning_rate": 2.3580121180185877e-08,
      "loss": 0.4339,
      "step": 15634
    },
    {
      "epoch": 1.9169936243256498,
      "grad_norm": 2.0741880065491873,
      "learning_rate": 2.351072374607133e-08,
      "loss": 0.4172,
      "step": 15635
    },
    {
      "epoch": 1.9171162334477685,
      "grad_norm": 1.9254680968443814,
      "learning_rate": 2.3441428100050568e-08,
      "loss": 0.4404,
      "step": 15636
    },
    {
      "epoch": 1.9172388425698872,
      "grad_norm": 2.067659539619749,
      "learning_rate": 2.3372234244971038e-08,
      "loss": 0.4414,
      "step": 15637
    },
    {
      "epoch": 1.917361451692006,
      "grad_norm": 2.0545951019857798,
      "learning_rate": 2.3303142183677684e-08,
      "loss": 0.4425,
      "step": 15638
    },
    {
      "epoch": 1.9174840608141246,
      "grad_norm": 1.7861456617564009,
      "learning_rate": 2.3234151919009628e-08,
      "loss": 0.4352,
      "step": 15639
    },
    {
      "epoch": 1.9176066699362433,
      "grad_norm": 1.745285667163155,
      "learning_rate": 2.3165263453802657e-08,
      "loss": 0.3633,
      "step": 15640
    },
    {
      "epoch": 1.917729279058362,
      "grad_norm": 1.834363085446097,
      "learning_rate": 2.309647679088839e-08,
      "loss": 0.4369,
      "step": 15641
    },
    {
      "epoch": 1.9178518881804805,
      "grad_norm": 1.8567721491474953,
      "learning_rate": 2.3027791933094012e-08,
      "loss": 0.435,
      "step": 15642
    },
    {
      "epoch": 1.9179744973025992,
      "grad_norm": 1.8514849287108686,
      "learning_rate": 2.2959208883242266e-08,
      "loss": 0.4492,
      "step": 15643
    },
    {
      "epoch": 1.9180971064247179,
      "grad_norm": 1.8971902008613515,
      "learning_rate": 2.289072764415229e-08,
      "loss": 0.4474,
      "step": 15644
    },
    {
      "epoch": 1.9182197155468366,
      "grad_norm": 2.0718528705334185,
      "learning_rate": 2.2822348218639045e-08,
      "loss": 0.4352,
      "step": 15645
    },
    {
      "epoch": 1.9183423246689553,
      "grad_norm": 1.725114993984252,
      "learning_rate": 2.2754070609512513e-08,
      "loss": 0.4025,
      "step": 15646
    },
    {
      "epoch": 1.918464933791074,
      "grad_norm": 1.9243641256023953,
      "learning_rate": 2.268589481957961e-08,
      "loss": 0.3869,
      "step": 15647
    },
    {
      "epoch": 1.9185875429131927,
      "grad_norm": 2.0833413942372947,
      "learning_rate": 2.261782085164199e-08,
      "loss": 0.4501,
      "step": 15648
    },
    {
      "epoch": 1.9187101520353114,
      "grad_norm": 1.8691041856859776,
      "learning_rate": 2.254984870849769e-08,
      "loss": 0.4442,
      "step": 15649
    },
    {
      "epoch": 1.91883276115743,
      "grad_norm": 1.9586845978123804,
      "learning_rate": 2.2481978392940585e-08,
      "loss": 0.4605,
      "step": 15650
    },
    {
      "epoch": 1.9189553702795488,
      "grad_norm": 1.9813970378005616,
      "learning_rate": 2.2414209907760665e-08,
      "loss": 0.4304,
      "step": 15651
    },
    {
      "epoch": 1.9190779794016675,
      "grad_norm": 1.8197342766764848,
      "learning_rate": 2.2346543255742647e-08,
      "loss": 0.4153,
      "step": 15652
    },
    {
      "epoch": 1.9192005885237862,
      "grad_norm": 1.916252991326625,
      "learning_rate": 2.227897843966792e-08,
      "loss": 0.4558,
      "step": 15653
    },
    {
      "epoch": 1.9193231976459049,
      "grad_norm": 1.8846014004588298,
      "learning_rate": 2.2211515462313704e-08,
      "loss": 0.4306,
      "step": 15654
    },
    {
      "epoch": 1.9194458067680236,
      "grad_norm": 1.9655714613321755,
      "learning_rate": 2.2144154326452784e-08,
      "loss": 0.4446,
      "step": 15655
    },
    {
      "epoch": 1.9195684158901423,
      "grad_norm": 1.8923213156503609,
      "learning_rate": 2.2076895034854052e-08,
      "loss": 0.4476,
      "step": 15656
    },
    {
      "epoch": 1.919691025012261,
      "grad_norm": 1.7343788609526247,
      "learning_rate": 2.200973759028141e-08,
      "loss": 0.3898,
      "step": 15657
    },
    {
      "epoch": 1.9198136341343797,
      "grad_norm": 2.121586887168819,
      "learning_rate": 2.1942681995495708e-08,
      "loss": 0.4663,
      "step": 15658
    },
    {
      "epoch": 1.9199362432564984,
      "grad_norm": 2.063771923453693,
      "learning_rate": 2.1875728253252792e-08,
      "loss": 0.4265,
      "step": 15659
    },
    {
      "epoch": 1.920058852378617,
      "grad_norm": 1.880173111101026,
      "learning_rate": 2.1808876366304633e-08,
      "loss": 0.4014,
      "step": 15660
    },
    {
      "epoch": 1.9201814615007358,
      "grad_norm": 2.0553076587499266,
      "learning_rate": 2.174212633739875e-08,
      "loss": 0.4232,
      "step": 15661
    },
    {
      "epoch": 1.9203040706228545,
      "grad_norm": 1.9940804091223916,
      "learning_rate": 2.167547816927934e-08,
      "loss": 0.4226,
      "step": 15662
    },
    {
      "epoch": 1.9204266797449732,
      "grad_norm": 1.8400917186354022,
      "learning_rate": 2.1608931864685044e-08,
      "loss": 0.387,
      "step": 15663
    },
    {
      "epoch": 1.9205492888670919,
      "grad_norm": 2.0258174150323214,
      "learning_rate": 2.1542487426351177e-08,
      "loss": 0.4214,
      "step": 15664
    },
    {
      "epoch": 1.9206718979892106,
      "grad_norm": 1.9018130935198934,
      "learning_rate": 2.1476144857008884e-08,
      "loss": 0.4229,
      "step": 15665
    },
    {
      "epoch": 1.920794507111329,
      "grad_norm": 1.7991030274988922,
      "learning_rate": 2.1409904159384876e-08,
      "loss": 0.4316,
      "step": 15666
    },
    {
      "epoch": 1.9209171162334477,
      "grad_norm": 2.0342961621577076,
      "learning_rate": 2.1343765336201972e-08,
      "loss": 0.4443,
      "step": 15667
    },
    {
      "epoch": 1.9210397253555664,
      "grad_norm": 1.6544648988315689,
      "learning_rate": 2.1277728390178276e-08,
      "loss": 0.4249,
      "step": 15668
    },
    {
      "epoch": 1.9211623344776851,
      "grad_norm": 2.162931849469145,
      "learning_rate": 2.1211793324028283e-08,
      "loss": 0.4733,
      "step": 15669
    },
    {
      "epoch": 1.9212849435998038,
      "grad_norm": 1.9511993152417353,
      "learning_rate": 2.114596014046205e-08,
      "loss": 0.4036,
      "step": 15670
    },
    {
      "epoch": 1.9214075527219225,
      "grad_norm": 2.0237866250020558,
      "learning_rate": 2.108022884218519e-08,
      "loss": 0.4343,
      "step": 15671
    },
    {
      "epoch": 1.9215301618440412,
      "grad_norm": 1.8469261983341714,
      "learning_rate": 2.101459943189943e-08,
      "loss": 0.4235,
      "step": 15672
    },
    {
      "epoch": 1.9216527709661597,
      "grad_norm": 2.017562650040961,
      "learning_rate": 2.0949071912302332e-08,
      "loss": 0.38,
      "step": 15673
    },
    {
      "epoch": 1.9217753800882784,
      "grad_norm": 2.109032670641275,
      "learning_rate": 2.08836462860873e-08,
      "loss": 0.437,
      "step": 15674
    },
    {
      "epoch": 1.9218979892103971,
      "grad_norm": 1.8621565593515126,
      "learning_rate": 2.0818322555943294e-08,
      "loss": 0.4159,
      "step": 15675
    },
    {
      "epoch": 1.9220205983325158,
      "grad_norm": 1.935141088330212,
      "learning_rate": 2.0753100724555387e-08,
      "loss": 0.4259,
      "step": 15676
    },
    {
      "epoch": 1.9221432074546345,
      "grad_norm": 1.9673479247436607,
      "learning_rate": 2.0687980794603934e-08,
      "loss": 0.423,
      "step": 15677
    },
    {
      "epoch": 1.9222658165767532,
      "grad_norm": 2.1475606227158255,
      "learning_rate": 2.0622962768765965e-08,
      "loss": 0.447,
      "step": 15678
    },
    {
      "epoch": 1.922388425698872,
      "grad_norm": 1.9641493360642426,
      "learning_rate": 2.0558046649713505e-08,
      "loss": 0.4284,
      "step": 15679
    },
    {
      "epoch": 1.9225110348209906,
      "grad_norm": 2.2145909444882186,
      "learning_rate": 2.0493232440114972e-08,
      "loss": 0.4268,
      "step": 15680
    },
    {
      "epoch": 1.9226336439431093,
      "grad_norm": 1.8805929085698134,
      "learning_rate": 2.0428520142634078e-08,
      "loss": 0.431,
      "step": 15681
    },
    {
      "epoch": 1.922756253065228,
      "grad_norm": 2.195189960636368,
      "learning_rate": 2.0363909759930912e-08,
      "loss": 0.4478,
      "step": 15682
    },
    {
      "epoch": 1.9228788621873467,
      "grad_norm": 1.8631147138051645,
      "learning_rate": 2.029940129466057e-08,
      "loss": 0.4428,
      "step": 15683
    },
    {
      "epoch": 1.9230014713094654,
      "grad_norm": 1.9472913480400813,
      "learning_rate": 2.02349947494751e-08,
      "loss": 0.4087,
      "step": 15684
    },
    {
      "epoch": 1.9231240804315841,
      "grad_norm": 1.865027209113039,
      "learning_rate": 2.0170690127021553e-08,
      "loss": 0.3882,
      "step": 15685
    },
    {
      "epoch": 1.9232466895537028,
      "grad_norm": 1.840077722625428,
      "learning_rate": 2.0106487429942533e-08,
      "loss": 0.4016,
      "step": 15686
    },
    {
      "epoch": 1.9233692986758215,
      "grad_norm": 1.938634321506619,
      "learning_rate": 2.0042386660877045e-08,
      "loss": 0.4429,
      "step": 15687
    },
    {
      "epoch": 1.9234919077979402,
      "grad_norm": 1.9995752247258396,
      "learning_rate": 1.99783878224602e-08,
      "loss": 0.4357,
      "step": 15688
    },
    {
      "epoch": 1.923614516920059,
      "grad_norm": 2.054443449071308,
      "learning_rate": 1.9914490917322116e-08,
      "loss": 0.4347,
      "step": 15689
    },
    {
      "epoch": 1.9237371260421776,
      "grad_norm": 2.276577118102449,
      "learning_rate": 1.9850695948089026e-08,
      "loss": 0.3956,
      "step": 15690
    },
    {
      "epoch": 1.9238597351642963,
      "grad_norm": 1.907864283923594,
      "learning_rate": 1.978700291738328e-08,
      "loss": 0.4271,
      "step": 15691
    },
    {
      "epoch": 1.923982344286415,
      "grad_norm": 1.9062071664851312,
      "learning_rate": 1.9723411827822226e-08,
      "loss": 0.407,
      "step": 15692
    },
    {
      "epoch": 1.9241049534085337,
      "grad_norm": 1.896029761550443,
      "learning_rate": 1.965992268202016e-08,
      "loss": 0.432,
      "step": 15693
    },
    {
      "epoch": 1.9242275625306524,
      "grad_norm": 1.8362767540749536,
      "learning_rate": 1.9596535482586388e-08,
      "loss": 0.3913,
      "step": 15694
    },
    {
      "epoch": 1.9243501716527711,
      "grad_norm": 1.925189059296543,
      "learning_rate": 1.9533250232126045e-08,
      "loss": 0.4075,
      "step": 15695
    },
    {
      "epoch": 1.9244727807748898,
      "grad_norm": 1.8765554823815442,
      "learning_rate": 1.9470066933240383e-08,
      "loss": 0.4097,
      "step": 15696
    },
    {
      "epoch": 1.9245953898970083,
      "grad_norm": 2.016913570313214,
      "learning_rate": 1.9406985588526772e-08,
      "loss": 0.3902,
      "step": 15697
    },
    {
      "epoch": 1.924717999019127,
      "grad_norm": 1.8981585515937935,
      "learning_rate": 1.934400620057758e-08,
      "loss": 0.3369,
      "step": 15698
    },
    {
      "epoch": 1.9248406081412457,
      "grad_norm": 2.0637724326914,
      "learning_rate": 1.9281128771981017e-08,
      "loss": 0.4146,
      "step": 15699
    },
    {
      "epoch": 1.9249632172633644,
      "grad_norm": 1.66673755920499,
      "learning_rate": 1.9218353305321957e-08,
      "loss": 0.3495,
      "step": 15700
    },
    {
      "epoch": 1.925085826385483,
      "grad_norm": 1.8756253127699964,
      "learning_rate": 1.915567980318056e-08,
      "loss": 0.455,
      "step": 15701
    },
    {
      "epoch": 1.9252084355076018,
      "grad_norm": 1.9479838358903196,
      "learning_rate": 1.909310826813282e-08,
      "loss": 0.4223,
      "step": 15702
    },
    {
      "epoch": 1.9253310446297205,
      "grad_norm": 1.8941260312983328,
      "learning_rate": 1.9030638702750014e-08,
      "loss": 0.4615,
      "step": 15703
    },
    {
      "epoch": 1.9254536537518392,
      "grad_norm": 2.0080379944002873,
      "learning_rate": 1.8968271109600368e-08,
      "loss": 0.4729,
      "step": 15704
    },
    {
      "epoch": 1.9255762628739577,
      "grad_norm": 2.0552516037977426,
      "learning_rate": 1.890600549124738e-08,
      "loss": 0.4262,
      "step": 15705
    },
    {
      "epoch": 1.9256988719960764,
      "grad_norm": 1.8441917038693698,
      "learning_rate": 1.8843841850249845e-08,
      "loss": 0.4214,
      "step": 15706
    },
    {
      "epoch": 1.925821481118195,
      "grad_norm": 2.1086334670148763,
      "learning_rate": 1.878178018916266e-08,
      "loss": 0.4252,
      "step": 15707
    },
    {
      "epoch": 1.9259440902403138,
      "grad_norm": 1.9300155352939496,
      "learning_rate": 1.871982051053739e-08,
      "loss": 0.434,
      "step": 15708
    },
    {
      "epoch": 1.9260666993624325,
      "grad_norm": 2.0237504130836177,
      "learning_rate": 1.865796281692006e-08,
      "loss": 0.4149,
      "step": 15709
    },
    {
      "epoch": 1.9261893084845512,
      "grad_norm": 1.8150016088430934,
      "learning_rate": 1.859620711085336e-08,
      "loss": 0.4016,
      "step": 15710
    },
    {
      "epoch": 1.9263119176066699,
      "grad_norm": 1.9369093321670283,
      "learning_rate": 1.8534553394875256e-08,
      "loss": 0.4084,
      "step": 15711
    },
    {
      "epoch": 1.9264345267287886,
      "grad_norm": 1.9808571815003022,
      "learning_rate": 1.8473001671520385e-08,
      "loss": 0.4567,
      "step": 15712
    },
    {
      "epoch": 1.9265571358509073,
      "grad_norm": 1.9867840357819777,
      "learning_rate": 1.8411551943318394e-08,
      "loss": 0.4319,
      "step": 15713
    },
    {
      "epoch": 1.926679744973026,
      "grad_norm": 1.8830504333842444,
      "learning_rate": 1.8350204212794765e-08,
      "loss": 0.4437,
      "step": 15714
    },
    {
      "epoch": 1.9268023540951447,
      "grad_norm": 2.0434067739977912,
      "learning_rate": 1.8288958482471365e-08,
      "loss": 0.4234,
      "step": 15715
    },
    {
      "epoch": 1.9269249632172634,
      "grad_norm": 2.162048415668139,
      "learning_rate": 1.822781475486507e-08,
      "loss": 0.4372,
      "step": 15716
    },
    {
      "epoch": 1.927047572339382,
      "grad_norm": 1.8715470688328266,
      "learning_rate": 1.816677303248915e-08,
      "loss": 0.3906,
      "step": 15717
    },
    {
      "epoch": 1.9271701814615008,
      "grad_norm": 1.9902741956451762,
      "learning_rate": 1.8105833317852985e-08,
      "loss": 0.4486,
      "step": 15718
    },
    {
      "epoch": 1.9272927905836195,
      "grad_norm": 1.821732317290106,
      "learning_rate": 1.804499561346068e-08,
      "loss": 0.4,
      "step": 15719
    },
    {
      "epoch": 1.9274153997057382,
      "grad_norm": 2.0043375206642864,
      "learning_rate": 1.7984259921812742e-08,
      "loss": 0.4553,
      "step": 15720
    },
    {
      "epoch": 1.9275380088278569,
      "grad_norm": 1.7934600108187302,
      "learning_rate": 1.7923626245406335e-08,
      "loss": 0.3852,
      "step": 15721
    },
    {
      "epoch": 1.9276606179499756,
      "grad_norm": 1.9200418757446616,
      "learning_rate": 1.78630945867328e-08,
      "loss": 0.4469,
      "step": 15722
    },
    {
      "epoch": 1.9277832270720943,
      "grad_norm": 2.1560078433748395,
      "learning_rate": 1.7802664948280145e-08,
      "loss": 0.465,
      "step": 15723
    },
    {
      "epoch": 1.927905836194213,
      "grad_norm": 1.854721341316398,
      "learning_rate": 1.7742337332532498e-08,
      "loss": 0.4031,
      "step": 15724
    },
    {
      "epoch": 1.9280284453163317,
      "grad_norm": 1.8681121852611997,
      "learning_rate": 1.7682111741969265e-08,
      "loss": 0.4531,
      "step": 15725
    },
    {
      "epoch": 1.9281510544384504,
      "grad_norm": 1.8721737671921945,
      "learning_rate": 1.7621988179065965e-08,
      "loss": 0.4181,
      "step": 15726
    },
    {
      "epoch": 1.928273663560569,
      "grad_norm": 1.972991673645669,
      "learning_rate": 1.756196664629367e-08,
      "loss": 0.3921,
      "step": 15727
    },
    {
      "epoch": 1.9283962726826875,
      "grad_norm": 2.1607909875435456,
      "learning_rate": 1.7502047146119306e-08,
      "loss": 0.446,
      "step": 15728
    },
    {
      "epoch": 1.9285188818048062,
      "grad_norm": 1.912179788968138,
      "learning_rate": 1.7442229681005617e-08,
      "loss": 0.4206,
      "step": 15729
    },
    {
      "epoch": 1.928641490926925,
      "grad_norm": 2.1244980240843114,
      "learning_rate": 1.7382514253411474e-08,
      "loss": 0.3916,
      "step": 15730
    },
    {
      "epoch": 1.9287641000490436,
      "grad_norm": 1.991684680583154,
      "learning_rate": 1.7322900865791025e-08,
      "loss": 0.4306,
      "step": 15731
    },
    {
      "epoch": 1.9288867091711623,
      "grad_norm": 2.0157683441247403,
      "learning_rate": 1.726338952059453e-08,
      "loss": 0.4116,
      "step": 15732
    },
    {
      "epoch": 1.929009318293281,
      "grad_norm": 1.9356450402180492,
      "learning_rate": 1.720398022026809e-08,
      "loss": 0.405,
      "step": 15733
    },
    {
      "epoch": 1.9291319274153997,
      "grad_norm": 1.8677531543143666,
      "learning_rate": 1.714467296725364e-08,
      "loss": 0.3968,
      "step": 15734
    },
    {
      "epoch": 1.9292545365375184,
      "grad_norm": 1.9214958481650832,
      "learning_rate": 1.7085467763988673e-08,
      "loss": 0.3819,
      "step": 15735
    },
    {
      "epoch": 1.929377145659637,
      "grad_norm": 2.0007276147816913,
      "learning_rate": 1.7026364612906244e-08,
      "loss": 0.3975,
      "step": 15736
    },
    {
      "epoch": 1.9294997547817556,
      "grad_norm": 1.9733567973248838,
      "learning_rate": 1.6967363516436354e-08,
      "loss": 0.3852,
      "step": 15737
    },
    {
      "epoch": 1.9296223639038743,
      "grad_norm": 1.9113053651847396,
      "learning_rate": 1.6908464477003726e-08,
      "loss": 0.4263,
      "step": 15738
    },
    {
      "epoch": 1.929744973025993,
      "grad_norm": 1.9758469921986386,
      "learning_rate": 1.6849667497028933e-08,
      "loss": 0.4495,
      "step": 15739
    },
    {
      "epoch": 1.9298675821481117,
      "grad_norm": 1.8882832545492907,
      "learning_rate": 1.679097257892892e-08,
      "loss": 0.4209,
      "step": 15740
    },
    {
      "epoch": 1.9299901912702304,
      "grad_norm": 1.9287405136726234,
      "learning_rate": 1.6732379725116486e-08,
      "loss": 0.4411,
      "step": 15741
    },
    {
      "epoch": 1.9301128003923491,
      "grad_norm": 2.0907792893326445,
      "learning_rate": 1.667388893799915e-08,
      "loss": 0.4395,
      "step": 15742
    },
    {
      "epoch": 1.9302354095144678,
      "grad_norm": 2.054698442952436,
      "learning_rate": 1.6615500219981372e-08,
      "loss": 0.4239,
      "step": 15743
    },
    {
      "epoch": 1.9303580186365865,
      "grad_norm": 2.023205320052155,
      "learning_rate": 1.655721357346318e-08,
      "loss": 0.4188,
      "step": 15744
    },
    {
      "epoch": 1.9304806277587052,
      "grad_norm": 2.0218200973212554,
      "learning_rate": 1.6499029000839883e-08,
      "loss": 0.448,
      "step": 15745
    },
    {
      "epoch": 1.930603236880824,
      "grad_norm": 1.9614658078459275,
      "learning_rate": 1.6440946504503452e-08,
      "loss": 0.4082,
      "step": 15746
    },
    {
      "epoch": 1.9307258460029426,
      "grad_norm": 1.8396974074848165,
      "learning_rate": 1.638296608684087e-08,
      "loss": 0.4153,
      "step": 15747
    },
    {
      "epoch": 1.9308484551250613,
      "grad_norm": 2.0315494487501558,
      "learning_rate": 1.6325087750235223e-08,
      "loss": 0.4275,
      "step": 15748
    },
    {
      "epoch": 1.93097106424718,
      "grad_norm": 1.7692548967648507,
      "learning_rate": 1.6267311497065452e-08,
      "loss": 0.4036,
      "step": 15749
    },
    {
      "epoch": 1.9310936733692987,
      "grad_norm": 1.9397253859273678,
      "learning_rate": 1.620963732970604e-08,
      "loss": 0.3933,
      "step": 15750
    },
    {
      "epoch": 1.9312162824914174,
      "grad_norm": 1.9029508155667552,
      "learning_rate": 1.6152065250528148e-08,
      "loss": 0.4154,
      "step": 15751
    },
    {
      "epoch": 1.9313388916135361,
      "grad_norm": 2.1545796627560807,
      "learning_rate": 1.6094595261897382e-08,
      "loss": 0.3915,
      "step": 15752
    },
    {
      "epoch": 1.9314615007356548,
      "grad_norm": 2.032655872321709,
      "learning_rate": 1.60372273661763e-08,
      "loss": 0.4172,
      "step": 15753
    },
    {
      "epoch": 1.9315841098577735,
      "grad_norm": 1.93260370634696,
      "learning_rate": 1.5979961565722733e-08,
      "loss": 0.4497,
      "step": 15754
    },
    {
      "epoch": 1.9317067189798922,
      "grad_norm": 1.951555728184178,
      "learning_rate": 1.592279786289036e-08,
      "loss": 0.3948,
      "step": 15755
    },
    {
      "epoch": 1.931829328102011,
      "grad_norm": 1.9438562781629243,
      "learning_rate": 1.5865736260028686e-08,
      "loss": 0.4258,
      "step": 15756
    },
    {
      "epoch": 1.9319519372241296,
      "grad_norm": 1.9456484176896514,
      "learning_rate": 1.580877675948278e-08,
      "loss": 0.4183,
      "step": 15757
    },
    {
      "epoch": 1.9320745463462483,
      "grad_norm": 2.0977918216106968,
      "learning_rate": 1.575191936359438e-08,
      "loss": 0.4319,
      "step": 15758
    },
    {
      "epoch": 1.932197155468367,
      "grad_norm": 2.024472263309197,
      "learning_rate": 1.569516407470023e-08,
      "loss": 0.3937,
      "step": 15759
    },
    {
      "epoch": 1.9323197645904855,
      "grad_norm": 1.8809709917115165,
      "learning_rate": 1.563851089513263e-08,
      "loss": 0.3812,
      "step": 15760
    },
    {
      "epoch": 1.9324423737126042,
      "grad_norm": 1.8801806303168576,
      "learning_rate": 1.558195982722055e-08,
      "loss": 0.4144,
      "step": 15761
    },
    {
      "epoch": 1.9325649828347229,
      "grad_norm": 1.9704851077611225,
      "learning_rate": 1.552551087328824e-08,
      "loss": 0.4484,
      "step": 15762
    },
    {
      "epoch": 1.9326875919568416,
      "grad_norm": 1.8309739081554848,
      "learning_rate": 1.5469164035655793e-08,
      "loss": 0.3848,
      "step": 15763
    },
    {
      "epoch": 1.9328102010789603,
      "grad_norm": 1.9955006020348074,
      "learning_rate": 1.5412919316639406e-08,
      "loss": 0.4751,
      "step": 15764
    },
    {
      "epoch": 1.932932810201079,
      "grad_norm": 1.8720008055974031,
      "learning_rate": 1.535677671855057e-08,
      "loss": 0.3979,
      "step": 15765
    },
    {
      "epoch": 1.9330554193231977,
      "grad_norm": 1.8136998029826992,
      "learning_rate": 1.530073624369688e-08,
      "loss": 0.4385,
      "step": 15766
    },
    {
      "epoch": 1.9331780284453162,
      "grad_norm": 1.8259958259656797,
      "learning_rate": 1.5244797894381768e-08,
      "loss": 0.4418,
      "step": 15767
    },
    {
      "epoch": 1.9333006375674349,
      "grad_norm": 2.0475136212630436,
      "learning_rate": 1.5188961672904512e-08,
      "loss": 0.4422,
      "step": 15768
    },
    {
      "epoch": 1.9334232466895536,
      "grad_norm": 1.9865449864702414,
      "learning_rate": 1.5133227581559663e-08,
      "loss": 0.4101,
      "step": 15769
    },
    {
      "epoch": 1.9335458558116723,
      "grad_norm": 1.8617437409060988,
      "learning_rate": 1.5077595622638442e-08,
      "loss": 0.3906,
      "step": 15770
    },
    {
      "epoch": 1.933668464933791,
      "grad_norm": 1.6038629159616573,
      "learning_rate": 1.5022065798427078e-08,
      "loss": 0.3687,
      "step": 15771
    },
    {
      "epoch": 1.9337910740559097,
      "grad_norm": 1.8842875086423638,
      "learning_rate": 1.4966638111208466e-08,
      "loss": 0.4266,
      "step": 15772
    },
    {
      "epoch": 1.9339136831780284,
      "grad_norm": 2.2237797582829826,
      "learning_rate": 1.4911312563260228e-08,
      "loss": 0.4182,
      "step": 15773
    },
    {
      "epoch": 1.934036292300147,
      "grad_norm": 1.8747859405296325,
      "learning_rate": 1.4856089156856658e-08,
      "loss": 0.4083,
      "step": 15774
    },
    {
      "epoch": 1.9341589014222658,
      "grad_norm": 1.9315806318692414,
      "learning_rate": 1.4800967894267049e-08,
      "loss": 0.3896,
      "step": 15775
    },
    {
      "epoch": 1.9342815105443845,
      "grad_norm": 1.856448660691436,
      "learning_rate": 1.4745948777757646e-08,
      "loss": 0.4201,
      "step": 15776
    },
    {
      "epoch": 1.9344041196665032,
      "grad_norm": 1.9761069936159235,
      "learning_rate": 1.4691031809589418e-08,
      "loss": 0.4445,
      "step": 15777
    },
    {
      "epoch": 1.9345267287886219,
      "grad_norm": 1.9984199965098441,
      "learning_rate": 1.4636216992020003e-08,
      "loss": 0.4828,
      "step": 15778
    },
    {
      "epoch": 1.9346493379107406,
      "grad_norm": 1.7976786989950277,
      "learning_rate": 1.4581504327301765e-08,
      "loss": 0.4215,
      "step": 15779
    },
    {
      "epoch": 1.9347719470328593,
      "grad_norm": 1.788170715505387,
      "learning_rate": 1.4526893817684017e-08,
      "loss": 0.4512,
      "step": 15780
    },
    {
      "epoch": 1.934894556154978,
      "grad_norm": 1.9542875564331545,
      "learning_rate": 1.4472385465410798e-08,
      "loss": 0.4356,
      "step": 15781
    },
    {
      "epoch": 1.9350171652770967,
      "grad_norm": 1.9144562866846995,
      "learning_rate": 1.4417979272723093e-08,
      "loss": 0.4283,
      "step": 15782
    },
    {
      "epoch": 1.9351397743992154,
      "grad_norm": 1.9517734832415028,
      "learning_rate": 1.4363675241856612e-08,
      "loss": 0.3974,
      "step": 15783
    },
    {
      "epoch": 1.935262383521334,
      "grad_norm": 1.8581926061757952,
      "learning_rate": 1.4309473375043737e-08,
      "loss": 0.3853,
      "step": 15784
    },
    {
      "epoch": 1.9353849926434528,
      "grad_norm": 1.9976389803876358,
      "learning_rate": 1.4255373674511851e-08,
      "loss": 0.453,
      "step": 15785
    },
    {
      "epoch": 1.9355076017655715,
      "grad_norm": 1.9617719538323202,
      "learning_rate": 1.4201376142485012e-08,
      "loss": 0.412,
      "step": 15786
    },
    {
      "epoch": 1.9356302108876902,
      "grad_norm": 1.9827048284313422,
      "learning_rate": 1.4147480781181999e-08,
      "loss": 0.3999,
      "step": 15787
    },
    {
      "epoch": 1.9357528200098089,
      "grad_norm": 1.8288996033985996,
      "learning_rate": 1.4093687592818817e-08,
      "loss": 0.3908,
      "step": 15788
    },
    {
      "epoch": 1.9358754291319276,
      "grad_norm": 1.9051955632146071,
      "learning_rate": 1.4039996579605919e-08,
      "loss": 0.3812,
      "step": 15789
    },
    {
      "epoch": 1.9359980382540463,
      "grad_norm": 1.8422121643002314,
      "learning_rate": 1.3986407743750153e-08,
      "loss": 0.442,
      "step": 15790
    },
    {
      "epoch": 1.9361206473761647,
      "grad_norm": 2.0288070500332567,
      "learning_rate": 1.3932921087454199e-08,
      "loss": 0.4346,
      "step": 15791
    },
    {
      "epoch": 1.9362432564982834,
      "grad_norm": 2.0418126350448023,
      "learning_rate": 1.38795366129163e-08,
      "loss": 0.4279,
      "step": 15792
    },
    {
      "epoch": 1.9363658656204021,
      "grad_norm": 2.1481539325744254,
      "learning_rate": 1.3826254322330812e-08,
      "loss": 0.398,
      "step": 15793
    },
    {
      "epoch": 1.9364884747425208,
      "grad_norm": 2.0648273597574476,
      "learning_rate": 1.3773074217887927e-08,
      "loss": 0.4771,
      "step": 15794
    },
    {
      "epoch": 1.9366110838646395,
      "grad_norm": 2.0277035595177373,
      "learning_rate": 1.3719996301772842e-08,
      "loss": 0.376,
      "step": 15795
    },
    {
      "epoch": 1.9367336929867582,
      "grad_norm": 1.9566225120024894,
      "learning_rate": 1.36670205761677e-08,
      "loss": 0.4255,
      "step": 15796
    },
    {
      "epoch": 1.936856302108877,
      "grad_norm": 2.1341090889962993,
      "learning_rate": 1.3614147043249649e-08,
      "loss": 0.481,
      "step": 15797
    },
    {
      "epoch": 1.9369789112309956,
      "grad_norm": 2.0495319858057712,
      "learning_rate": 1.3561375705192226e-08,
      "loss": 0.4169,
      "step": 15798
    },
    {
      "epoch": 1.9371015203531141,
      "grad_norm": 1.789802595074522,
      "learning_rate": 1.3508706564163976e-08,
      "loss": 0.4443,
      "step": 15799
    },
    {
      "epoch": 1.9372241294752328,
      "grad_norm": 1.66779045571944,
      "learning_rate": 1.3456139622329557e-08,
      "loss": 0.3592,
      "step": 15800
    },
    {
      "epoch": 1.9373467385973515,
      "grad_norm": 2.00869224634239,
      "learning_rate": 1.340367488185057e-08,
      "loss": 0.4373,
      "step": 15801
    },
    {
      "epoch": 1.9374693477194702,
      "grad_norm": 1.8412974733927379,
      "learning_rate": 1.335131234488224e-08,
      "loss": 0.4537,
      "step": 15802
    },
    {
      "epoch": 1.937591956841589,
      "grad_norm": 2.024275325928519,
      "learning_rate": 1.3299052013577285e-08,
      "loss": 0.4682,
      "step": 15803
    },
    {
      "epoch": 1.9377145659637076,
      "grad_norm": 1.9766705319904416,
      "learning_rate": 1.3246893890083711e-08,
      "loss": 0.4387,
      "step": 15804
    },
    {
      "epoch": 1.9378371750858263,
      "grad_norm": 1.8462408320622559,
      "learning_rate": 1.3194837976545361e-08,
      "loss": 0.3651,
      "step": 15805
    },
    {
      "epoch": 1.937959784207945,
      "grad_norm": 1.9742384408096525,
      "learning_rate": 1.3142884275101353e-08,
      "loss": 0.4333,
      "step": 15806
    },
    {
      "epoch": 1.9380823933300637,
      "grad_norm": 2.012226906852543,
      "learning_rate": 1.3091032787887758e-08,
      "loss": 0.4052,
      "step": 15807
    },
    {
      "epoch": 1.9382050024521824,
      "grad_norm": 2.1555156713204076,
      "learning_rate": 1.3039283517035372e-08,
      "loss": 0.4865,
      "step": 15808
    },
    {
      "epoch": 1.9383276115743011,
      "grad_norm": 1.9259486760235585,
      "learning_rate": 1.2987636464671105e-08,
      "loss": 0.4119,
      "step": 15809
    },
    {
      "epoch": 1.9384502206964198,
      "grad_norm": 1.8738361240269619,
      "learning_rate": 1.293609163291798e-08,
      "loss": 0.3913,
      "step": 15810
    },
    {
      "epoch": 1.9385728298185385,
      "grad_norm": 2.0455981686786573,
      "learning_rate": 1.288464902389458e-08,
      "loss": 0.4493,
      "step": 15811
    },
    {
      "epoch": 1.9386954389406572,
      "grad_norm": 2.1201415030900073,
      "learning_rate": 1.2833308639715048e-08,
      "loss": 0.4175,
      "step": 15812
    },
    {
      "epoch": 1.938818048062776,
      "grad_norm": 1.8728687182416184,
      "learning_rate": 1.2782070482489917e-08,
      "loss": 0.4166,
      "step": 15813
    },
    {
      "epoch": 1.9389406571848946,
      "grad_norm": 1.8607579203316829,
      "learning_rate": 1.2730934554325003e-08,
      "loss": 0.3953,
      "step": 15814
    },
    {
      "epoch": 1.9390632663070133,
      "grad_norm": 2.010515458138749,
      "learning_rate": 1.267990085732168e-08,
      "loss": 0.451,
      "step": 15815
    },
    {
      "epoch": 1.939185875429132,
      "grad_norm": 1.9450864054892794,
      "learning_rate": 1.2628969393577995e-08,
      "loss": 0.422,
      "step": 15816
    },
    {
      "epoch": 1.9393084845512507,
      "grad_norm": 1.9421843836571626,
      "learning_rate": 1.2578140165187547e-08,
      "loss": 0.4642,
      "step": 15817
    },
    {
      "epoch": 1.9394310936733694,
      "grad_norm": 2.1368259295533742,
      "learning_rate": 1.2527413174238945e-08,
      "loss": 0.4013,
      "step": 15818
    },
    {
      "epoch": 1.939553702795488,
      "grad_norm": 1.9091567013607393,
      "learning_rate": 1.2476788422817187e-08,
      "loss": 0.3513,
      "step": 15819
    },
    {
      "epoch": 1.9396763119176068,
      "grad_norm": 1.7755591834782416,
      "learning_rate": 1.2426265913003389e-08,
      "loss": 0.4291,
      "step": 15820
    },
    {
      "epoch": 1.9397989210397255,
      "grad_norm": 1.9804949280712203,
      "learning_rate": 1.2375845646873941e-08,
      "loss": 0.4395,
      "step": 15821
    },
    {
      "epoch": 1.939921530161844,
      "grad_norm": 1.9382463238990086,
      "learning_rate": 1.2325527626501355e-08,
      "loss": 0.4779,
      "step": 15822
    },
    {
      "epoch": 1.9400441392839627,
      "grad_norm": 2.048794320211156,
      "learning_rate": 1.2275311853953697e-08,
      "loss": 0.4184,
      "step": 15823
    },
    {
      "epoch": 1.9401667484060814,
      "grad_norm": 2.0401268645038937,
      "learning_rate": 1.2225198331294874e-08,
      "loss": 0.4328,
      "step": 15824
    },
    {
      "epoch": 1.9402893575282,
      "grad_norm": 1.9361257501921165,
      "learning_rate": 1.2175187060584347e-08,
      "loss": 0.405,
      "step": 15825
    },
    {
      "epoch": 1.9404119666503188,
      "grad_norm": 1.965693302400113,
      "learning_rate": 1.2125278043878253e-08,
      "loss": 0.4229,
      "step": 15826
    },
    {
      "epoch": 1.9405345757724375,
      "grad_norm": 1.6750844356575305,
      "learning_rate": 1.2075471283227724e-08,
      "loss": 0.3874,
      "step": 15827
    },
    {
      "epoch": 1.9406571848945562,
      "grad_norm": 1.9686483530999606,
      "learning_rate": 1.2025766780679737e-08,
      "loss": 0.4469,
      "step": 15828
    },
    {
      "epoch": 1.9407797940166749,
      "grad_norm": 2.0056621892863786,
      "learning_rate": 1.197616453827738e-08,
      "loss": 0.4419,
      "step": 15829
    },
    {
      "epoch": 1.9409024031387934,
      "grad_norm": 1.9762894565005236,
      "learning_rate": 1.1926664558059298e-08,
      "loss": 0.4065,
      "step": 15830
    },
    {
      "epoch": 1.941025012260912,
      "grad_norm": 2.2622968670532244,
      "learning_rate": 1.1877266842060253e-08,
      "loss": 0.4465,
      "step": 15831
    },
    {
      "epoch": 1.9411476213830308,
      "grad_norm": 2.04280744856272,
      "learning_rate": 1.1827971392310288e-08,
      "loss": 0.4253,
      "step": 15832
    },
    {
      "epoch": 1.9412702305051495,
      "grad_norm": 1.7667067087387942,
      "learning_rate": 1.1778778210835563e-08,
      "loss": 0.4005,
      "step": 15833
    },
    {
      "epoch": 1.9413928396272682,
      "grad_norm": 2.136655968395881,
      "learning_rate": 1.1729687299658344e-08,
      "loss": 0.4138,
      "step": 15834
    },
    {
      "epoch": 1.9415154487493869,
      "grad_norm": 1.8925175287619058,
      "learning_rate": 1.1680698660795908e-08,
      "loss": 0.4023,
      "step": 15835
    },
    {
      "epoch": 1.9416380578715056,
      "grad_norm": 1.9674833753072491,
      "learning_rate": 1.1631812296261924e-08,
      "loss": 0.4288,
      "step": 15836
    },
    {
      "epoch": 1.9417606669936243,
      "grad_norm": 2.109335515039915,
      "learning_rate": 1.1583028208065894e-08,
      "loss": 0.4291,
      "step": 15837
    },
    {
      "epoch": 1.941883276115743,
      "grad_norm": 1.9520418524625174,
      "learning_rate": 1.1534346398212882e-08,
      "loss": 0.378,
      "step": 15838
    },
    {
      "epoch": 1.9420058852378617,
      "grad_norm": 1.8465375471968346,
      "learning_rate": 1.1485766868703507e-08,
      "loss": 0.4205,
      "step": 15839
    },
    {
      "epoch": 1.9421284943599804,
      "grad_norm": 1.8150711682413112,
      "learning_rate": 1.1437289621534787e-08,
      "loss": 0.4133,
      "step": 15840
    },
    {
      "epoch": 1.942251103482099,
      "grad_norm": 1.7969625884392078,
      "learning_rate": 1.138891465869929e-08,
      "loss": 0.4387,
      "step": 15841
    },
    {
      "epoch": 1.9423737126042178,
      "grad_norm": 1.9203565406038756,
      "learning_rate": 1.1340641982184874e-08,
      "loss": 0.4338,
      "step": 15842
    },
    {
      "epoch": 1.9424963217263365,
      "grad_norm": 1.8920401454868896,
      "learning_rate": 1.1292471593976061e-08,
      "loss": 0.4742,
      "step": 15843
    },
    {
      "epoch": 1.9426189308484552,
      "grad_norm": 1.926929649134179,
      "learning_rate": 1.1244403496052659e-08,
      "loss": 0.4175,
      "step": 15844
    },
    {
      "epoch": 1.9427415399705739,
      "grad_norm": 1.8907354392000508,
      "learning_rate": 1.1196437690390028e-08,
      "loss": 0.4485,
      "step": 15845
    },
    {
      "epoch": 1.9428641490926926,
      "grad_norm": 1.8612458829285414,
      "learning_rate": 1.1148574178959926e-08,
      "loss": 0.3964,
      "step": 15846
    },
    {
      "epoch": 1.9429867582148113,
      "grad_norm": 1.9410369525130378,
      "learning_rate": 1.1100812963729668e-08,
      "loss": 0.4599,
      "step": 15847
    },
    {
      "epoch": 1.94310936733693,
      "grad_norm": 1.9487827438889849,
      "learning_rate": 1.1053154046662406e-08,
      "loss": 0.4768,
      "step": 15848
    },
    {
      "epoch": 1.9432319764590487,
      "grad_norm": 1.88800349173779,
      "learning_rate": 1.1005597429716574e-08,
      "loss": 0.44,
      "step": 15849
    },
    {
      "epoch": 1.9433545855811674,
      "grad_norm": 1.8934304958508186,
      "learning_rate": 1.0958143114847275e-08,
      "loss": 0.3634,
      "step": 15850
    },
    {
      "epoch": 1.943477194703286,
      "grad_norm": 2.05787906600125,
      "learning_rate": 1.0910791104004615e-08,
      "loss": 0.4523,
      "step": 15851
    },
    {
      "epoch": 1.9435998038254048,
      "grad_norm": 1.89932467296742,
      "learning_rate": 1.0863541399135092e-08,
      "loss": 0.4191,
      "step": 15852
    },
    {
      "epoch": 1.9437224129475235,
      "grad_norm": 1.9667761870211784,
      "learning_rate": 1.0816394002180763e-08,
      "loss": 0.4447,
      "step": 15853
    },
    {
      "epoch": 1.943845022069642,
      "grad_norm": 2.2103591359460175,
      "learning_rate": 1.0769348915079525e-08,
      "loss": 0.4304,
      "step": 15854
    },
    {
      "epoch": 1.9439676311917606,
      "grad_norm": 1.9304883713952936,
      "learning_rate": 1.0722406139764829e-08,
      "loss": 0.4211,
      "step": 15855
    },
    {
      "epoch": 1.9440902403138793,
      "grad_norm": 2.0502873205031684,
      "learning_rate": 1.0675565678165967e-08,
      "loss": 0.3864,
      "step": 15856
    },
    {
      "epoch": 1.944212849435998,
      "grad_norm": 1.9966025660564533,
      "learning_rate": 1.0628827532208342e-08,
      "loss": 0.4309,
      "step": 15857
    },
    {
      "epoch": 1.9443354585581167,
      "grad_norm": 1.9125020811535638,
      "learning_rate": 1.0582191703813193e-08,
      "loss": 0.3709,
      "step": 15858
    },
    {
      "epoch": 1.9444580676802354,
      "grad_norm": 2.009168556069171,
      "learning_rate": 1.053565819489677e-08,
      "loss": 0.4822,
      "step": 15859
    },
    {
      "epoch": 1.9445806768023541,
      "grad_norm": 1.8778865399023759,
      "learning_rate": 1.0489227007372538e-08,
      "loss": 0.441,
      "step": 15860
    },
    {
      "epoch": 1.9447032859244728,
      "grad_norm": 1.9991979389418184,
      "learning_rate": 1.044289814314814e-08,
      "loss": 0.4286,
      "step": 15861
    },
    {
      "epoch": 1.9448258950465913,
      "grad_norm": 1.9402406490975832,
      "learning_rate": 1.039667160412816e-08,
      "loss": 0.4512,
      "step": 15862
    },
    {
      "epoch": 1.94494850416871,
      "grad_norm": 1.915745892700923,
      "learning_rate": 1.0350547392212196e-08,
      "loss": 0.4454,
      "step": 15863
    },
    {
      "epoch": 1.9450711132908287,
      "grad_norm": 1.923610888867623,
      "learning_rate": 1.0304525509296782e-08,
      "loss": 0.4475,
      "step": 15864
    },
    {
      "epoch": 1.9451937224129474,
      "grad_norm": 2.0296185642293536,
      "learning_rate": 1.0258605957272627e-08,
      "loss": 0.3919,
      "step": 15865
    },
    {
      "epoch": 1.9453163315350661,
      "grad_norm": 1.9110651753956396,
      "learning_rate": 1.0212788738027667e-08,
      "loss": 0.4379,
      "step": 15866
    },
    {
      "epoch": 1.9454389406571848,
      "grad_norm": 1.9176247174406815,
      "learning_rate": 1.0167073853444842e-08,
      "loss": 0.4142,
      "step": 15867
    },
    {
      "epoch": 1.9455615497793035,
      "grad_norm": 2.0519468155600733,
      "learning_rate": 1.0121461305403202e-08,
      "loss": 0.3699,
      "step": 15868
    },
    {
      "epoch": 1.9456841589014222,
      "grad_norm": 1.8870436560863784,
      "learning_rate": 1.0075951095777636e-08,
      "loss": 0.4497,
      "step": 15869
    },
    {
      "epoch": 1.945806768023541,
      "grad_norm": 1.9358946752858286,
      "learning_rate": 1.0030543226438316e-08,
      "loss": 0.4241,
      "step": 15870
    },
    {
      "epoch": 1.9459293771456596,
      "grad_norm": 2.136620868278662,
      "learning_rate": 9.985237699251805e-09,
      "loss": 0.4657,
      "step": 15871
    },
    {
      "epoch": 1.9460519862677783,
      "grad_norm": 1.9390758726429667,
      "learning_rate": 9.9400345160805e-09,
      "loss": 0.4022,
      "step": 15872
    },
    {
      "epoch": 1.946174595389897,
      "grad_norm": 1.9234276307271965,
      "learning_rate": 9.894933678781804e-09,
      "loss": 0.3823,
      "step": 15873
    },
    {
      "epoch": 1.9462972045120157,
      "grad_norm": 1.856140775885396,
      "learning_rate": 9.849935189210069e-09,
      "loss": 0.4181,
      "step": 15874
    },
    {
      "epoch": 1.9464198136341344,
      "grad_norm": 1.9759666556994444,
      "learning_rate": 9.805039049214093e-09,
      "loss": 0.445,
      "step": 15875
    },
    {
      "epoch": 1.946542422756253,
      "grad_norm": 2.071616088341421,
      "learning_rate": 9.7602452606399e-09,
      "loss": 0.4625,
      "step": 15876
    },
    {
      "epoch": 1.9466650318783718,
      "grad_norm": 1.776160339327787,
      "learning_rate": 9.715553825327962e-09,
      "loss": 0.426,
      "step": 15877
    },
    {
      "epoch": 1.9467876410004905,
      "grad_norm": 2.053010094798788,
      "learning_rate": 9.670964745115418e-09,
      "loss": 0.436,
      "step": 15878
    },
    {
      "epoch": 1.9469102501226092,
      "grad_norm": 1.822426454266408,
      "learning_rate": 9.62647802183525e-09,
      "loss": 0.4376,
      "step": 15879
    },
    {
      "epoch": 1.947032859244728,
      "grad_norm": 2.020827887958417,
      "learning_rate": 9.582093657315716e-09,
      "loss": 0.432,
      "step": 15880
    },
    {
      "epoch": 1.9471554683668466,
      "grad_norm": 2.090661598109125,
      "learning_rate": 9.537811653380913e-09,
      "loss": 0.4044,
      "step": 15881
    },
    {
      "epoch": 1.9472780774889653,
      "grad_norm": 1.960135139989259,
      "learning_rate": 9.49363201185105e-09,
      "loss": 0.3832,
      "step": 15882
    },
    {
      "epoch": 1.947400686611084,
      "grad_norm": 1.8363629282458438,
      "learning_rate": 9.4495547345419e-09,
      "loss": 0.423,
      "step": 15883
    },
    {
      "epoch": 1.9475232957332027,
      "grad_norm": 1.8471247194135711,
      "learning_rate": 9.405579823265343e-09,
      "loss": 0.4152,
      "step": 15884
    },
    {
      "epoch": 1.9476459048553212,
      "grad_norm": 2.062992667884446,
      "learning_rate": 9.361707279828824e-09,
      "loss": 0.4435,
      "step": 15885
    },
    {
      "epoch": 1.9477685139774399,
      "grad_norm": 2.052665132796119,
      "learning_rate": 9.317937106035346e-09,
      "loss": 0.4051,
      "step": 15886
    },
    {
      "epoch": 1.9478911230995586,
      "grad_norm": 2.032491873835074,
      "learning_rate": 9.274269303684303e-09,
      "loss": 0.4128,
      "step": 15887
    },
    {
      "epoch": 1.9480137322216773,
      "grad_norm": 1.806040955130953,
      "learning_rate": 9.230703874569813e-09,
      "loss": 0.4298,
      "step": 15888
    },
    {
      "epoch": 1.948136341343796,
      "grad_norm": 2.0397690289484998,
      "learning_rate": 9.1872408204835e-09,
      "loss": 0.4432,
      "step": 15889
    },
    {
      "epoch": 1.9482589504659147,
      "grad_norm": 1.8918089670103482,
      "learning_rate": 9.143880143210882e-09,
      "loss": 0.4427,
      "step": 15890
    },
    {
      "epoch": 1.9483815595880334,
      "grad_norm": 1.8213701863837217,
      "learning_rate": 9.100621844534418e-09,
      "loss": 0.3965,
      "step": 15891
    },
    {
      "epoch": 1.948504168710152,
      "grad_norm": 1.7719886742421824,
      "learning_rate": 9.057465926232411e-09,
      "loss": 0.435,
      "step": 15892
    },
    {
      "epoch": 1.9486267778322706,
      "grad_norm": 1.7095714747031387,
      "learning_rate": 9.014412390078441e-09,
      "loss": 0.4278,
      "step": 15893
    },
    {
      "epoch": 1.9487493869543893,
      "grad_norm": 1.898351962405442,
      "learning_rate": 8.971461237841928e-09,
      "loss": 0.3927,
      "step": 15894
    },
    {
      "epoch": 1.948871996076508,
      "grad_norm": 1.9623244796069883,
      "learning_rate": 8.928612471288401e-09,
      "loss": 0.4228,
      "step": 15895
    },
    {
      "epoch": 1.9489946051986267,
      "grad_norm": 1.91977393361595,
      "learning_rate": 8.885866092178952e-09,
      "loss": 0.4026,
      "step": 15896
    },
    {
      "epoch": 1.9491172143207454,
      "grad_norm": 1.9758116964078662,
      "learning_rate": 8.843222102270788e-09,
      "loss": 0.4213,
      "step": 15897
    },
    {
      "epoch": 1.949239823442864,
      "grad_norm": 1.7142459186184555,
      "learning_rate": 8.800680503316117e-09,
      "loss": 0.414,
      "step": 15898
    },
    {
      "epoch": 1.9493624325649828,
      "grad_norm": 1.9664798570537516,
      "learning_rate": 8.758241297063819e-09,
      "loss": 0.4287,
      "step": 15899
    },
    {
      "epoch": 1.9494850416871015,
      "grad_norm": 2.0064391510962998,
      "learning_rate": 8.715904485258609e-09,
      "loss": 0.4898,
      "step": 15900
    },
    {
      "epoch": 1.9496076508092202,
      "grad_norm": 1.793872354938604,
      "learning_rate": 8.673670069639928e-09,
      "loss": 0.4107,
      "step": 15901
    },
    {
      "epoch": 1.9497302599313389,
      "grad_norm": 2.1379974964016237,
      "learning_rate": 8.63153805194389e-09,
      "loss": 0.3987,
      "step": 15902
    },
    {
      "epoch": 1.9498528690534576,
      "grad_norm": 1.9606374385689782,
      "learning_rate": 8.589508433902161e-09,
      "loss": 0.4042,
      "step": 15903
    },
    {
      "epoch": 1.9499754781755763,
      "grad_norm": 1.9840229059502033,
      "learning_rate": 8.547581217242529e-09,
      "loss": 0.428,
      "step": 15904
    },
    {
      "epoch": 1.950098087297695,
      "grad_norm": 1.9147252096349083,
      "learning_rate": 8.505756403688337e-09,
      "loss": 0.4093,
      "step": 15905
    },
    {
      "epoch": 1.9502206964198137,
      "grad_norm": 1.8793905484481295,
      "learning_rate": 8.464033994957932e-09,
      "loss": 0.4433,
      "step": 15906
    },
    {
      "epoch": 1.9503433055419324,
      "grad_norm": 2.1311190565758285,
      "learning_rate": 8.422413992766887e-09,
      "loss": 0.4372,
      "step": 15907
    },
    {
      "epoch": 1.950465914664051,
      "grad_norm": 2.0756806134609205,
      "learning_rate": 8.3808963988255e-09,
      "loss": 0.4492,
      "step": 15908
    },
    {
      "epoch": 1.9505885237861698,
      "grad_norm": 1.853292937051349,
      "learning_rate": 8.339481214840461e-09,
      "loss": 0.4209,
      "step": 15909
    },
    {
      "epoch": 1.9507111329082885,
      "grad_norm": 2.062490098226261,
      "learning_rate": 8.298168442513743e-09,
      "loss": 0.4319,
      "step": 15910
    },
    {
      "epoch": 1.9508337420304072,
      "grad_norm": 1.9768520597201478,
      "learning_rate": 8.256958083543709e-09,
      "loss": 0.424,
      "step": 15911
    },
    {
      "epoch": 1.9509563511525259,
      "grad_norm": 2.0383776855770575,
      "learning_rate": 8.215850139623727e-09,
      "loss": 0.4436,
      "step": 15912
    },
    {
      "epoch": 1.9510789602746446,
      "grad_norm": 1.7122548937441078,
      "learning_rate": 8.174844612443833e-09,
      "loss": 0.4362,
      "step": 15913
    },
    {
      "epoch": 1.9512015693967633,
      "grad_norm": 2.0831896886472783,
      "learning_rate": 8.133941503689346e-09,
      "loss": 0.4328,
      "step": 15914
    },
    {
      "epoch": 1.951324178518882,
      "grad_norm": 2.0072171143057616,
      "learning_rate": 8.093140815041145e-09,
      "loss": 0.4282,
      "step": 15915
    },
    {
      "epoch": 1.9514467876410004,
      "grad_norm": 1.751095973722964,
      "learning_rate": 8.052442548176498e-09,
      "loss": 0.3804,
      "step": 15916
    },
    {
      "epoch": 1.9515693967631191,
      "grad_norm": 1.9611289274578596,
      "learning_rate": 8.011846704768233e-09,
      "loss": 0.4528,
      "step": 15917
    },
    {
      "epoch": 1.9516920058852378,
      "grad_norm": 1.8552955486692402,
      "learning_rate": 7.97135328648474e-09,
      "loss": 0.4545,
      "step": 15918
    },
    {
      "epoch": 1.9518146150073565,
      "grad_norm": 2.018997185302368,
      "learning_rate": 7.930962294990241e-09,
      "loss": 0.4008,
      "step": 15919
    },
    {
      "epoch": 1.9519372241294752,
      "grad_norm": 2.192325721014607,
      "learning_rate": 7.890673731945076e-09,
      "loss": 0.4226,
      "step": 15920
    },
    {
      "epoch": 1.952059833251594,
      "grad_norm": 2.1139691638198963,
      "learning_rate": 7.850487599005419e-09,
      "loss": 0.4089,
      "step": 15921
    },
    {
      "epoch": 1.9521824423737126,
      "grad_norm": 1.942448684054406,
      "learning_rate": 7.81040389782245e-09,
      "loss": 0.4971,
      "step": 15922
    },
    {
      "epoch": 1.9523050514958313,
      "grad_norm": 1.8022525647884942,
      "learning_rate": 7.770422630044017e-09,
      "loss": 0.4032,
      "step": 15923
    },
    {
      "epoch": 1.9524276606179498,
      "grad_norm": 1.959807731552752,
      "learning_rate": 7.730543797313528e-09,
      "loss": 0.3906,
      "step": 15924
    },
    {
      "epoch": 1.9525502697400685,
      "grad_norm": 2.2894971400494484,
      "learning_rate": 7.690767401269672e-09,
      "loss": 0.417,
      "step": 15925
    },
    {
      "epoch": 1.9526728788621872,
      "grad_norm": 1.9162071543244206,
      "learning_rate": 7.651093443547809e-09,
      "loss": 0.3941,
      "step": 15926
    },
    {
      "epoch": 1.952795487984306,
      "grad_norm": 2.1070011130929482,
      "learning_rate": 7.611521925778298e-09,
      "loss": 0.4417,
      "step": 15927
    },
    {
      "epoch": 1.9529180971064246,
      "grad_norm": 2.000288375742408,
      "learning_rate": 7.572052849587342e-09,
      "loss": 0.4286,
      "step": 15928
    },
    {
      "epoch": 1.9530407062285433,
      "grad_norm": 1.7705363480837475,
      "learning_rate": 7.532686216597807e-09,
      "loss": 0.4045,
      "step": 15929
    },
    {
      "epoch": 1.953163315350662,
      "grad_norm": 2.045849386208917,
      "learning_rate": 7.493422028427566e-09,
      "loss": 0.4575,
      "step": 15930
    },
    {
      "epoch": 1.9532859244727807,
      "grad_norm": 2.0069694072425346,
      "learning_rate": 7.454260286690052e-09,
      "loss": 0.4171,
      "step": 15931
    },
    {
      "epoch": 1.9534085335948994,
      "grad_norm": 2.1987276072043884,
      "learning_rate": 7.4152009929950864e-09,
      "loss": 0.4001,
      "step": 15932
    },
    {
      "epoch": 1.953531142717018,
      "grad_norm": 1.9550271218606077,
      "learning_rate": 7.37624414894833e-09,
      "loss": 0.3717,
      "step": 15933
    },
    {
      "epoch": 1.9536537518391368,
      "grad_norm": 2.0210319155737273,
      "learning_rate": 7.3373897561507256e-09,
      "loss": 0.4029,
      "step": 15934
    },
    {
      "epoch": 1.9537763609612555,
      "grad_norm": 2.073919369871825,
      "learning_rate": 7.298637816199327e-09,
      "loss": 0.3769,
      "step": 15935
    },
    {
      "epoch": 1.9538989700833742,
      "grad_norm": 1.9499039861054712,
      "learning_rate": 7.259988330687029e-09,
      "loss": 0.3869,
      "step": 15936
    },
    {
      "epoch": 1.954021579205493,
      "grad_norm": 1.7379610454576264,
      "learning_rate": 7.221441301202003e-09,
      "loss": 0.3598,
      "step": 15937
    },
    {
      "epoch": 1.9541441883276116,
      "grad_norm": 2.007063693749044,
      "learning_rate": 7.182996729329095e-09,
      "loss": 0.4186,
      "step": 15938
    },
    {
      "epoch": 1.9542667974497303,
      "grad_norm": 1.7988605955512926,
      "learning_rate": 7.14465461664815e-09,
      "loss": 0.4339,
      "step": 15939
    },
    {
      "epoch": 1.954389406571849,
      "grad_norm": 1.8323063666283692,
      "learning_rate": 7.106414964735131e-09,
      "loss": 0.4163,
      "step": 15940
    },
    {
      "epoch": 1.9545120156939677,
      "grad_norm": 1.9404751049189881,
      "learning_rate": 7.068277775161558e-09,
      "loss": 0.4613,
      "step": 15941
    },
    {
      "epoch": 1.9546346248160864,
      "grad_norm": 1.8897642047922918,
      "learning_rate": 7.030243049495344e-09,
      "loss": 0.4163,
      "step": 15942
    },
    {
      "epoch": 1.954757233938205,
      "grad_norm": 1.7800429916147567,
      "learning_rate": 6.992310789299683e-09,
      "loss": 0.3818,
      "step": 15943
    },
    {
      "epoch": 1.9548798430603238,
      "grad_norm": 1.893440611929912,
      "learning_rate": 6.9544809961333286e-09,
      "loss": 0.418,
      "step": 15944
    },
    {
      "epoch": 1.9550024521824425,
      "grad_norm": 2.0280034520060513,
      "learning_rate": 6.916753671551701e-09,
      "loss": 0.4148,
      "step": 15945
    },
    {
      "epoch": 1.9551250613045612,
      "grad_norm": 1.854222984194213,
      "learning_rate": 6.879128817104952e-09,
      "loss": 0.4221,
      "step": 15946
    },
    {
      "epoch": 1.95524767042668,
      "grad_norm": 1.8464761667317224,
      "learning_rate": 6.841606434339898e-09,
      "loss": 0.4058,
      "step": 15947
    },
    {
      "epoch": 1.9553702795487984,
      "grad_norm": 1.9100721761667332,
      "learning_rate": 6.804186524798362e-09,
      "loss": 0.4502,
      "step": 15948
    },
    {
      "epoch": 1.955492888670917,
      "grad_norm": 1.9015974198230523,
      "learning_rate": 6.766869090018557e-09,
      "loss": 0.4672,
      "step": 15949
    },
    {
      "epoch": 1.9556154977930358,
      "grad_norm": 1.8479290662275432,
      "learning_rate": 6.729654131534813e-09,
      "loss": 0.4283,
      "step": 15950
    },
    {
      "epoch": 1.9557381069151545,
      "grad_norm": 1.9602528653371982,
      "learning_rate": 6.692541650875906e-09,
      "loss": 0.437,
      "step": 15951
    },
    {
      "epoch": 1.9558607160372732,
      "grad_norm": 2.187081764614201,
      "learning_rate": 6.65553164956756e-09,
      "loss": 0.4582,
      "step": 15952
    },
    {
      "epoch": 1.9559833251593919,
      "grad_norm": 1.8432290257610868,
      "learning_rate": 6.618624129131335e-09,
      "loss": 0.4406,
      "step": 15953
    },
    {
      "epoch": 1.9561059342815106,
      "grad_norm": 2.1905327774070686,
      "learning_rate": 6.581819091083519e-09,
      "loss": 0.4471,
      "step": 15954
    },
    {
      "epoch": 1.9562285434036293,
      "grad_norm": 1.7860909603193542,
      "learning_rate": 6.545116536937069e-09,
      "loss": 0.4304,
      "step": 15955
    },
    {
      "epoch": 1.9563511525257478,
      "grad_norm": 1.9806006978487818,
      "learning_rate": 6.508516468201053e-09,
      "loss": 0.4108,
      "step": 15956
    },
    {
      "epoch": 1.9564737616478665,
      "grad_norm": 1.8155237691384858,
      "learning_rate": 6.472018886379272e-09,
      "loss": 0.421,
      "step": 15957
    },
    {
      "epoch": 1.9565963707699852,
      "grad_norm": 1.947227305849094,
      "learning_rate": 6.435623792971635e-09,
      "loss": 0.4049,
      "step": 15958
    },
    {
      "epoch": 1.9567189798921039,
      "grad_norm": 1.9174450848621982,
      "learning_rate": 6.399331189474723e-09,
      "loss": 0.3895,
      "step": 15959
    },
    {
      "epoch": 1.9568415890142226,
      "grad_norm": 2.0415428926657735,
      "learning_rate": 6.363141077379842e-09,
      "loss": 0.4011,
      "step": 15960
    },
    {
      "epoch": 1.9569641981363413,
      "grad_norm": 1.9569559578159628,
      "learning_rate": 6.3270534581744145e-09,
      "loss": 0.3886,
      "step": 15961
    },
    {
      "epoch": 1.95708680725846,
      "grad_norm": 2.0067646074821965,
      "learning_rate": 6.2910683333416985e-09,
      "loss": 0.4276,
      "step": 15962
    },
    {
      "epoch": 1.9572094163805787,
      "grad_norm": 1.8543575660108218,
      "learning_rate": 6.255185704361067e-09,
      "loss": 0.3856,
      "step": 15963
    },
    {
      "epoch": 1.9573320255026974,
      "grad_norm": 1.963854299596035,
      "learning_rate": 6.219405572706894e-09,
      "loss": 0.4292,
      "step": 15964
    },
    {
      "epoch": 1.957454634624816,
      "grad_norm": 1.9550276792944064,
      "learning_rate": 6.183727939850226e-09,
      "loss": 0.3925,
      "step": 15965
    },
    {
      "epoch": 1.9575772437469348,
      "grad_norm": 2.0325946168101674,
      "learning_rate": 6.1481528072571125e-09,
      "loss": 0.4439,
      "step": 15966
    },
    {
      "epoch": 1.9576998528690535,
      "grad_norm": 1.9912598264826338,
      "learning_rate": 6.112680176389996e-09,
      "loss": 0.4587,
      "step": 15967
    },
    {
      "epoch": 1.9578224619911722,
      "grad_norm": 1.9360112713530198,
      "learning_rate": 6.077310048707152e-09,
      "loss": 0.4046,
      "step": 15968
    },
    {
      "epoch": 1.9579450711132909,
      "grad_norm": 1.7801930279588507,
      "learning_rate": 6.042042425661588e-09,
      "loss": 0.4192,
      "step": 15969
    },
    {
      "epoch": 1.9580676802354096,
      "grad_norm": 1.9716032509609875,
      "learning_rate": 6.006877308703251e-09,
      "loss": 0.3944,
      "step": 15970
    },
    {
      "epoch": 1.9581902893575283,
      "grad_norm": 1.8516938210412894,
      "learning_rate": 5.971814699277656e-09,
      "loss": 0.3777,
      "step": 15971
    },
    {
      "epoch": 1.958312898479647,
      "grad_norm": 1.6541561557347746,
      "learning_rate": 5.93685459882587e-09,
      "loss": 0.4164,
      "step": 15972
    },
    {
      "epoch": 1.9584355076017657,
      "grad_norm": 1.9761409634890148,
      "learning_rate": 5.901997008784799e-09,
      "loss": 0.4391,
      "step": 15973
    },
    {
      "epoch": 1.9585581167238844,
      "grad_norm": 1.852975045091641,
      "learning_rate": 5.867241930586909e-09,
      "loss": 0.4137,
      "step": 15974
    },
    {
      "epoch": 1.958680725846003,
      "grad_norm": 1.9109014624829121,
      "learning_rate": 5.832589365661057e-09,
      "loss": 0.4096,
      "step": 15975
    },
    {
      "epoch": 1.9588033349681218,
      "grad_norm": 1.988782737067164,
      "learning_rate": 5.79803931543138e-09,
      "loss": 0.4492,
      "step": 15976
    },
    {
      "epoch": 1.9589259440902405,
      "grad_norm": 2.2000541201607864,
      "learning_rate": 5.763591781317856e-09,
      "loss": 0.4337,
      "step": 15977
    },
    {
      "epoch": 1.9590485532123592,
      "grad_norm": 1.8291645576603395,
      "learning_rate": 5.729246764736296e-09,
      "loss": 0.433,
      "step": 15978
    },
    {
      "epoch": 1.9591711623344776,
      "grad_norm": 1.882582436995186,
      "learning_rate": 5.695004267098625e-09,
      "loss": 0.4631,
      "step": 15979
    },
    {
      "epoch": 1.9592937714565963,
      "grad_norm": 2.0008828841369963,
      "learning_rate": 5.6608642898120514e-09,
      "loss": 0.393,
      "step": 15980
    },
    {
      "epoch": 1.959416380578715,
      "grad_norm": 1.9308279637543075,
      "learning_rate": 5.626826834279897e-09,
      "loss": 0.4219,
      "step": 15981
    },
    {
      "epoch": 1.9595389897008337,
      "grad_norm": 2.088206093085473,
      "learning_rate": 5.592891901900765e-09,
      "loss": 0.4459,
      "step": 15982
    },
    {
      "epoch": 1.9596615988229524,
      "grad_norm": 1.8785957109177316,
      "learning_rate": 5.559059494070207e-09,
      "loss": 0.4129,
      "step": 15983
    },
    {
      "epoch": 1.9597842079450711,
      "grad_norm": 1.9047710698516567,
      "learning_rate": 5.525329612178221e-09,
      "loss": 0.4505,
      "step": 15984
    },
    {
      "epoch": 1.9599068170671898,
      "grad_norm": 2.1230864760266446,
      "learning_rate": 5.491702257611198e-09,
      "loss": 0.4212,
      "step": 15985
    },
    {
      "epoch": 1.9600294261893085,
      "grad_norm": 1.948714472431336,
      "learning_rate": 5.458177431751643e-09,
      "loss": 0.4384,
      "step": 15986
    },
    {
      "epoch": 1.960152035311427,
      "grad_norm": 1.9822947263777186,
      "learning_rate": 5.424755135976789e-09,
      "loss": 0.3658,
      "step": 15987
    },
    {
      "epoch": 1.9602746444335457,
      "grad_norm": 1.7664399662140338,
      "learning_rate": 5.3914353716610916e-09,
      "loss": 0.4455,
      "step": 15988
    },
    {
      "epoch": 1.9603972535556644,
      "grad_norm": 1.9500063860208379,
      "learning_rate": 5.358218140173732e-09,
      "loss": 0.4085,
      "step": 15989
    },
    {
      "epoch": 1.960519862677783,
      "grad_norm": 1.9165763563839024,
      "learning_rate": 5.3251034428800086e-09,
      "loss": 0.4275,
      "step": 15990
    },
    {
      "epoch": 1.9606424717999018,
      "grad_norm": 2.0013756138946053,
      "learning_rate": 5.292091281141054e-09,
      "loss": 0.4642,
      "step": 15991
    },
    {
      "epoch": 1.9607650809220205,
      "grad_norm": 1.9137421796176544,
      "learning_rate": 5.259181656313561e-09,
      "loss": 0.4447,
      "step": 15992
    },
    {
      "epoch": 1.9608876900441392,
      "grad_norm": 1.9834206659907834,
      "learning_rate": 5.226374569750614e-09,
      "loss": 0.4196,
      "step": 15993
    },
    {
      "epoch": 1.961010299166258,
      "grad_norm": 1.9702487288536175,
      "learning_rate": 5.193670022800023e-09,
      "loss": 0.4461,
      "step": 15994
    },
    {
      "epoch": 1.9611329082883766,
      "grad_norm": 1.9968018543743051,
      "learning_rate": 5.16106801680627e-09,
      "loss": 0.4176,
      "step": 15995
    },
    {
      "epoch": 1.9612555174104953,
      "grad_norm": 1.9935968858639268,
      "learning_rate": 5.1285685531096695e-09,
      "loss": 0.4962,
      "step": 15996
    },
    {
      "epoch": 1.961378126532614,
      "grad_norm": 1.9760292158138155,
      "learning_rate": 5.0961716330455436e-09,
      "loss": 0.3786,
      "step": 15997
    },
    {
      "epoch": 1.9615007356547327,
      "grad_norm": 1.8969560301316164,
      "learning_rate": 5.063877257945604e-09,
      "loss": 0.4395,
      "step": 15998
    },
    {
      "epoch": 1.9616233447768514,
      "grad_norm": 1.896477426792883,
      "learning_rate": 5.031685429137123e-09,
      "loss": 0.449,
      "step": 15999
    },
    {
      "epoch": 1.96174595389897,
      "grad_norm": 1.9265238402719953,
      "learning_rate": 4.999596147943486e-09,
      "loss": 0.4381,
      "step": 16000
    },
    {
      "epoch": 1.9618685630210888,
      "grad_norm": 1.8250706334072933,
      "learning_rate": 4.967609415683639e-09,
      "loss": 0.4149,
      "step": 16001
    },
    {
      "epoch": 1.9619911721432075,
      "grad_norm": 2.082863129529508,
      "learning_rate": 4.935725233672084e-09,
      "loss": 0.3949,
      "step": 16002
    },
    {
      "epoch": 1.9621137812653262,
      "grad_norm": 2.109137866646781,
      "learning_rate": 4.903943603219441e-09,
      "loss": 0.4613,
      "step": 16003
    },
    {
      "epoch": 1.962236390387445,
      "grad_norm": 1.9079170993114936,
      "learning_rate": 4.8722645256316096e-09,
      "loss": 0.4479,
      "step": 16004
    },
    {
      "epoch": 1.9623589995095636,
      "grad_norm": 1.8393017966534506,
      "learning_rate": 4.840688002211158e-09,
      "loss": 0.4135,
      "step": 16005
    },
    {
      "epoch": 1.9624816086316823,
      "grad_norm": 1.9164718649414267,
      "learning_rate": 4.809214034255938e-09,
      "loss": 0.4421,
      "step": 16006
    },
    {
      "epoch": 1.962604217753801,
      "grad_norm": 2.0866651141958803,
      "learning_rate": 4.777842623059359e-09,
      "loss": 0.3981,
      "step": 16007
    },
    {
      "epoch": 1.9627268268759197,
      "grad_norm": 1.917126063788446,
      "learning_rate": 4.7465737699106675e-09,
      "loss": 0.4226,
      "step": 16008
    },
    {
      "epoch": 1.9628494359980384,
      "grad_norm": 1.7451406691942204,
      "learning_rate": 4.7154074760955014e-09,
      "loss": 0.432,
      "step": 16009
    },
    {
      "epoch": 1.962972045120157,
      "grad_norm": 2.044169963872877,
      "learning_rate": 4.684343742894781e-09,
      "loss": 0.3759,
      "step": 16010
    },
    {
      "epoch": 1.9630946542422756,
      "grad_norm": 2.028050269693904,
      "learning_rate": 4.653382571584708e-09,
      "loss": 0.4289,
      "step": 16011
    },
    {
      "epoch": 1.9632172633643943,
      "grad_norm": 1.9661488136476208,
      "learning_rate": 4.622523963438708e-09,
      "loss": 0.443,
      "step": 16012
    },
    {
      "epoch": 1.963339872486513,
      "grad_norm": 2.084302822728812,
      "learning_rate": 4.591767919724377e-09,
      "loss": 0.4196,
      "step": 16013
    },
    {
      "epoch": 1.9634624816086317,
      "grad_norm": 2.0814536503455354,
      "learning_rate": 4.56111444170626e-09,
      "loss": 0.4189,
      "step": 16014
    },
    {
      "epoch": 1.9635850907307504,
      "grad_norm": 1.8646754101437588,
      "learning_rate": 4.530563530644183e-09,
      "loss": 0.4255,
      "step": 16015
    },
    {
      "epoch": 1.963707699852869,
      "grad_norm": 1.9728310949535222,
      "learning_rate": 4.500115187793808e-09,
      "loss": 0.4208,
      "step": 16016
    },
    {
      "epoch": 1.9638303089749878,
      "grad_norm": 2.127437535297831,
      "learning_rate": 4.469769414406633e-09,
      "loss": 0.3995,
      "step": 16017
    },
    {
      "epoch": 1.9639529180971063,
      "grad_norm": 2.2011949674795734,
      "learning_rate": 4.439526211729994e-09,
      "loss": 0.4599,
      "step": 16018
    },
    {
      "epoch": 1.964075527219225,
      "grad_norm": 1.941040475342297,
      "learning_rate": 4.409385581006786e-09,
      "loss": 0.4254,
      "step": 16019
    },
    {
      "epoch": 1.9641981363413437,
      "grad_norm": 1.9801638634295153,
      "learning_rate": 4.379347523476018e-09,
      "loss": 0.4084,
      "step": 16020
    },
    {
      "epoch": 1.9643207454634624,
      "grad_norm": 2.0585595206564222,
      "learning_rate": 4.349412040372258e-09,
      "loss": 0.4401,
      "step": 16021
    },
    {
      "epoch": 1.964443354585581,
      "grad_norm": 1.8992168552992386,
      "learning_rate": 4.319579132925633e-09,
      "loss": 0.4473,
      "step": 16022
    },
    {
      "epoch": 1.9645659637076998,
      "grad_norm": 2.0954093729230947,
      "learning_rate": 4.2898488023629395e-09,
      "loss": 0.4356,
      "step": 16023
    },
    {
      "epoch": 1.9646885728298185,
      "grad_norm": 2.0908048604244254,
      "learning_rate": 4.260221049905422e-09,
      "loss": 0.4584,
      "step": 16024
    },
    {
      "epoch": 1.9648111819519372,
      "grad_norm": 1.9236131183583847,
      "learning_rate": 4.2306958767712735e-09,
      "loss": 0.4263,
      "step": 16025
    },
    {
      "epoch": 1.9649337910740559,
      "grad_norm": 2.150438210000758,
      "learning_rate": 4.201273284173968e-09,
      "loss": 0.446,
      "step": 16026
    },
    {
      "epoch": 1.9650564001961746,
      "grad_norm": 1.7790119845123897,
      "learning_rate": 4.171953273322815e-09,
      "loss": 0.4287,
      "step": 16027
    },
    {
      "epoch": 1.9651790093182933,
      "grad_norm": 1.9078087167883506,
      "learning_rate": 4.142735845422685e-09,
      "loss": 0.3842,
      "step": 16028
    },
    {
      "epoch": 1.965301618440412,
      "grad_norm": 1.9715765570264496,
      "learning_rate": 4.113621001674839e-09,
      "loss": 0.4539,
      "step": 16029
    },
    {
      "epoch": 1.9654242275625307,
      "grad_norm": 1.9171588999547928,
      "learning_rate": 4.08460874327582e-09,
      "loss": 0.4055,
      "step": 16030
    },
    {
      "epoch": 1.9655468366846494,
      "grad_norm": 2.2085882085789583,
      "learning_rate": 4.0556990714180065e-09,
      "loss": 0.41,
      "step": 16031
    },
    {
      "epoch": 1.965669445806768,
      "grad_norm": 1.9413421742684738,
      "learning_rate": 4.026891987289894e-09,
      "loss": 0.4359,
      "step": 16032
    },
    {
      "epoch": 1.9657920549288868,
      "grad_norm": 1.851902467304413,
      "learning_rate": 3.9981874920749784e-09,
      "loss": 0.408,
      "step": 16033
    },
    {
      "epoch": 1.9659146640510055,
      "grad_norm": 2.0446755634070346,
      "learning_rate": 3.969585586953428e-09,
      "loss": 0.491,
      "step": 16034
    },
    {
      "epoch": 1.9660372731731242,
      "grad_norm": 1.725934908891825,
      "learning_rate": 3.941086273100969e-09,
      "loss": 0.4253,
      "step": 16035
    },
    {
      "epoch": 1.9661598822952429,
      "grad_norm": 2.1793305848424462,
      "learning_rate": 3.9126895516888864e-09,
      "loss": 0.4746,
      "step": 16036
    },
    {
      "epoch": 1.9662824914173616,
      "grad_norm": 1.9103996345496475,
      "learning_rate": 3.884395423884024e-09,
      "loss": 0.4049,
      "step": 16037
    },
    {
      "epoch": 1.9664051005394803,
      "grad_norm": 2.2630317338940587,
      "learning_rate": 3.856203890849341e-09,
      "loss": 0.4325,
      "step": 16038
    },
    {
      "epoch": 1.966527709661599,
      "grad_norm": 2.0088452089844355,
      "learning_rate": 3.828114953743911e-09,
      "loss": 0.4201,
      "step": 16039
    },
    {
      "epoch": 1.9666503187837177,
      "grad_norm": 1.9219706842123498,
      "learning_rate": 3.8001286137223645e-09,
      "loss": 0.3794,
      "step": 16040
    },
    {
      "epoch": 1.9667729279058364,
      "grad_norm": 1.8049514502176647,
      "learning_rate": 3.7722448719343385e-09,
      "loss": 0.3741,
      "step": 16041
    },
    {
      "epoch": 1.9668955370279548,
      "grad_norm": 1.7850608841172118,
      "learning_rate": 3.7444637295264155e-09,
      "loss": 0.4154,
      "step": 16042
    },
    {
      "epoch": 1.9670181461500735,
      "grad_norm": 1.8504276196497798,
      "learning_rate": 3.7167851876404593e-09,
      "loss": 0.4332,
      "step": 16043
    },
    {
      "epoch": 1.9671407552721922,
      "grad_norm": 1.884911848394584,
      "learning_rate": 3.6892092474136164e-09,
      "loss": 0.4242,
      "step": 16044
    },
    {
      "epoch": 1.967263364394311,
      "grad_norm": 1.896663143653409,
      "learning_rate": 3.6617359099799797e-09,
      "loss": 0.4284,
      "step": 16045
    },
    {
      "epoch": 1.9673859735164296,
      "grad_norm": 1.888193088549295,
      "learning_rate": 3.6343651764680907e-09,
      "loss": 0.4322,
      "step": 16046
    },
    {
      "epoch": 1.9675085826385483,
      "grad_norm": 2.1301388104801324,
      "learning_rate": 3.6070970480034383e-09,
      "loss": 0.417,
      "step": 16047
    },
    {
      "epoch": 1.967631191760667,
      "grad_norm": 2.10065817700409,
      "learning_rate": 3.5799315257067925e-09,
      "loss": 0.4275,
      "step": 16048
    },
    {
      "epoch": 1.9677538008827857,
      "grad_norm": 2.103070022073393,
      "learning_rate": 3.5528686106942045e-09,
      "loss": 0.4442,
      "step": 16049
    },
    {
      "epoch": 1.9678764100049042,
      "grad_norm": 2.1220229675051594,
      "learning_rate": 3.525908304078396e-09,
      "loss": 0.4611,
      "step": 16050
    },
    {
      "epoch": 1.967999019127023,
      "grad_norm": 1.8607495832449708,
      "learning_rate": 3.4990506069676467e-09,
      "loss": 0.4061,
      "step": 16051
    },
    {
      "epoch": 1.9681216282491416,
      "grad_norm": 1.8861463774329,
      "learning_rate": 3.4722955204652407e-09,
      "loss": 0.4767,
      "step": 16052
    },
    {
      "epoch": 1.9682442373712603,
      "grad_norm": 1.8091255383459068,
      "learning_rate": 3.445643045671687e-09,
      "loss": 0.4302,
      "step": 16053
    },
    {
      "epoch": 1.968366846493379,
      "grad_norm": 2.0672699622609336,
      "learning_rate": 3.4190931836816652e-09,
      "loss": 0.4248,
      "step": 16054
    },
    {
      "epoch": 1.9684894556154977,
      "grad_norm": 2.0028972182831173,
      "learning_rate": 3.3926459355868024e-09,
      "loss": 0.376,
      "step": 16055
    },
    {
      "epoch": 1.9686120647376164,
      "grad_norm": 2.060278935558774,
      "learning_rate": 3.366301302474284e-09,
      "loss": 0.3508,
      "step": 16056
    },
    {
      "epoch": 1.968734673859735,
      "grad_norm": 1.9613220116510088,
      "learning_rate": 3.340059285426578e-09,
      "loss": 0.423,
      "step": 16057
    },
    {
      "epoch": 1.9688572829818538,
      "grad_norm": 2.0550360257584,
      "learning_rate": 3.313919885522543e-09,
      "loss": 0.4185,
      "step": 16058
    },
    {
      "epoch": 1.9689798921039725,
      "grad_norm": 1.741691672683994,
      "learning_rate": 3.2878831038363204e-09,
      "loss": 0.4194,
      "step": 16059
    },
    {
      "epoch": 1.9691025012260912,
      "grad_norm": 1.912151044847757,
      "learning_rate": 3.2619489414381645e-09,
      "loss": 0.3935,
      "step": 16060
    },
    {
      "epoch": 1.96922511034821,
      "grad_norm": 1.9310641259801615,
      "learning_rate": 3.2361173993941673e-09,
      "loss": 0.3925,
      "step": 16061
    },
    {
      "epoch": 1.9693477194703286,
      "grad_norm": 2.0442531162003776,
      "learning_rate": 3.2103884787657026e-09,
      "loss": 0.4292,
      "step": 16062
    },
    {
      "epoch": 1.9694703285924473,
      "grad_norm": 1.8697444200119158,
      "learning_rate": 3.1847621806105344e-09,
      "loss": 0.4317,
      "step": 16063
    },
    {
      "epoch": 1.969592937714566,
      "grad_norm": 2.049909940392773,
      "learning_rate": 3.1592385059819873e-09,
      "loss": 0.438,
      "step": 16064
    },
    {
      "epoch": 1.9697155468366847,
      "grad_norm": 1.8189355765600894,
      "learning_rate": 3.1338174559292222e-09,
      "loss": 0.4386,
      "step": 16065
    },
    {
      "epoch": 1.9698381559588034,
      "grad_norm": 1.8595378200210342,
      "learning_rate": 3.1084990314966814e-09,
      "loss": 0.39,
      "step": 16066
    },
    {
      "epoch": 1.969960765080922,
      "grad_norm": 1.8941481952753958,
      "learning_rate": 3.0832832337251982e-09,
      "loss": 0.4446,
      "step": 16067
    },
    {
      "epoch": 1.9700833742030408,
      "grad_norm": 1.7283950354709696,
      "learning_rate": 3.0581700636514443e-09,
      "loss": 0.3973,
      "step": 16068
    },
    {
      "epoch": 1.9702059833251595,
      "grad_norm": 2.012046948394191,
      "learning_rate": 3.0331595223070942e-09,
      "loss": 0.4554,
      "step": 16069
    },
    {
      "epoch": 1.9703285924472782,
      "grad_norm": 1.9936889996266882,
      "learning_rate": 3.008251610720492e-09,
      "loss": 0.4098,
      "step": 16070
    },
    {
      "epoch": 1.970451201569397,
      "grad_norm": 1.9334569328967706,
      "learning_rate": 2.9834463299155404e-09,
      "loss": 0.4505,
      "step": 16071
    },
    {
      "epoch": 1.9705738106915156,
      "grad_norm": 1.9083064396448837,
      "learning_rate": 2.9587436809114243e-09,
      "loss": 0.4172,
      "step": 16072
    },
    {
      "epoch": 1.970696419813634,
      "grad_norm": 1.9882682013067106,
      "learning_rate": 2.934143664723721e-09,
      "loss": 0.4053,
      "step": 16073
    },
    {
      "epoch": 1.9708190289357528,
      "grad_norm": 1.7325810467334393,
      "learning_rate": 2.9096462823632874e-09,
      "loss": 0.364,
      "step": 16074
    },
    {
      "epoch": 1.9709416380578715,
      "grad_norm": 2.071324106952054,
      "learning_rate": 2.8852515348370968e-09,
      "loss": 0.4439,
      "step": 16075
    },
    {
      "epoch": 1.9710642471799902,
      "grad_norm": 1.936967668862963,
      "learning_rate": 2.8609594231479577e-09,
      "loss": 0.4069,
      "step": 16076
    },
    {
      "epoch": 1.9711868563021089,
      "grad_norm": 1.8805425639217055,
      "learning_rate": 2.836769948294238e-09,
      "loss": 0.3996,
      "step": 16077
    },
    {
      "epoch": 1.9713094654242276,
      "grad_norm": 1.9432310800739268,
      "learning_rate": 2.8126831112701423e-09,
      "loss": 0.4191,
      "step": 16078
    },
    {
      "epoch": 1.9714320745463463,
      "grad_norm": 2.1159806985370904,
      "learning_rate": 2.788698913065435e-09,
      "loss": 0.4424,
      "step": 16079
    },
    {
      "epoch": 1.971554683668465,
      "grad_norm": 1.9940892919452813,
      "learning_rate": 2.7648173546665493e-09,
      "loss": 0.4104,
      "step": 16080
    },
    {
      "epoch": 1.9716772927905835,
      "grad_norm": 1.8852648262585403,
      "learning_rate": 2.7410384370543664e-09,
      "loss": 0.4069,
      "step": 16081
    },
    {
      "epoch": 1.9717999019127022,
      "grad_norm": 1.9668811152329466,
      "learning_rate": 2.7173621612069933e-09,
      "loss": 0.4404,
      "step": 16082
    },
    {
      "epoch": 1.9719225110348209,
      "grad_norm": 1.891626660888277,
      "learning_rate": 2.693788528096708e-09,
      "loss": 0.3994,
      "step": 16083
    },
    {
      "epoch": 1.9720451201569396,
      "grad_norm": 1.9759911861069317,
      "learning_rate": 2.6703175386930126e-09,
      "loss": 0.3902,
      "step": 16084
    },
    {
      "epoch": 1.9721677292790583,
      "grad_norm": 1.7740083614471196,
      "learning_rate": 2.646949193960413e-09,
      "loss": 0.4432,
      "step": 16085
    },
    {
      "epoch": 1.972290338401177,
      "grad_norm": 1.9036946108920563,
      "learning_rate": 2.6236834948592525e-09,
      "loss": 0.4542,
      "step": 16086
    },
    {
      "epoch": 1.9724129475232957,
      "grad_norm": 1.9535496028515404,
      "learning_rate": 2.600520442346266e-09,
      "loss": 0.4325,
      "step": 16087
    },
    {
      "epoch": 1.9725355566454144,
      "grad_norm": 1.9331761603921673,
      "learning_rate": 2.577460037372914e-09,
      "loss": 0.3849,
      "step": 16088
    },
    {
      "epoch": 1.972658165767533,
      "grad_norm": 1.557694926943856,
      "learning_rate": 2.5545022808876054e-09,
      "loss": 0.3864,
      "step": 16089
    },
    {
      "epoch": 1.9727807748896518,
      "grad_norm": 1.94088641640532,
      "learning_rate": 2.5316471738334735e-09,
      "loss": 0.4221,
      "step": 16090
    },
    {
      "epoch": 1.9729033840117705,
      "grad_norm": 1.8047052398736199,
      "learning_rate": 2.5088947171500455e-09,
      "loss": 0.4508,
      "step": 16091
    },
    {
      "epoch": 1.9730259931338892,
      "grad_norm": 1.9150438448526723,
      "learning_rate": 2.486244911772406e-09,
      "loss": 0.3872,
      "step": 16092
    },
    {
      "epoch": 1.9731486022560079,
      "grad_norm": 2.124309268661918,
      "learning_rate": 2.4636977586317556e-09,
      "loss": 0.4124,
      "step": 16093
    },
    {
      "epoch": 1.9732712113781266,
      "grad_norm": 1.876873079537479,
      "learning_rate": 2.4412532586545747e-09,
      "loss": 0.4257,
      "step": 16094
    },
    {
      "epoch": 1.9733938205002453,
      "grad_norm": 2.043589362649192,
      "learning_rate": 2.4189114127634584e-09,
      "loss": 0.3976,
      "step": 16095
    },
    {
      "epoch": 1.973516429622364,
      "grad_norm": 1.9487409497844488,
      "learning_rate": 2.396672221876839e-09,
      "loss": 0.3939,
      "step": 16096
    },
    {
      "epoch": 1.9736390387444827,
      "grad_norm": 2.0604422811641596,
      "learning_rate": 2.3745356869084302e-09,
      "loss": 0.4218,
      "step": 16097
    },
    {
      "epoch": 1.9737616478666014,
      "grad_norm": 2.1789342935434592,
      "learning_rate": 2.352501808768337e-09,
      "loss": 0.4691,
      "step": 16098
    },
    {
      "epoch": 1.97388425698872,
      "grad_norm": 1.9076926076750613,
      "learning_rate": 2.3305705883622243e-09,
      "loss": 0.4236,
      "step": 16099
    },
    {
      "epoch": 1.9740068661108388,
      "grad_norm": 1.8882461732596292,
      "learning_rate": 2.3087420265915925e-09,
      "loss": 0.392,
      "step": 16100
    },
    {
      "epoch": 1.9741294752329575,
      "grad_norm": 1.9543809714935936,
      "learning_rate": 2.2870161243532252e-09,
      "loss": 0.4087,
      "step": 16101
    },
    {
      "epoch": 1.9742520843550762,
      "grad_norm": 1.8441948546707878,
      "learning_rate": 2.2653928825405734e-09,
      "loss": 0.3666,
      "step": 16102
    },
    {
      "epoch": 1.9743746934771949,
      "grad_norm": 2.286216527922391,
      "learning_rate": 2.243872302042094e-09,
      "loss": 0.4359,
      "step": 16103
    },
    {
      "epoch": 1.9744973025993136,
      "grad_norm": 1.9769229471512089,
      "learning_rate": 2.222454383742356e-09,
      "loss": 0.4574,
      "step": 16104
    },
    {
      "epoch": 1.974619911721432,
      "grad_norm": 2.0742017947552998,
      "learning_rate": 2.2011391285217676e-09,
      "loss": 0.3951,
      "step": 16105
    },
    {
      "epoch": 1.9747425208435507,
      "grad_norm": 2.090363260585694,
      "learning_rate": 2.179926537256294e-09,
      "loss": 0.4382,
      "step": 16106
    },
    {
      "epoch": 1.9748651299656694,
      "grad_norm": 2.006336471755019,
      "learning_rate": 2.1588166108177377e-09,
      "loss": 0.3911,
      "step": 16107
    },
    {
      "epoch": 1.9749877390877881,
      "grad_norm": 2.2901705627250344,
      "learning_rate": 2.137809350074016e-09,
      "loss": 0.4584,
      "step": 16108
    },
    {
      "epoch": 1.9751103482099068,
      "grad_norm": 1.9720247779483768,
      "learning_rate": 2.1169047558886048e-09,
      "loss": 0.4667,
      "step": 16109
    },
    {
      "epoch": 1.9752329573320255,
      "grad_norm": 2.0242820436550284,
      "learning_rate": 2.096102829120539e-09,
      "loss": 0.3922,
      "step": 16110
    },
    {
      "epoch": 1.9753555664541442,
      "grad_norm": 2.020468972409422,
      "learning_rate": 2.07540357062469e-09,
      "loss": 0.4349,
      "step": 16111
    },
    {
      "epoch": 1.975478175576263,
      "grad_norm": 2.1202660205622195,
      "learning_rate": 2.054806981252044e-09,
      "loss": 0.4036,
      "step": 16112
    },
    {
      "epoch": 1.9756007846983814,
      "grad_norm": 1.9221577642577248,
      "learning_rate": 2.0343130618491467e-09,
      "loss": 0.428,
      "step": 16113
    },
    {
      "epoch": 1.9757233938205,
      "grad_norm": 1.9766378533087148,
      "learning_rate": 2.013921813258102e-09,
      "loss": 0.4063,
      "step": 16114
    },
    {
      "epoch": 1.9758460029426188,
      "grad_norm": 1.8941268661236366,
      "learning_rate": 1.993633236317405e-09,
      "loss": 0.4302,
      "step": 16115
    },
    {
      "epoch": 1.9759686120647375,
      "grad_norm": 1.9711594102012604,
      "learning_rate": 1.973447331860556e-09,
      "loss": 0.4324,
      "step": 16116
    },
    {
      "epoch": 1.9760912211868562,
      "grad_norm": 2.0828425108704747,
      "learning_rate": 1.9533641007177252e-09,
      "loss": 0.4636,
      "step": 16117
    },
    {
      "epoch": 1.976213830308975,
      "grad_norm": 1.93300410451818,
      "learning_rate": 1.933383543713807e-09,
      "loss": 0.4241,
      "step": 16118
    },
    {
      "epoch": 1.9763364394310936,
      "grad_norm": 1.9043930838878782,
      "learning_rate": 1.9135056616703672e-09,
      "loss": 0.4012,
      "step": 16119
    },
    {
      "epoch": 1.9764590485532123,
      "grad_norm": 1.9816308418259452,
      "learning_rate": 1.89373045540453e-09,
      "loss": 0.415,
      "step": 16120
    },
    {
      "epoch": 1.976581657675331,
      "grad_norm": 1.982218836319982,
      "learning_rate": 1.8740579257287004e-09,
      "loss": 0.4132,
      "step": 16121
    },
    {
      "epoch": 1.9767042667974497,
      "grad_norm": 1.9493259469535484,
      "learning_rate": 1.8544880734516768e-09,
      "loss": 0.3946,
      "step": 16122
    },
    {
      "epoch": 1.9768268759195684,
      "grad_norm": 1.9517371862326824,
      "learning_rate": 1.8350208993778152e-09,
      "loss": 0.4362,
      "step": 16123
    },
    {
      "epoch": 1.976949485041687,
      "grad_norm": 2.0796381301842897,
      "learning_rate": 1.8156564043073087e-09,
      "loss": 0.4595,
      "step": 16124
    },
    {
      "epoch": 1.9770720941638058,
      "grad_norm": 1.8907405284187544,
      "learning_rate": 1.7963945890361878e-09,
      "loss": 0.4146,
      "step": 16125
    },
    {
      "epoch": 1.9771947032859245,
      "grad_norm": 1.953337746673025,
      "learning_rate": 1.7772354543557635e-09,
      "loss": 0.4222,
      "step": 16126
    },
    {
      "epoch": 1.9773173124080432,
      "grad_norm": 2.0828366083016605,
      "learning_rate": 1.758179001053739e-09,
      "loss": 0.4563,
      "step": 16127
    },
    {
      "epoch": 1.977439921530162,
      "grad_norm": 2.1157189232372318,
      "learning_rate": 1.739225229913377e-09,
      "loss": 0.4252,
      "step": 16128
    },
    {
      "epoch": 1.9775625306522806,
      "grad_norm": 2.029641336393476,
      "learning_rate": 1.7203741417137765e-09,
      "loss": 0.4246,
      "step": 16129
    },
    {
      "epoch": 1.9776851397743993,
      "grad_norm": 1.8908894851225173,
      "learning_rate": 1.7016257372295952e-09,
      "loss": 0.4135,
      "step": 16130
    },
    {
      "epoch": 1.977807748896518,
      "grad_norm": 1.9975003599565355,
      "learning_rate": 1.6829800172313283e-09,
      "loss": 0.468,
      "step": 16131
    },
    {
      "epoch": 1.9779303580186367,
      "grad_norm": 1.8706977718632205,
      "learning_rate": 1.6644369824855844e-09,
      "loss": 0.4274,
      "step": 16132
    },
    {
      "epoch": 1.9780529671407554,
      "grad_norm": 1.9025704373861658,
      "learning_rate": 1.6459966337545318e-09,
      "loss": 0.4145,
      "step": 16133
    },
    {
      "epoch": 1.978175576262874,
      "grad_norm": 1.914530841105299,
      "learning_rate": 1.627658971795898e-09,
      "loss": 0.3873,
      "step": 16134
    },
    {
      "epoch": 1.9782981853849928,
      "grad_norm": 1.9561498981314376,
      "learning_rate": 1.609423997363524e-09,
      "loss": 0.3979,
      "step": 16135
    },
    {
      "epoch": 1.9784207945071113,
      "grad_norm": 1.9812757431926455,
      "learning_rate": 1.591291711207088e-09,
      "loss": 0.4407,
      "step": 16136
    },
    {
      "epoch": 1.97854340362923,
      "grad_norm": 1.8226442870816184,
      "learning_rate": 1.5732621140712722e-09,
      "loss": 0.3793,
      "step": 16137
    },
    {
      "epoch": 1.9786660127513487,
      "grad_norm": 1.735874921469492,
      "learning_rate": 1.5553352066977057e-09,
      "loss": 0.4031,
      "step": 16138
    },
    {
      "epoch": 1.9787886218734674,
      "grad_norm": 2.132801028822095,
      "learning_rate": 1.5375109898230213e-09,
      "loss": 0.4603,
      "step": 16139
    },
    {
      "epoch": 1.978911230995586,
      "grad_norm": 2.0771522246613174,
      "learning_rate": 1.519789464179966e-09,
      "loss": 0.4272,
      "step": 16140
    },
    {
      "epoch": 1.9790338401177048,
      "grad_norm": 1.9242512615381895,
      "learning_rate": 1.5021706304965688e-09,
      "loss": 0.4301,
      "step": 16141
    },
    {
      "epoch": 1.9791564492398235,
      "grad_norm": 1.9516144857935023,
      "learning_rate": 1.4846544894972504e-09,
      "loss": 0.4247,
      "step": 16142
    },
    {
      "epoch": 1.9792790583619422,
      "grad_norm": 2.0547531186652592,
      "learning_rate": 1.4672410419017125e-09,
      "loss": 0.4133,
      "step": 16143
    },
    {
      "epoch": 1.9794016674840607,
      "grad_norm": 2.143514747308648,
      "learning_rate": 1.449930288426049e-09,
      "loss": 0.4816,
      "step": 16144
    },
    {
      "epoch": 1.9795242766061794,
      "grad_norm": 2.053922525241207,
      "learning_rate": 1.4327222297816357e-09,
      "loss": 0.4037,
      "step": 16145
    },
    {
      "epoch": 1.979646885728298,
      "grad_norm": 2.006676595730091,
      "learning_rate": 1.4156168666756842e-09,
      "loss": 0.4007,
      "step": 16146
    },
    {
      "epoch": 1.9797694948504168,
      "grad_norm": 2.0196219103381847,
      "learning_rate": 1.3986141998112435e-09,
      "loss": 0.4047,
      "step": 16147
    },
    {
      "epoch": 1.9798921039725355,
      "grad_norm": 2.038856123748885,
      "learning_rate": 1.3817142298871988e-09,
      "loss": 0.4438,
      "step": 16148
    },
    {
      "epoch": 1.9800147130946542,
      "grad_norm": 1.7810560440599144,
      "learning_rate": 1.3649169575979949e-09,
      "loss": 0.3958,
      "step": 16149
    },
    {
      "epoch": 1.9801373222167729,
      "grad_norm": 2.04154395305582,
      "learning_rate": 1.3482223836344676e-09,
      "loss": 0.4413,
      "step": 16150
    },
    {
      "epoch": 1.9802599313388916,
      "grad_norm": 2.0133000520906177,
      "learning_rate": 1.3316305086824577e-09,
      "loss": 0.4348,
      "step": 16151
    },
    {
      "epoch": 1.9803825404610103,
      "grad_norm": 1.894486984903934,
      "learning_rate": 1.3151413334239193e-09,
      "loss": 0.4188,
      "step": 16152
    },
    {
      "epoch": 1.980505149583129,
      "grad_norm": 1.991321131161384,
      "learning_rate": 1.298754858536644e-09,
      "loss": 0.394,
      "step": 16153
    },
    {
      "epoch": 1.9806277587052477,
      "grad_norm": 1.9662206864904042,
      "learning_rate": 1.2824710846939814e-09,
      "loss": 0.4596,
      "step": 16154
    },
    {
      "epoch": 1.9807503678273664,
      "grad_norm": 1.8020952787954168,
      "learning_rate": 1.266290012565674e-09,
      "loss": 0.3745,
      "step": 16155
    },
    {
      "epoch": 1.980872976949485,
      "grad_norm": 1.9942444000071788,
      "learning_rate": 1.2502116428161904e-09,
      "loss": 0.4274,
      "step": 16156
    },
    {
      "epoch": 1.9809955860716038,
      "grad_norm": 1.8534627053644954,
      "learning_rate": 1.2342359761069455e-09,
      "loss": 0.4357,
      "step": 16157
    },
    {
      "epoch": 1.9811181951937225,
      "grad_norm": 2.012896655466643,
      "learning_rate": 1.218363013094359e-09,
      "loss": 0.4707,
      "step": 16158
    },
    {
      "epoch": 1.9812408043158412,
      "grad_norm": 1.9932439368370203,
      "learning_rate": 1.202592754430687e-09,
      "loss": 0.422,
      "step": 16159
    },
    {
      "epoch": 1.9813634134379599,
      "grad_norm": 1.988129842007645,
      "learning_rate": 1.186925200764022e-09,
      "loss": 0.4016,
      "step": 16160
    },
    {
      "epoch": 1.9814860225600786,
      "grad_norm": 2.036842675544208,
      "learning_rate": 1.1713603527388484e-09,
      "loss": 0.4269,
      "step": 16161
    },
    {
      "epoch": 1.9816086316821973,
      "grad_norm": 1.8758219805500436,
      "learning_rate": 1.1558982109943772e-09,
      "loss": 0.4093,
      "step": 16162
    },
    {
      "epoch": 1.981731240804316,
      "grad_norm": 1.8833421033160385,
      "learning_rate": 1.1405387761664888e-09,
      "loss": 0.4578,
      "step": 16163
    },
    {
      "epoch": 1.9818538499264347,
      "grad_norm": 1.9790019707346675,
      "learning_rate": 1.1252820488863447e-09,
      "loss": 0.4168,
      "step": 16164
    },
    {
      "epoch": 1.9819764590485534,
      "grad_norm": 2.110983915299922,
      "learning_rate": 1.1101280297809436e-09,
      "loss": 0.4388,
      "step": 16165
    },
    {
      "epoch": 1.982099068170672,
      "grad_norm": 2.037537519820613,
      "learning_rate": 1.095076719473398e-09,
      "loss": 0.4018,
      "step": 16166
    },
    {
      "epoch": 1.9822216772927905,
      "grad_norm": 2.015934109084878,
      "learning_rate": 1.0801281185818247e-09,
      "loss": 0.4442,
      "step": 16167
    },
    {
      "epoch": 1.9823442864149092,
      "grad_norm": 2.0107527718615232,
      "learning_rate": 1.065282227721287e-09,
      "loss": 0.4424,
      "step": 16168
    },
    {
      "epoch": 1.982466895537028,
      "grad_norm": 1.992976319300119,
      "learning_rate": 1.0505390475015753e-09,
      "loss": 0.4191,
      "step": 16169
    },
    {
      "epoch": 1.9825895046591466,
      "grad_norm": 2.1132386443374074,
      "learning_rate": 1.0358985785285935e-09,
      "loss": 0.3545,
      "step": 16170
    },
    {
      "epoch": 1.9827121137812653,
      "grad_norm": 2.0375436292021303,
      "learning_rate": 1.0213608214043602e-09,
      "loss": 0.4455,
      "step": 16171
    },
    {
      "epoch": 1.982834722903384,
      "grad_norm": 2.048440853791411,
      "learning_rate": 1.006925776726453e-09,
      "loss": 0.4288,
      "step": 16172
    },
    {
      "epoch": 1.9829573320255027,
      "grad_norm": 2.026878968209519,
      "learning_rate": 9.925934450877305e-10,
      "loss": 0.46,
      "step": 16173
    },
    {
      "epoch": 1.9830799411476214,
      "grad_norm": 1.8547212985567065,
      "learning_rate": 9.783638270777219e-10,
      "loss": 0.4077,
      "step": 16174
    },
    {
      "epoch": 1.98320255026974,
      "grad_norm": 2.1772303398877093,
      "learning_rate": 9.642369232812366e-10,
      "loss": 0.4403,
      "step": 16175
    },
    {
      "epoch": 1.9833251593918586,
      "grad_norm": 2.128057268053087,
      "learning_rate": 9.502127342786438e-10,
      "loss": 0.4543,
      "step": 16176
    },
    {
      "epoch": 1.9834477685139773,
      "grad_norm": 2.007572465193526,
      "learning_rate": 9.362912606467046e-10,
      "loss": 0.4465,
      "step": 16177
    },
    {
      "epoch": 1.983570377636096,
      "grad_norm": 1.8888636501707992,
      "learning_rate": 9.224725029571835e-10,
      "loss": 0.4395,
      "step": 16178
    },
    {
      "epoch": 1.9836929867582147,
      "grad_norm": 1.8745573240667046,
      "learning_rate": 9.08756461778515e-10,
      "loss": 0.4088,
      "step": 16179
    },
    {
      "epoch": 1.9838155958803334,
      "grad_norm": 2.0463068408894425,
      "learning_rate": 8.951431376741371e-10,
      "loss": 0.4164,
      "step": 16180
    },
    {
      "epoch": 1.983938205002452,
      "grad_norm": 1.9329458827703987,
      "learning_rate": 8.816325312036023e-10,
      "loss": 0.4234,
      "step": 16181
    },
    {
      "epoch": 1.9840608141245708,
      "grad_norm": 1.7754292667070286,
      "learning_rate": 8.682246429225771e-10,
      "loss": 0.4299,
      "step": 16182
    },
    {
      "epoch": 1.9841834232466895,
      "grad_norm": 1.8419953246767782,
      "learning_rate": 8.549194733817323e-10,
      "loss": 0.448,
      "step": 16183
    },
    {
      "epoch": 1.9843060323688082,
      "grad_norm": 1.9879520116350398,
      "learning_rate": 8.417170231278526e-10,
      "loss": 0.4373,
      "step": 16184
    },
    {
      "epoch": 1.984428641490927,
      "grad_norm": 1.8848964935944676,
      "learning_rate": 8.286172927041147e-10,
      "loss": 0.4206,
      "step": 16185
    },
    {
      "epoch": 1.9845512506130456,
      "grad_norm": 2.0290708399926407,
      "learning_rate": 8.156202826486991e-10,
      "loss": 0.4314,
      "step": 16186
    },
    {
      "epoch": 1.9846738597351643,
      "grad_norm": 1.8359859151801756,
      "learning_rate": 8.027259934956233e-10,
      "loss": 0.4288,
      "step": 16187
    },
    {
      "epoch": 1.984796468857283,
      "grad_norm": 2.039659897140664,
      "learning_rate": 7.899344257750185e-10,
      "loss": 0.3754,
      "step": 16188
    },
    {
      "epoch": 1.9849190779794017,
      "grad_norm": 1.9285218694049477,
      "learning_rate": 7.772455800125756e-10,
      "loss": 0.4468,
      "step": 16189
    },
    {
      "epoch": 1.9850416871015204,
      "grad_norm": 2.0419984168467162,
      "learning_rate": 7.646594567298216e-10,
      "loss": 0.4217,
      "step": 16190
    },
    {
      "epoch": 1.985164296223639,
      "grad_norm": 1.8493237114341718,
      "learning_rate": 7.521760564441205e-10,
      "loss": 0.3921,
      "step": 16191
    },
    {
      "epoch": 1.9852869053457578,
      "grad_norm": 1.9861793027536565,
      "learning_rate": 7.39795379668673e-10,
      "loss": 0.4177,
      "step": 16192
    },
    {
      "epoch": 1.9854095144678765,
      "grad_norm": 1.9209582236449012,
      "learning_rate": 7.275174269119612e-10,
      "loss": 0.3871,
      "step": 16193
    },
    {
      "epoch": 1.9855321235899952,
      "grad_norm": 2.0288388482654804,
      "learning_rate": 7.153421986791365e-10,
      "loss": 0.4432,
      "step": 16194
    },
    {
      "epoch": 1.985654732712114,
      "grad_norm": 1.9178876950043375,
      "learning_rate": 7.032696954703544e-10,
      "loss": 0.4146,
      "step": 16195
    },
    {
      "epoch": 1.9857773418342326,
      "grad_norm": 1.898494892447554,
      "learning_rate": 6.912999177818846e-10,
      "loss": 0.4125,
      "step": 16196
    },
    {
      "epoch": 1.9858999509563513,
      "grad_norm": 1.8266452951748555,
      "learning_rate": 6.79432866105556e-10,
      "loss": 0.4183,
      "step": 16197
    },
    {
      "epoch": 1.98602256007847,
      "grad_norm": 2.0152504227578,
      "learning_rate": 6.676685409293115e-10,
      "loss": 0.4407,
      "step": 16198
    },
    {
      "epoch": 1.9861451692005885,
      "grad_norm": 1.9748115513061453,
      "learning_rate": 6.560069427366533e-10,
      "loss": 0.4212,
      "step": 16199
    },
    {
      "epoch": 1.9862677783227072,
      "grad_norm": 1.8845722512613703,
      "learning_rate": 6.444480720066426e-10,
      "loss": 0.4014,
      "step": 16200
    },
    {
      "epoch": 1.9863903874448259,
      "grad_norm": 2.0176709599395406,
      "learning_rate": 6.329919292147324e-10,
      "loss": 0.3823,
      "step": 16201
    },
    {
      "epoch": 1.9865129965669446,
      "grad_norm": 1.9341948265905597,
      "learning_rate": 6.216385148316573e-10,
      "loss": 0.451,
      "step": 16202
    },
    {
      "epoch": 1.9866356056890633,
      "grad_norm": 1.948843752550627,
      "learning_rate": 6.103878293239885e-10,
      "loss": 0.4044,
      "step": 16203
    },
    {
      "epoch": 1.986758214811182,
      "grad_norm": 1.8835068375812625,
      "learning_rate": 5.992398731544113e-10,
      "loss": 0.4087,
      "step": 16204
    },
    {
      "epoch": 1.9868808239333007,
      "grad_norm": 1.8665292951346428,
      "learning_rate": 5.881946467806155e-10,
      "loss": 0.3958,
      "step": 16205
    },
    {
      "epoch": 1.9870034330554194,
      "grad_norm": 1.831776251134447,
      "learning_rate": 5.772521506569595e-10,
      "loss": 0.3926,
      "step": 16206
    },
    {
      "epoch": 1.9871260421775379,
      "grad_norm": 1.9820394392595881,
      "learning_rate": 5.664123852330839e-10,
      "loss": 0.3904,
      "step": 16207
    },
    {
      "epoch": 1.9872486512996566,
      "grad_norm": 1.9039011371376506,
      "learning_rate": 5.55675350954743e-10,
      "loss": 0.4348,
      "step": 16208
    },
    {
      "epoch": 1.9873712604217753,
      "grad_norm": 2.0000795236592968,
      "learning_rate": 5.450410482629731e-10,
      "loss": 0.408,
      "step": 16209
    },
    {
      "epoch": 1.987493869543894,
      "grad_norm": 2.0269965420067475,
      "learning_rate": 5.345094775949244e-10,
      "loss": 0.4245,
      "step": 16210
    },
    {
      "epoch": 1.9876164786660127,
      "grad_norm": 1.960466949083729,
      "learning_rate": 5.240806393833064e-10,
      "loss": 0.4669,
      "step": 16211
    },
    {
      "epoch": 1.9877390877881314,
      "grad_norm": 1.9642805740856109,
      "learning_rate": 5.137545340572203e-10,
      "loss": 0.4194,
      "step": 16212
    },
    {
      "epoch": 1.98786169691025,
      "grad_norm": 1.8527661047105126,
      "learning_rate": 5.035311620407712e-10,
      "loss": 0.4219,
      "step": 16213
    },
    {
      "epoch": 1.9879843060323688,
      "grad_norm": 1.851857245375602,
      "learning_rate": 4.934105237541786e-10,
      "loss": 0.382,
      "step": 16214
    },
    {
      "epoch": 1.9881069151544875,
      "grad_norm": 1.912480920441777,
      "learning_rate": 4.833926196134986e-10,
      "loss": 0.4366,
      "step": 16215
    },
    {
      "epoch": 1.9882295242766062,
      "grad_norm": 1.8115119968361375,
      "learning_rate": 4.734774500303463e-10,
      "loss": 0.4169,
      "step": 16216
    },
    {
      "epoch": 1.9883521333987249,
      "grad_norm": 1.8909514685696376,
      "learning_rate": 4.636650154121736e-10,
      "loss": 0.4334,
      "step": 16217
    },
    {
      "epoch": 1.9884747425208436,
      "grad_norm": 1.9751726985947455,
      "learning_rate": 4.5395531616254653e-10,
      "loss": 0.4277,
      "step": 16218
    },
    {
      "epoch": 1.9885973516429623,
      "grad_norm": 1.9322539533270366,
      "learning_rate": 4.443483526805903e-10,
      "loss": 0.3864,
      "step": 16219
    },
    {
      "epoch": 1.988719960765081,
      "grad_norm": 1.9870293376031212,
      "learning_rate": 4.348441253609892e-10,
      "loss": 0.4349,
      "step": 16220
    },
    {
      "epoch": 1.9888425698871997,
      "grad_norm": 1.8929319941692901,
      "learning_rate": 4.254426345945417e-10,
      "loss": 0.4223,
      "step": 16221
    },
    {
      "epoch": 1.9889651790093184,
      "grad_norm": 1.9926377475240928,
      "learning_rate": 4.1614388076732793e-10,
      "loss": 0.4104,
      "step": 16222
    },
    {
      "epoch": 1.989087788131437,
      "grad_norm": 2.1171772896146086,
      "learning_rate": 4.0694786426181966e-10,
      "loss": 0.4407,
      "step": 16223
    },
    {
      "epoch": 1.9892103972535558,
      "grad_norm": 1.7432318734216534,
      "learning_rate": 3.978545854560478e-10,
      "loss": 0.4161,
      "step": 16224
    },
    {
      "epoch": 1.9893330063756745,
      "grad_norm": 2.000484349247133,
      "learning_rate": 3.8886404472360253e-10,
      "loss": 0.4177,
      "step": 16225
    },
    {
      "epoch": 1.9894556154977932,
      "grad_norm": 1.8554304293784618,
      "learning_rate": 3.799762424339104e-10,
      "loss": 0.4586,
      "step": 16226
    },
    {
      "epoch": 1.9895782246199119,
      "grad_norm": 2.015224670443978,
      "learning_rate": 3.7119117895251247e-10,
      "loss": 0.4114,
      "step": 16227
    },
    {
      "epoch": 1.9897008337420306,
      "grad_norm": 2.0714269236657814,
      "learning_rate": 3.625088546405087e-10,
      "loss": 0.4091,
      "step": 16228
    },
    {
      "epoch": 1.9898234428641492,
      "grad_norm": 2.1485921349758432,
      "learning_rate": 3.539292698545582e-10,
      "loss": 0.4388,
      "step": 16229
    },
    {
      "epoch": 1.9899460519862677,
      "grad_norm": 1.9530635431382999,
      "learning_rate": 3.4545242494743446e-10,
      "loss": 0.419,
      "step": 16230
    },
    {
      "epoch": 1.9900686611083864,
      "grad_norm": 1.767884577376448,
      "learning_rate": 3.370783202674699e-10,
      "loss": 0.3975,
      "step": 16231
    },
    {
      "epoch": 1.9901912702305051,
      "grad_norm": 1.9089282657928401,
      "learning_rate": 3.288069561588336e-10,
      "loss": 0.4322,
      "step": 16232
    },
    {
      "epoch": 1.9903138793526238,
      "grad_norm": 1.9051682453520793,
      "learning_rate": 3.206383329615315e-10,
      "loss": 0.3837,
      "step": 16233
    },
    {
      "epoch": 1.9904364884747425,
      "grad_norm": 2.06724473711222,
      "learning_rate": 3.125724510114059e-10,
      "loss": 0.4441,
      "step": 16234
    },
    {
      "epoch": 1.9905590975968612,
      "grad_norm": 1.9742152184526287,
      "learning_rate": 3.0460931064013597e-10,
      "loss": 0.3918,
      "step": 16235
    },
    {
      "epoch": 1.99068170671898,
      "grad_norm": 1.9953025155002622,
      "learning_rate": 2.9674891217440496e-10,
      "loss": 0.3975,
      "step": 16236
    },
    {
      "epoch": 1.9908043158410986,
      "grad_norm": 1.989435335269215,
      "learning_rate": 2.889912559378427e-10,
      "loss": 0.4367,
      "step": 16237
    },
    {
      "epoch": 1.990926924963217,
      "grad_norm": 2.060932263978889,
      "learning_rate": 2.8133634224908337e-10,
      "loss": 0.4355,
      "step": 16238
    },
    {
      "epoch": 1.9910495340853358,
      "grad_norm": 1.8221288574945962,
      "learning_rate": 2.7378417142287507e-10,
      "loss": 0.417,
      "step": 16239
    },
    {
      "epoch": 1.9911721432074545,
      "grad_norm": 1.95526159905873,
      "learning_rate": 2.663347437695252e-10,
      "loss": 0.4511,
      "step": 16240
    },
    {
      "epoch": 1.9912947523295732,
      "grad_norm": 2.0233483657449836,
      "learning_rate": 2.589880595951777e-10,
      "loss": 0.4153,
      "step": 16241
    },
    {
      "epoch": 1.991417361451692,
      "grad_norm": 1.9763536469849845,
      "learning_rate": 2.517441192018133e-10,
      "loss": 0.3996,
      "step": 16242
    },
    {
      "epoch": 1.9915399705738106,
      "grad_norm": 2.105577930100112,
      "learning_rate": 2.4460292288752683e-10,
      "loss": 0.4391,
      "step": 16243
    },
    {
      "epoch": 1.9916625796959293,
      "grad_norm": 2.079081855363515,
      "learning_rate": 2.375644709451397e-10,
      "loss": 0.4495,
      "step": 16244
    },
    {
      "epoch": 1.991785188818048,
      "grad_norm": 2.1041181378451537,
      "learning_rate": 2.3062876366469755e-10,
      "loss": 0.4441,
      "step": 16245
    },
    {
      "epoch": 1.9919077979401667,
      "grad_norm": 1.8628153101825615,
      "learning_rate": 2.2379580133069512e-10,
      "loss": 0.3935,
      "step": 16246
    },
    {
      "epoch": 1.9920304070622854,
      "grad_norm": 1.6377021930987377,
      "learning_rate": 2.1706558422429635e-10,
      "loss": 0.4044,
      "step": 16247
    },
    {
      "epoch": 1.992153016184404,
      "grad_norm": 2.0245183193922824,
      "learning_rate": 2.104381126219468e-10,
      "loss": 0.4001,
      "step": 16248
    },
    {
      "epoch": 1.9922756253065228,
      "grad_norm": 1.818871810452812,
      "learning_rate": 2.0391338679592864e-10,
      "loss": 0.4209,
      "step": 16249
    },
    {
      "epoch": 1.9923982344286415,
      "grad_norm": 2.1539391324258226,
      "learning_rate": 1.9749140701491588e-10,
      "loss": 0.4426,
      "step": 16250
    },
    {
      "epoch": 1.9925208435507602,
      "grad_norm": 2.056259454429453,
      "learning_rate": 1.9117217354230887e-10,
      "loss": 0.4204,
      "step": 16251
    },
    {
      "epoch": 1.992643452672879,
      "grad_norm": 1.8686351157663728,
      "learning_rate": 1.8495568663845497e-10,
      "loss": 0.4172,
      "step": 16252
    },
    {
      "epoch": 1.9927660617949976,
      "grad_norm": 1.9290400763442854,
      "learning_rate": 1.7884194655815035e-10,
      "loss": 0.4556,
      "step": 16253
    },
    {
      "epoch": 1.9928886709171163,
      "grad_norm": 1.8984392716715264,
      "learning_rate": 1.7283095355313807e-10,
      "loss": 0.4125,
      "step": 16254
    },
    {
      "epoch": 1.993011280039235,
      "grad_norm": 2.056993640216812,
      "learning_rate": 1.6692270787044272e-10,
      "loss": 0.4404,
      "step": 16255
    },
    {
      "epoch": 1.9931338891613537,
      "grad_norm": 1.8216800367365265,
      "learning_rate": 1.6111720975264812e-10,
      "loss": 0.3792,
      "step": 16256
    },
    {
      "epoch": 1.9932564982834724,
      "grad_norm": 1.8742567096461022,
      "learning_rate": 1.5541445943872967e-10,
      "loss": 0.392,
      "step": 16257
    },
    {
      "epoch": 1.993379107405591,
      "grad_norm": 1.8550517867707736,
      "learning_rate": 1.498144571629445e-10,
      "loss": 0.4137,
      "step": 16258
    },
    {
      "epoch": 1.9935017165277098,
      "grad_norm": 1.9182135045269877,
      "learning_rate": 1.4431720315538634e-10,
      "loss": 0.4064,
      "step": 16259
    },
    {
      "epoch": 1.9936243256498285,
      "grad_norm": 1.9654412584724823,
      "learning_rate": 1.3892269764198552e-10,
      "loss": 0.4448,
      "step": 16260
    },
    {
      "epoch": 1.9937469347719472,
      "grad_norm": 1.945752679469385,
      "learning_rate": 1.3363094084450912e-10,
      "loss": 0.4404,
      "step": 16261
    },
    {
      "epoch": 1.9938695438940657,
      "grad_norm": 1.9025861894585374,
      "learning_rate": 1.284419329802833e-10,
      "loss": 0.4528,
      "step": 16262
    },
    {
      "epoch": 1.9939921530161844,
      "grad_norm": 1.9648643512899708,
      "learning_rate": 1.2335567426302596e-10,
      "loss": 0.4304,
      "step": 16263
    },
    {
      "epoch": 1.994114762138303,
      "grad_norm": 2.0866578045609767,
      "learning_rate": 1.183721649014591e-10,
      "loss": 0.466,
      "step": 16264
    },
    {
      "epoch": 1.9942373712604218,
      "grad_norm": 1.9191423539442654,
      "learning_rate": 1.1349140510041879e-10,
      "loss": 0.4127,
      "step": 16265
    },
    {
      "epoch": 1.9943599803825405,
      "grad_norm": 1.8771956879939686,
      "learning_rate": 1.0871339506057788e-10,
      "loss": 0.4405,
      "step": 16266
    },
    {
      "epoch": 1.9944825895046592,
      "grad_norm": 1.8171916189266488,
      "learning_rate": 1.0403813497844583e-10,
      "loss": 0.4353,
      "step": 16267
    },
    {
      "epoch": 1.9946051986267779,
      "grad_norm": 1.7761609117502402,
      "learning_rate": 9.946562504609125e-11,
      "loss": 0.4218,
      "step": 16268
    },
    {
      "epoch": 1.9947278077488964,
      "grad_norm": 1.8944190982658604,
      "learning_rate": 9.499586545141936e-11,
      "loss": 0.461,
      "step": 16269
    },
    {
      "epoch": 1.994850416871015,
      "grad_norm": 1.7955466877895572,
      "learning_rate": 9.062885637817209e-11,
      "loss": 0.4444,
      "step": 16270
    },
    {
      "epoch": 1.9949730259931338,
      "grad_norm": 1.9803346332156402,
      "learning_rate": 8.636459800565045e-11,
      "loss": 0.4258,
      "step": 16271
    },
    {
      "epoch": 1.9950956351152525,
      "grad_norm": 1.9688193774855143,
      "learning_rate": 8.220309050926967e-11,
      "loss": 0.4052,
      "step": 16272
    },
    {
      "epoch": 1.9952182442373712,
      "grad_norm": 2.0926546319877817,
      "learning_rate": 7.814433406028166e-11,
      "loss": 0.4512,
      "step": 16273
    },
    {
      "epoch": 1.9953408533594899,
      "grad_norm": 2.000459936440041,
      "learning_rate": 7.418832882521987e-11,
      "loss": 0.4355,
      "step": 16274
    },
    {
      "epoch": 1.9954634624816086,
      "grad_norm": 1.78245155418511,
      "learning_rate": 7.033507496673198e-11,
      "loss": 0.3948,
      "step": 16275
    },
    {
      "epoch": 1.9955860716037273,
      "grad_norm": 1.974030002514398,
      "learning_rate": 6.658457264330231e-11,
      "loss": 0.4797,
      "step": 16276
    },
    {
      "epoch": 1.995708680725846,
      "grad_norm": 1.8646319898204144,
      "learning_rate": 6.293682200925189e-11,
      "loss": 0.4078,
      "step": 16277
    },
    {
      "epoch": 1.9958312898479647,
      "grad_norm": 2.017833507990152,
      "learning_rate": 5.939182321418325e-11,
      "loss": 0.4681,
      "step": 16278
    },
    {
      "epoch": 1.9959538989700834,
      "grad_norm": 1.956502786786716,
      "learning_rate": 5.594957640381315e-11,
      "loss": 0.4447,
      "step": 16279
    },
    {
      "epoch": 1.996076508092202,
      "grad_norm": 1.9719183482463454,
      "learning_rate": 5.261008171969506e-11,
      "loss": 0.4266,
      "step": 16280
    },
    {
      "epoch": 1.9961991172143208,
      "grad_norm": 1.9984993049087734,
      "learning_rate": 4.9373339298941505e-11,
      "loss": 0.4169,
      "step": 16281
    },
    {
      "epoch": 1.9963217263364395,
      "grad_norm": 1.922450568787385,
      "learning_rate": 4.62393492750568e-11,
      "loss": 0.4027,
      "step": 16282
    },
    {
      "epoch": 1.9964443354585582,
      "grad_norm": 1.9111559500280795,
      "learning_rate": 4.320811177654927e-11,
      "loss": 0.4084,
      "step": 16283
    },
    {
      "epoch": 1.9965669445806769,
      "grad_norm": 2.068628008678012,
      "learning_rate": 4.027962692776388e-11,
      "loss": 0.3929,
      "step": 16284
    },
    {
      "epoch": 1.9966895537027955,
      "grad_norm": 2.030846732170121,
      "learning_rate": 3.74538948494374e-11,
      "loss": 0.4526,
      "step": 16285
    },
    {
      "epoch": 1.9968121628249142,
      "grad_norm": 1.9999248810112424,
      "learning_rate": 3.473091565786568e-11,
      "loss": 0.4413,
      "step": 16286
    },
    {
      "epoch": 1.996934771947033,
      "grad_norm": 1.9584822503508252,
      "learning_rate": 3.2110689464348587e-11,
      "loss": 0.4764,
      "step": 16287
    },
    {
      "epoch": 1.9970573810691516,
      "grad_norm": 2.0511595479256597,
      "learning_rate": 2.9593216377132864e-11,
      "loss": 0.449,
      "step": 16288
    },
    {
      "epoch": 1.9971799901912703,
      "grad_norm": 1.9854221365376208,
      "learning_rate": 2.7178496499191688e-11,
      "loss": 0.389,
      "step": 16289
    },
    {
      "epoch": 1.997302599313389,
      "grad_norm": 1.8689592247248055,
      "learning_rate": 2.486652993044514e-11,
      "loss": 0.4383,
      "step": 16290
    },
    {
      "epoch": 1.9974252084355077,
      "grad_norm": 2.0267139330984896,
      "learning_rate": 2.265731676526217e-11,
      "loss": 0.4462,
      "step": 16291
    },
    {
      "epoch": 1.9975478175576264,
      "grad_norm": 1.9849118581196332,
      "learning_rate": 2.055085709495863e-11,
      "loss": 0.4223,
      "step": 16292
    },
    {
      "epoch": 1.997670426679745,
      "grad_norm": 2.073564678248084,
      "learning_rate": 1.8547151005576803e-11,
      "loss": 0.4053,
      "step": 16293
    },
    {
      "epoch": 1.9977930358018636,
      "grad_norm": 2.0179976236482347,
      "learning_rate": 1.6646198580105855e-11,
      "loss": 0.4427,
      "step": 16294
    },
    {
      "epoch": 1.9979156449239823,
      "grad_norm": 2.0131447462769128,
      "learning_rate": 1.48479998962614e-11,
      "loss": 0.3922,
      "step": 16295
    },
    {
      "epoch": 1.998038254046101,
      "grad_norm": 1.8811075667250206,
      "learning_rate": 1.315255502787327e-11,
      "loss": 0.3975,
      "step": 16296
    },
    {
      "epoch": 1.9981608631682197,
      "grad_norm": 1.949402350305885,
      "learning_rate": 1.1559864044885516e-11,
      "loss": 0.3892,
      "step": 16297
    },
    {
      "epoch": 1.9982834722903384,
      "grad_norm": 1.6992656821384338,
      "learning_rate": 1.006992701252374e-11,
      "loss": 0.4327,
      "step": 16298
    },
    {
      "epoch": 1.9984060814124571,
      "grad_norm": 2.0327189099532204,
      "learning_rate": 8.682743992405319e-12,
      "loss": 0.4389,
      "step": 16299
    },
    {
      "epoch": 1.9985286905345758,
      "grad_norm": 1.8612638694695915,
      "learning_rate": 7.398315041151626e-12,
      "loss": 0.4397,
      "step": 16300
    },
    {
      "epoch": 1.9986512996566943,
      "grad_norm": 2.031066012556018,
      "learning_rate": 6.216640212053371e-12,
      "loss": 0.4046,
      "step": 16301
    },
    {
      "epoch": 1.998773908778813,
      "grad_norm": 1.9284526453440438,
      "learning_rate": 5.13771955285014e-12,
      "loss": 0.4716,
      "step": 16302
    },
    {
      "epoch": 1.9988965179009317,
      "grad_norm": 1.7666914447740414,
      "learning_rate": 4.161553108783522e-12,
      "loss": 0.4077,
      "step": 16303
    },
    {
      "epoch": 1.9990191270230504,
      "grad_norm": 1.8460178315512819,
      "learning_rate": 3.288140919266436e-12,
      "loss": 0.4488,
      "step": 16304
    },
    {
      "epoch": 1.999141736145169,
      "grad_norm": 1.9028761671060517,
      "learning_rate": 2.5174830206586843e-12,
      "loss": 0.4121,
      "step": 16305
    },
    {
      "epoch": 1.9992643452672878,
      "grad_norm": 2.0381151687782237,
      "learning_rate": 1.8495794446016234e-12,
      "loss": 0.3959,
      "step": 16306
    },
    {
      "epoch": 1.9993869543894065,
      "grad_norm": 1.9787456119473739,
      "learning_rate": 1.284430218295718e-12,
      "loss": 0.3927,
      "step": 16307
    },
    {
      "epoch": 1.9995095635115252,
      "grad_norm": 2.0430782654469253,
      "learning_rate": 8.220353650556512e-13,
      "loss": 0.4312,
      "step": 16308
    },
    {
      "epoch": 1.999632172633644,
      "grad_norm": 1.9047117223013188,
      "learning_rate": 4.623949037552144e-13,
      "loss": 0.4274,
      "step": 16309
    },
    {
      "epoch": 1.9997547817557626,
      "grad_norm": 1.6654537462669283,
      "learning_rate": 2.055088496599744e-13,
      "loss": 0.3913,
      "step": 16310
    },
    {
      "epoch": 1.9998773908778813,
      "grad_norm": 1.959907780612039,
      "learning_rate": 5.137721303949406e-14,
      "loss": 0.4125,
      "step": 16311
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0558295458955045,
      "learning_rate": 0.0,
      "loss": 0.4375,
      "step": 16312
    },
    {
      "epoch": 2.0,
      "step": 16312,
      "total_flos": 2571439607218176.0,
      "train_loss": 0.5015400409344045,
      "train_runtime": 29771.6371,
      "train_samples_per_second": 17.532,
      "train_steps_per_second": 0.548
    }
  ],
  "logging_steps": 1,
  "max_steps": 16312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2571439607218176.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
