[ 1/ 720] output_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 2/ 720] per_layer_model_proj.weight - [ 2560, 10752, 1, 1], type = f16, size = 52.500 MiB
[ 3/ 720] per_layer_proj_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 4/ 720] per_layer_token_embd.weight - [ 10752, 262144, 1, 1], type = f16, converting to q8_0 .. size = 5376.00 MiB -> 2856.00 MiB
[ 5/ 720] rope_freqs.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 6/ 720] token_embd.weight - [ 2560, 262144, 1, 1], type = f16, converting to q8_0 .. size = 1280.00 MiB -> 680.00 MiB
[ 7/ 720] blk.0.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 8/ 720] blk.0.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 9/ 720] blk.0.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 10/ 720] blk.0.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 11/ 720] blk.0.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 12/ 720] blk.0.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 13/ 720] blk.0.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 14/ 720] blk.0.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 15/ 720] blk.0.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 16/ 720] blk.0.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 17/ 720] blk.0.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 18/ 720] blk.0.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 19/ 720] blk.0.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 20/ 720] blk.0.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 21/ 720] blk.0.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 22/ 720] blk.0.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 23/ 720] blk.0.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 24/ 720] blk.1.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 25/ 720] blk.1.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 26/ 720] blk.1.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 27/ 720] blk.1.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 28/ 720] blk.1.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 29/ 720] blk.1.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 30/ 720] blk.1.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 31/ 720] blk.1.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 32/ 720] blk.1.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 33/ 720] blk.1.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 34/ 720] blk.1.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 35/ 720] blk.1.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 36/ 720] blk.1.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 37/ 720] blk.1.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 38/ 720] blk.1.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 39/ 720] blk.1.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 40/ 720] blk.1.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 41/ 720] blk.2.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 42/ 720] blk.2.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 43/ 720] blk.2.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 44/ 720] blk.2.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 45/ 720] blk.2.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 46/ 720] blk.2.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 47/ 720] blk.2.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 48/ 720] blk.2.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 49/ 720] blk.2.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 50/ 720] blk.2.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 51/ 720] blk.2.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 52/ 720] blk.2.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 53/ 720] blk.2.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 54/ 720] blk.2.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 55/ 720] blk.2.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 56/ 720] blk.2.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 57/ 720] blk.2.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 58/ 720] blk.3.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 59/ 720] blk.3.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 60/ 720] blk.3.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 61/ 720] blk.3.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 62/ 720] blk.3.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 63/ 720] blk.3.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 64/ 720] blk.3.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 65/ 720] blk.3.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 66/ 720] blk.3.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 67/ 720] blk.3.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 68/ 720] blk.3.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 69/ 720] blk.3.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 70/ 720] blk.3.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 71/ 720] blk.3.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 72/ 720] blk.3.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 73/ 720] blk.3.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 74/ 720] blk.3.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 75/ 720] blk.4.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 76/ 720] blk.4.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 77/ 720] blk.4.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 78/ 720] blk.4.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 79/ 720] blk.4.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 80/ 720] blk.4.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 81/ 720] blk.4.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 82/ 720] blk.4.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 83/ 720] blk.4.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 84/ 720] blk.4.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 85/ 720] blk.4.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 86/ 720] blk.4.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 87/ 720] blk.4.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 88/ 720] blk.4.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 89/ 720] blk.4.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 90/ 720] blk.4.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 91/ 720] blk.4.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 92/ 720] blk.5.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 93/ 720] blk.5.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 94/ 720] blk.5.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 95/ 720] blk.5.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 96/ 720] blk.5.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 97/ 720] blk.5.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 98/ 720] blk.5.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 99/ 720] blk.5.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 100/ 720] blk.5.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 101/ 720] blk.5.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 102/ 720] blk.5.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 103/ 720] blk.5.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 104/ 720] blk.5.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 105/ 720] blk.5.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 106/ 720] blk.5.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 107/ 720] blk.5.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 108/ 720] blk.5.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 109/ 720] blk.6.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 110/ 720] blk.6.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 111/ 720] blk.6.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 112/ 720] blk.6.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 113/ 720] blk.6.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 114/ 720] blk.6.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 115/ 720] blk.6.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 116/ 720] blk.6.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 117/ 720] blk.6.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 118/ 720] blk.6.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 119/ 720] blk.6.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 120/ 720] blk.6.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 121/ 720] blk.6.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 122/ 720] blk.6.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 123/ 720] blk.6.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 124/ 720] blk.6.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 125/ 720] blk.6.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 126/ 720] blk.7.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 127/ 720] blk.7.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 128/ 720] blk.7.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 129/ 720] blk.7.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 130/ 720] blk.7.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 131/ 720] blk.7.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 132/ 720] blk.7.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 133/ 720] blk.7.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 134/ 720] blk.7.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 135/ 720] blk.7.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 136/ 720] blk.7.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 137/ 720] blk.7.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 138/ 720] blk.7.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 139/ 720] blk.7.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 140/ 720] blk.7.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 141/ 720] blk.7.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 142/ 720] blk.7.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 143/ 720] blk.8.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 144/ 720] blk.8.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 145/ 720] blk.8.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 146/ 720] blk.8.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 147/ 720] blk.8.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 148/ 720] blk.8.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 149/ 720] blk.8.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 150/ 720] blk.8.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 151/ 720] blk.8.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 152/ 720] blk.8.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 153/ 720] blk.8.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 154/ 720] blk.8.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 155/ 720] blk.8.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 156/ 720] blk.8.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 157/ 720] blk.8.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 158/ 720] blk.8.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 159/ 720] blk.8.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 160/ 720] blk.9.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 161/ 720] blk.9.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 162/ 720] blk.9.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 163/ 720] blk.9.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 164/ 720] blk.9.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 165/ 720] blk.9.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 166/ 720] blk.9.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 167/ 720] blk.9.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 168/ 720] blk.9.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 169/ 720] blk.9.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 170/ 720] blk.9.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 171/ 720] blk.9.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 172/ 720] blk.9.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 173/ 720] blk.9.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 174/ 720] blk.9.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 175/ 720] blk.9.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 176/ 720] blk.9.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 177/ 720] blk.10.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 178/ 720] blk.10.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 179/ 720] blk.10.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 180/ 720] blk.10.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 181/ 720] blk.10.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 182/ 720] blk.10.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 183/ 720] blk.10.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 184/ 720] blk.10.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 185/ 720] blk.10.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 186/ 720] blk.10.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 187/ 720] blk.10.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 188/ 720] blk.10.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 189/ 720] blk.10.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 190/ 720] blk.10.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 191/ 720] blk.10.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 192/ 720] blk.10.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 193/ 720] blk.10.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 194/ 720] blk.11.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 195/ 720] blk.11.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 196/ 720] blk.11.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 197/ 720] blk.11.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 198/ 720] blk.11.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 199/ 720] blk.11.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 200/ 720] blk.11.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 201/ 720] blk.11.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 202/ 720] blk.11.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 203/ 720] blk.11.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 204/ 720] blk.11.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 205/ 720] blk.11.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 206/ 720] blk.11.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 207/ 720] blk.11.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 208/ 720] blk.11.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 209/ 720] blk.11.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 210/ 720] blk.11.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 211/ 720] blk.12.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 212/ 720] blk.12.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 213/ 720] blk.12.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 214/ 720] blk.12.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 215/ 720] blk.12.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 216/ 720] blk.12.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 217/ 720] blk.12.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 218/ 720] blk.12.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 219/ 720] blk.12.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 220/ 720] blk.12.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 221/ 720] blk.12.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 222/ 720] blk.12.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 223/ 720] blk.12.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 224/ 720] blk.12.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 225/ 720] blk.12.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 226/ 720] blk.12.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 227/ 720] blk.12.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 228/ 720] blk.13.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 229/ 720] blk.13.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 230/ 720] blk.13.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 231/ 720] blk.13.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 232/ 720] blk.13.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 233/ 720] blk.13.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 234/ 720] blk.13.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 235/ 720] blk.13.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 236/ 720] blk.13.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 237/ 720] blk.13.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 238/ 720] blk.13.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 239/ 720] blk.13.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 240/ 720] blk.13.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 241/ 720] blk.13.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 242/ 720] blk.13.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 243/ 720] blk.13.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 244/ 720] blk.13.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 245/ 720] blk.14.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 246/ 720] blk.14.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 247/ 720] blk.14.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 248/ 720] blk.14.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 249/ 720] blk.14.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 250/ 720] blk.14.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 251/ 720] blk.14.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 252/ 720] blk.14.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 253/ 720] blk.14.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 254/ 720] blk.14.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 255/ 720] blk.14.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 256/ 720] blk.14.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 257/ 720] blk.14.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 258/ 720] blk.14.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 259/ 720] blk.14.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 260/ 720] blk.14.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 261/ 720] blk.14.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 262/ 720] blk.15.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 263/ 720] blk.15.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 264/ 720] blk.15.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 265/ 720] blk.15.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 266/ 720] blk.15.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 267/ 720] blk.15.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 268/ 720] blk.15.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 269/ 720] blk.15.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 270/ 720] blk.15.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 271/ 720] blk.15.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 272/ 720] blk.15.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 273/ 720] blk.15.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 274/ 720] blk.15.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 275/ 720] blk.15.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 276/ 720] blk.15.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 277/ 720] blk.15.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 278/ 720] blk.15.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 279/ 720] blk.16.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 280/ 720] blk.16.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 281/ 720] blk.16.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 282/ 720] blk.16.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 283/ 720] blk.16.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 284/ 720] blk.16.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 285/ 720] blk.16.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 286/ 720] blk.16.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 287/ 720] blk.16.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 288/ 720] blk.16.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 289/ 720] blk.16.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 290/ 720] blk.16.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 291/ 720] blk.16.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 292/ 720] blk.16.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 293/ 720] blk.16.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 294/ 720] blk.16.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 295/ 720] blk.16.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 296/ 720] blk.17.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 297/ 720] blk.17.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 298/ 720] blk.17.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 299/ 720] blk.17.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 300/ 720] blk.17.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 301/ 720] blk.17.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 302/ 720] blk.17.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 303/ 720] blk.17.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 304/ 720] blk.17.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 305/ 720] blk.17.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 306/ 720] blk.17.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 307/ 720] blk.17.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 308/ 720] blk.17.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 309/ 720] blk.17.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 310/ 720] blk.17.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 311/ 720] blk.17.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 312/ 720] blk.17.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 313/ 720] blk.18.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 314/ 720] blk.18.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 315/ 720] blk.18.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 316/ 720] blk.18.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 317/ 720] blk.18.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 318/ 720] blk.18.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 319/ 720] blk.18.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 320/ 720] blk.18.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 321/ 720] blk.18.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 322/ 720] blk.18.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 323/ 720] blk.18.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 324/ 720] blk.18.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 325/ 720] blk.18.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 326/ 720] blk.18.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 327/ 720] blk.18.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 328/ 720] blk.18.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 329/ 720] blk.18.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 330/ 720] blk.19.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 331/ 720] blk.19.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 332/ 720] blk.19.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 333/ 720] blk.19.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 334/ 720] blk.19.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 335/ 720] blk.19.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 336/ 720] blk.19.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 337/ 720] blk.19.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 338/ 720] blk.19.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 339/ 720] blk.19.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 340/ 720] blk.19.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 341/ 720] blk.19.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 342/ 720] blk.19.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 343/ 720] blk.19.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 344/ 720] blk.19.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 345/ 720] blk.19.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 346/ 720] blk.19.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 347/ 720] blk.20.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 348/ 720] blk.20.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 349/ 720] blk.20.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 350/ 720] blk.20.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 351/ 720] blk.20.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 352/ 720] blk.20.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 353/ 720] blk.20.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 354/ 720] blk.20.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 355/ 720] blk.20.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 356/ 720] blk.20.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 357/ 720] blk.20.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 358/ 720] blk.20.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 359/ 720] blk.20.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 360/ 720] blk.20.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 361/ 720] blk.20.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 362/ 720] blk.20.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 363/ 720] blk.20.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 364/ 720] blk.21.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 365/ 720] blk.21.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 366/ 720] blk.21.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 367/ 720] blk.21.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 368/ 720] blk.21.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 369/ 720] blk.21.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 370/ 720] blk.21.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 371/ 720] blk.21.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 372/ 720] blk.21.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 373/ 720] blk.21.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 374/ 720] blk.21.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 375/ 720] blk.21.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 376/ 720] blk.21.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 377/ 720] blk.21.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 378/ 720] blk.21.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 379/ 720] blk.21.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 380/ 720] blk.21.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 381/ 720] blk.22.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 382/ 720] blk.22.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 383/ 720] blk.22.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 384/ 720] blk.22.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 385/ 720] blk.22.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 386/ 720] blk.22.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 387/ 720] blk.22.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 388/ 720] blk.22.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 389/ 720] blk.22.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 390/ 720] blk.22.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 391/ 720] blk.22.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 392/ 720] blk.22.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 393/ 720] blk.22.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 394/ 720] blk.22.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 395/ 720] blk.22.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 396/ 720] blk.22.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 397/ 720] blk.22.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 398/ 720] blk.23.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 399/ 720] blk.23.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 400/ 720] blk.23.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 401/ 720] blk.23.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 402/ 720] blk.23.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 403/ 720] blk.23.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 404/ 720] blk.23.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 405/ 720] blk.23.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 406/ 720] blk.23.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 407/ 720] blk.23.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 408/ 720] blk.23.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 409/ 720] blk.23.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 410/ 720] blk.23.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 411/ 720] blk.23.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 412/ 720] blk.23.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 413/ 720] blk.23.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 414/ 720] blk.23.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 415/ 720] blk.24.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 416/ 720] blk.24.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 417/ 720] blk.24.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 418/ 720] blk.24.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 419/ 720] blk.24.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 420/ 720] blk.24.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 421/ 720] blk.24.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 422/ 720] blk.24.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 423/ 720] blk.24.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 424/ 720] blk.24.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 425/ 720] blk.24.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 426/ 720] blk.24.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 427/ 720] blk.24.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 428/ 720] blk.24.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 429/ 720] blk.24.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 430/ 720] blk.24.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 431/ 720] blk.24.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 432/ 720] blk.25.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 433/ 720] blk.25.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 434/ 720] blk.25.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 435/ 720] blk.25.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 436/ 720] blk.25.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 437/ 720] blk.25.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 438/ 720] blk.25.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 439/ 720] blk.25.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 440/ 720] blk.25.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 441/ 720] blk.25.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 442/ 720] blk.25.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 443/ 720] blk.25.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 444/ 720] blk.25.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 445/ 720] blk.25.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 446/ 720] blk.25.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 447/ 720] blk.25.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 448/ 720] blk.25.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 449/ 720] blk.26.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 450/ 720] blk.26.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 451/ 720] blk.26.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 452/ 720] blk.26.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 453/ 720] blk.26.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 454/ 720] blk.26.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 455/ 720] blk.26.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 456/ 720] blk.26.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 457/ 720] blk.26.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 458/ 720] blk.26.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 459/ 720] blk.26.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 460/ 720] blk.26.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 461/ 720] blk.26.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 462/ 720] blk.26.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 463/ 720] blk.26.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 464/ 720] blk.26.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 465/ 720] blk.26.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 466/ 720] blk.27.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 467/ 720] blk.27.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 468/ 720] blk.27.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 469/ 720] blk.27.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 470/ 720] blk.27.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 471/ 720] blk.27.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 472/ 720] blk.27.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 473/ 720] blk.27.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 474/ 720] blk.27.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 475/ 720] blk.27.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 476/ 720] blk.27.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 477/ 720] blk.27.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 478/ 720] blk.27.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 479/ 720] blk.27.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 480/ 720] blk.27.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 481/ 720] blk.27.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 482/ 720] blk.27.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 483/ 720] blk.28.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 484/ 720] blk.28.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 485/ 720] blk.28.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 486/ 720] blk.28.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 487/ 720] blk.28.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 488/ 720] blk.28.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 489/ 720] blk.28.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 490/ 720] blk.28.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 491/ 720] blk.28.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 492/ 720] blk.28.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 493/ 720] blk.28.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 494/ 720] blk.28.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 495/ 720] blk.28.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 496/ 720] blk.28.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 497/ 720] blk.28.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 498/ 720] blk.28.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 499/ 720] blk.28.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 500/ 720] blk.29.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 501/ 720] blk.29.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 502/ 720] blk.29.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 503/ 720] blk.29.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 504/ 720] blk.29.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 505/ 720] blk.29.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 506/ 720] blk.29.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 507/ 720] blk.29.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 508/ 720] blk.29.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 509/ 720] blk.29.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 510/ 720] blk.29.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 511/ 720] blk.29.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 512/ 720] blk.29.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 513/ 720] blk.29.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 514/ 720] blk.29.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 515/ 720] blk.29.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 516/ 720] blk.29.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 517/ 720] blk.30.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 518/ 720] blk.30.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 519/ 720] blk.30.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 520/ 720] blk.30.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 521/ 720] blk.30.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 522/ 720] blk.30.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 523/ 720] blk.30.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 524/ 720] blk.30.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 525/ 720] blk.30.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 526/ 720] blk.30.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 527/ 720] blk.30.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 528/ 720] blk.30.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 529/ 720] blk.30.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 530/ 720] blk.30.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 531/ 720] blk.30.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 532/ 720] blk.30.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 533/ 720] blk.30.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 534/ 720] blk.31.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 535/ 720] blk.31.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 536/ 720] blk.31.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 537/ 720] blk.31.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 538/ 720] blk.31.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 539/ 720] blk.31.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 540/ 720] blk.31.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 541/ 720] blk.31.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 542/ 720] blk.31.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 543/ 720] blk.31.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 544/ 720] blk.31.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 545/ 720] blk.31.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 546/ 720] blk.31.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 547/ 720] blk.31.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 548/ 720] blk.31.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 549/ 720] blk.31.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 550/ 720] blk.31.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 551/ 720] blk.32.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 552/ 720] blk.32.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 553/ 720] blk.32.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 554/ 720] blk.32.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 555/ 720] blk.32.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 556/ 720] blk.32.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 557/ 720] blk.32.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 558/ 720] blk.32.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 559/ 720] blk.32.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 560/ 720] blk.32.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 561/ 720] blk.32.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 562/ 720] blk.32.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 563/ 720] blk.32.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 564/ 720] blk.32.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 565/ 720] blk.32.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 566/ 720] blk.32.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 567/ 720] blk.32.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 568/ 720] blk.33.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 569/ 720] blk.33.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 570/ 720] blk.33.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 571/ 720] blk.33.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 572/ 720] blk.33.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 573/ 720] blk.33.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 574/ 720] blk.33.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 575/ 720] blk.33.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 576/ 720] blk.33.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 577/ 720] blk.33.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 578/ 720] blk.33.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 579/ 720] blk.33.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 580/ 720] blk.33.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 581/ 720] blk.33.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 582/ 720] blk.33.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 583/ 720] blk.33.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 584/ 720] blk.33.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 585/ 720] blk.34.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 586/ 720] blk.34.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 587/ 720] blk.34.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 588/ 720] blk.34.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 589/ 720] blk.34.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 590/ 720] blk.34.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 591/ 720] blk.34.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 592/ 720] blk.34.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 593/ 720] blk.34.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 594/ 720] blk.34.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 595/ 720] blk.34.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 596/ 720] blk.34.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 597/ 720] blk.34.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 598/ 720] blk.34.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 599/ 720] blk.34.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 600/ 720] blk.34.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 601/ 720] blk.34.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 602/ 720] blk.35.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 603/ 720] blk.35.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 604/ 720] blk.35.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 605/ 720] blk.35.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 606/ 720] blk.35.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 607/ 720] blk.35.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 608/ 720] blk.35.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 609/ 720] blk.35.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 610/ 720] blk.35.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 611/ 720] blk.35.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 612/ 720] blk.35.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 613/ 720] blk.35.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 614/ 720] blk.35.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 615/ 720] blk.35.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 616/ 720] blk.35.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 617/ 720] blk.35.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 618/ 720] blk.35.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 619/ 720] blk.36.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 620/ 720] blk.36.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 621/ 720] blk.36.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 622/ 720] blk.36.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 623/ 720] blk.36.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 624/ 720] blk.36.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 625/ 720] blk.36.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 626/ 720] blk.36.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 627/ 720] blk.36.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 628/ 720] blk.36.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 629/ 720] blk.36.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 630/ 720] blk.36.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 631/ 720] blk.36.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 632/ 720] blk.36.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 633/ 720] blk.36.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 634/ 720] blk.36.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 635/ 720] blk.36.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 636/ 720] blk.37.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 637/ 720] blk.37.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 638/ 720] blk.37.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 639/ 720] blk.37.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 640/ 720] blk.37.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 641/ 720] blk.37.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 642/ 720] blk.37.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 643/ 720] blk.37.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 644/ 720] blk.37.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 645/ 720] blk.37.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 646/ 720] blk.37.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 647/ 720] blk.37.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 648/ 720] blk.37.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 649/ 720] blk.37.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 650/ 720] blk.37.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 651/ 720] blk.37.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 652/ 720] blk.37.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 653/ 720] blk.38.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 654/ 720] blk.38.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 655/ 720] blk.38.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 656/ 720] blk.38.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 657/ 720] blk.38.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 658/ 720] blk.38.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 659/ 720] blk.38.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 660/ 720] blk.38.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 661/ 720] blk.38.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 662/ 720] blk.38.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 663/ 720] blk.38.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 664/ 720] blk.38.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 665/ 720] blk.38.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 666/ 720] blk.38.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 667/ 720] blk.38.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 668/ 720] blk.38.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 669/ 720] blk.38.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 670/ 720] blk.39.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 671/ 720] blk.39.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 672/ 720] blk.39.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 673/ 720] blk.39.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 674/ 720] blk.39.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 675/ 720] blk.39.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 676/ 720] blk.39.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 677/ 720] blk.39.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 678/ 720] blk.39.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 679/ 720] blk.39.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 680/ 720] blk.39.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 681/ 720] blk.39.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 682/ 720] blk.39.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 683/ 720] blk.39.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 684/ 720] blk.39.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 685/ 720] blk.39.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 686/ 720] blk.39.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 687/ 720] blk.40.attn_k.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 688/ 720] blk.40.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 689/ 720] blk.40.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 690/ 720] blk.40.attn_output.weight - [ 2048, 2560, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 691/ 720] blk.40.attn_q.weight - [ 2560, 2048, 1, 1], type = f16, converting to q8_0 .. size = 10.00 MiB -> 5.31 MiB
[ 692/ 720] blk.40.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 693/ 720] blk.40.attn_v.weight - [ 2560, 512, 1, 1], type = f16, converting to q8_0 .. size = 2.50 MiB -> 1.33 MiB
[ 694/ 720] blk.40.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 695/ 720] blk.40.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 696/ 720] blk.40.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 697/ 720] blk.40.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 698/ 720] blk.40.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 699/ 720] blk.40.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 700/ 720] blk.40.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 701/ 720] blk.40.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 702/ 720] blk.40.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 703/ 720] blk.40.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 704/ 720] blk.41.attn_k.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 705/ 720] blk.41.attn_k_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 706/ 720] blk.41.attn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 707/ 720] blk.41.attn_output.weight - [ 4096, 2560, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 708/ 720] blk.41.attn_q.weight - [ 2560, 4096, 1, 1], type = f16, converting to q8_0 .. size = 20.00 MiB -> 10.62 MiB
[ 709/ 720] blk.41.attn_q_norm.weight - [ 512, 1, 1, 1], type = f32, size = 0.002 MiB
[ 710/ 720] blk.41.attn_v.weight - [ 2560, 1024, 1, 1], type = f16, converting to q8_0 .. size = 5.00 MiB -> 2.66 MiB
[ 711/ 720] blk.41.ffn_down.weight - [ 10240, 2560, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 712/ 720] blk.41.ffn_gate.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 713/ 720] blk.41.ffn_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 714/ 720] blk.41.ffn_up.weight - [ 2560, 10240, 1, 1], type = f16, converting to q8_0 .. size = 50.00 MiB -> 26.56 MiB
[ 715/ 720] blk.41.inp_gate.weight - [ 2560, 256, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB
[ 716/ 720] blk.41.layer_output_scale.weight - [ 1, 1, 1, 1], type = f32, size = 0.000 MiB
[ 717/ 720] blk.41.post_attention_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 718/ 720] blk.41.post_ffw_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 719/ 720] blk.41.post_norm.weight - [ 2560, 1, 1, 1], type = f32, size = 0.010 MiB
[ 720/ 720] blk.41.proj.weight - [ 256, 2560, 1, 1], type = f16, converting to q8_0 .. size = 1.25 MiB -> 0.66 MiB