2:p:weight - (50400, 4096), 412876842 - tensor:[raw: 412876800, compressed: 412902019, ratio: 1.00]
6:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6148, ratio: 1.33]
6:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
8:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
8:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
8:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
8:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
8:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
11:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
12:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
13:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
14:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
16:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
16:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31860, ratio: 1.03]
17:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
17:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
20:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6460, ratio: 1.27]
20:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
22:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
22:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
22:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
22:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
22:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
25:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
26:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
27:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
28:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
30:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
30:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 29065, ratio: 1.13]
31:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
31:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
34:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6213, ratio: 1.32]
34:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
36:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
36:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
36:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
36:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
36:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
39:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
40:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
41:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
42:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
44:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
44:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 29210, ratio: 1.12]
45:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
45:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
48:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6250, ratio: 1.31]
48:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
50:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
50:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
50:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
50:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
50:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
53:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
54:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
55:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
56:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
58:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
58:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30545, ratio: 1.07]
59:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
59:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
62:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6178, ratio: 1.33]
62:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
64:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
64:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
64:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
64:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
64:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
67:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
68:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
69:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
70:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
72:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
72:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31314, ratio: 1.05]
73:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
73:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
76:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6237, ratio: 1.31]
76:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
78:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
78:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
78:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
78:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
78:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
81:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
82:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
83:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
84:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
86:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
86:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31822, ratio: 1.03]
87:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
87:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
90:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6256, ratio: 1.31]
90:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
92:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
92:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
92:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
92:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
92:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
95:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
96:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
97:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
98:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
100:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
100:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31697, ratio: 1.03]
101:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
101:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
104:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6312, ratio: 1.30]
104:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
106:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
106:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
106:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
106:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
106:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
109:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
110:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
111:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
112:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
114:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
114:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32116, ratio: 1.02]
115:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
115:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
118:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6305, ratio: 1.30]
118:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
120:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
120:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
120:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
120:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
120:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
123:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
124:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
125:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
126:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
128:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
128:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32098, ratio: 1.02]
129:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
129:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
132:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6325, ratio: 1.30]
132:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
134:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
134:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
134:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
134:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
134:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
137:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
138:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
139:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
140:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
142:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
142:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32084, ratio: 1.02]
143:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
143:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
146:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6327, ratio: 1.29]
146:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
148:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
148:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
148:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
148:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
148:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
151:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
152:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
153:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
154:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
156:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
156:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32215, ratio: 1.02]
157:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
157:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
160:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6293, ratio: 1.30]
160:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
162:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
162:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
162:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
162:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
162:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
165:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
166:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
167:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
168:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
170:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
170:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32166, ratio: 1.02]
171:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
171:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
174:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6294, ratio: 1.30]
174:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
176:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
176:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
176:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
176:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
176:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
179:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
180:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
181:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
182:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
184:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
184:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31817, ratio: 1.03]
185:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
185:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
188:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6210, ratio: 1.32]
188:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
190:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
190:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
190:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
190:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
190:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
193:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
194:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
195:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
196:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
198:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
198:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32088, ratio: 1.02]
199:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
199:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
202:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6267, ratio: 1.31]
202:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
204:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
204:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
204:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
204:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
204:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
207:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
208:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
209:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
210:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
212:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
212:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31756, ratio: 1.03]
213:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
213:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
216:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6173, ratio: 1.33]
216:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
218:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
218:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
218:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
218:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
218:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
221:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
222:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
223:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
224:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
226:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
226:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31403, ratio: 1.04]
227:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
227:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
230:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6121, ratio: 1.34]
230:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
232:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
232:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
232:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
232:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
232:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
235:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
236:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
237:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
238:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
240:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
240:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30993, ratio: 1.06]
241:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
241:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
244:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6139, ratio: 1.33]
244:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
246:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
246:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
246:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
246:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
246:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
249:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
250:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
251:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
252:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
254:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
254:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30639, ratio: 1.07]
255:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
255:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
258:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6137, ratio: 1.33]
258:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
260:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
260:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
260:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
260:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
260:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
263:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
264:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
265:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
266:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
268:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
268:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30248, ratio: 1.08]
269:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
269:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
272:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6054, ratio: 1.35]
272:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
274:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
274:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
274:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
274:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
274:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
277:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
278:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
279:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
280:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
282:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
282:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 29918, ratio: 1.10]
283:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
283:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
286:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5969, ratio: 1.37]
286:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
288:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
288:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
288:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
288:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
288:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
291:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
292:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
293:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
294:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
296:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
296:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 29787, ratio: 1.10]
297:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
297:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
300:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5836, ratio: 1.40]
300:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
302:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
302:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
302:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
302:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
302:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
305:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
306:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
307:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
308:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
310:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
310:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30009, ratio: 1.09]
311:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
311:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
314:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5797, ratio: 1.41]
314:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
316:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
316:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
316:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
316:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
316:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
319:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
320:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
321:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
322:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
324:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
324:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30307, ratio: 1.08]
325:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
325:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
328:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5781, ratio: 1.42]
328:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
330:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
330:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
330:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
330:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
330:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
333:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
334:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
335:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
336:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
338:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
338:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 30764, ratio: 1.07]
339:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
339:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
342:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5774, ratio: 1.42]
342:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
344:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
344:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
344:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
344:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
344:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
347:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
348:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
349:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
350:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
352:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
352:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31095, ratio: 1.05]
353:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
353:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
356:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5740, ratio: 1.43]
356:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
358:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
358:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
358:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
358:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
358:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
361:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
362:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
363:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
364:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
366:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
366:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 31796, ratio: 1.03]
367:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
367:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
370:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5765, ratio: 1.42]
370:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
372:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
372:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
372:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
372:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
372:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
375:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
376:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
377:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
378:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
380:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
380:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32077, ratio: 1.02]
381:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
381:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
384:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 5776, ratio: 1.42]
384:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
386:b:bias - (1, 1, 2048, 2048), 4194346 - tensor:[raw: 4194304, compressed: 31247, ratio: 134.23]
386:b:masked_bias - (), 43 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
386:b:scale_attn - (), 42 - tensor:[raw: 2, compressed: 25, ratio: 0.08]
386:b:sin - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 124142, ratio: 1.06]
386:b:cos - (2048, 32), 131110 - tensor:[raw: 131072, compressed: 112911, ratio: 1.16]
389:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
390:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
391:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
392:p:weight - (4096, 4096), 33554474 - tensor:[raw: 33554432, compressed: 33556499, ratio: 1.00]
394:p:weight - (16384, 4096), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
394:p:bias - (16384,), 32805 - tensor:[raw: 32768, compressed: 32489, ratio: 1.01]
395:p:weight - (4096, 16384), 134217770 - tensor:[raw: 134217728, compressed: 134225939, ratio: 1.00]
395:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
397:p:weight - (4096,), 8231 - tensor:[raw: 8192, compressed: 6020, ratio: 1.36]
397:p:bias - (4096,), 8229 - tensor:[raw: 8192, compressed: 8215, ratio: 1.00]
398:p:weight - (50400, 4096), 412876842 - tensor:[raw: 412876800, compressed: 412902019, ratio: 1.00]
398:p:bias - (50400,), 100837 - tensor:[raw: 100800, compressed: 100827, ratio: 1.00]