1
2
3
4
5 package ssa
6
7 import (
8 "cmd/compile/internal/ir"
9 "cmd/compile/internal/types"
10 "cmd/internal/obj"
11 "slices"
12 )
13
14
15
16 func pair(f *Func) {
17
18 switch f.Config.arch {
19 case "arm64":
20 default:
21 return
22 }
23 pairLoads(f)
24 pairStores(f)
25 }
26
27 type pairableLoadInfo struct {
28 width int64
29 pair Op
30 }
31
32
33
34 var pairableLoads = map[Op]pairableLoadInfo{
35 OpARM64MOVDload: {8, OpARM64LDP},
36 OpARM64MOVWUload: {4, OpARM64LDPW},
37 OpARM64MOVWload: {4, OpARM64LDPSW},
38
39
40 OpARM64FMOVDload: {8, OpARM64FLDPD},
41 OpARM64FMOVSload: {4, OpARM64FLDPS},
42 }
43
44 type pairableStoreInfo struct {
45 width int64
46 pair Op
47 }
48
49
50
51
52 var pairableStores = map[Op]pairableStoreInfo{
53 OpARM64MOVDstore: {8, OpARM64STP},
54 OpARM64MOVWstore: {4, OpARM64STPW},
55 OpARM64FMOVDstore: {8, OpARM64FSTPD},
56 OpARM64FMOVSstore: {4, OpARM64FSTPS},
57 }
58
59
60
61
62
63
64
65 func offsetOk(aux Aux, off, width int64) bool {
66 if true {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90 return true
91 }
92 if aux != nil {
93 if _, ok := aux.(*ir.Name); !ok {
94
95 return false
96 }
97
98
99
100
101
102
103
104 if off >= 0 {
105 off += 120
106 }
107
108 }
109 switch width {
110 case 4:
111 if off >= -256 && off <= 252 && off%4 == 0 {
112 return true
113 }
114 case 8:
115 if off >= -512 && off <= 504 && off%8 == 0 {
116 return true
117 }
118 }
119 return false
120 }
121
122 func pairLoads(f *Func) {
123 var loads []*Value
124
125
126 auxIDs := map[Aux]int{}
127 auxID := func(aux Aux) int {
128 id, ok := auxIDs[aux]
129 if !ok {
130 id = len(auxIDs)
131 auxIDs[aux] = id
132 }
133 return id
134 }
135
136 for _, b := range f.Blocks {
137
138 loads = loads[:0]
139 clear(auxIDs)
140 for _, v := range b.Values {
141 info := pairableLoads[v.Op]
142 if info.width == 0 {
143 continue
144 }
145 if !offsetOk(v.Aux, v.AuxInt, info.width) {
146 continue
147 }
148 loads = append(loads, v)
149 }
150 if len(loads) < 2 {
151 continue
152 }
153
154
155 slices.SortFunc(loads, func(x, y *Value) int {
156
157 if x.Op != y.Op {
158 return int(x.Op - y.Op)
159 }
160 if x.Args[0].ID != y.Args[0].ID {
161 return int(x.Args[0].ID - y.Args[0].ID)
162 }
163 if x.Args[1].ID != y.Args[1].ID {
164 return int(x.Args[1].ID - y.Args[1].ID)
165 }
166
167 if x.Aux != nil {
168 if y.Aux == nil {
169 return 1
170 }
171 a, b := auxID(x.Aux), auxID(y.Aux)
172 if a != b {
173 return a - b
174 }
175 } else if y.Aux != nil {
176 return -1
177 }
178
179 return int(x.AuxInt - y.AuxInt)
180 })
181
182
183 for i := 0; i < len(loads)-1; i++ {
184 x := loads[i]
185 y := loads[i+1]
186 if x.Op != y.Op || x.Args[0] != y.Args[0] || x.Args[1] != y.Args[1] {
187 continue
188 }
189 if x.Aux != y.Aux {
190 continue
191 }
192 if x.AuxInt+pairableLoads[x.Op].width != y.AuxInt {
193 continue
194 }
195
196
197
198
199 load := b.NewValue2IA(x.Pos, pairableLoads[x.Op].pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
200
201
202 x.reset(OpSelect0)
203 x.SetArgs1(load)
204 y.reset(OpSelect1)
205 y.SetArgs1(load)
206
207 i++
208 }
209 }
210
211
212
213
214 type nextBlockKey struct {
215 op Op
216 ptr ID
217 mem ID
218 auxInt int64
219 aux any
220 }
221 nextBlock := map[nextBlockKey]*Value{}
222 for _, b := range f.Blocks {
223 if memoryBarrierTest(b) {
224
225
226
227
228
229
230
231
232
233
234
235
236
237 continue
238 }
239
240
241 clear(nextBlock)
242 for _, e := range b.Succs {
243 if len(e.b.Preds) > 1 {
244 continue
245 }
246 for _, v := range e.b.Values {
247 info := pairableLoads[v.Op]
248 if info.width == 0 {
249 continue
250 }
251 if !offsetOk(v.Aux, v.AuxInt, info.width) {
252 continue
253 }
254 nextBlock[nextBlockKey{op: v.Op, ptr: v.Args[0].ID, mem: v.Args[1].ID, auxInt: v.AuxInt, aux: v.Aux}] = v
255 }
256 }
257 if len(nextBlock) == 0 {
258 continue
259 }
260
261 const maxMoved = 4
262 nMoved := 0
263 for i := len(b.Values) - 1; i >= 0 && nMoved < maxMoved; i-- {
264 x := b.Values[i]
265 info := pairableLoads[x.Op]
266 if info.width == 0 {
267 continue
268 }
269 if !offsetOk(x.Aux, x.AuxInt, info.width) {
270 continue
271 }
272 key := nextBlockKey{op: x.Op, ptr: x.Args[0].ID, mem: x.Args[1].ID, auxInt: x.AuxInt + info.width, aux: x.Aux}
273 if y := nextBlock[key]; y != nil {
274 delete(nextBlock, key)
275
276
277 load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
278
279
280 x.reset(OpSelect0)
281 x.SetArgs1(load)
282
283
284 y.reset(OpCopy)
285 y.SetArgs1(b.NewValue1(y.Pos, OpSelect1, y.Type, load))
286 nMoved++
287 continue
288 }
289 key.auxInt = x.AuxInt - info.width
290 if y := nextBlock[key]; y != nil {
291 delete(nextBlock, key)
292
293
294 load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(y.Type, x.Type), y.AuxInt, x.Aux, x.Args[0], x.Args[1])
295
296
297 x.reset(OpSelect1)
298 x.SetArgs1(load)
299
300 y.reset(OpCopy)
301 y.SetArgs1(b.NewValue1(y.Pos, OpSelect0, y.Type, load))
302 nMoved++
303 continue
304 }
305 }
306 }
307 }
308
309 func memoryBarrierTest(b *Block) bool {
310 if b.Kind != BlockARM64NZW {
311 return false
312 }
313 c := b.Controls[0]
314 if c.Op != OpARM64MOVWUload {
315 return false
316 }
317 if globl, ok := c.Aux.(*obj.LSym); ok {
318 return globl.Name == "runtime.writeBarrier"
319 }
320 return false
321 }
322
323 func pairStores(f *Func) {
324 last := f.Cache.allocBoolSlice(f.NumValues())
325 defer f.Cache.freeBoolSlice(last)
326
327
328
329 prevStore := func(v *Value) *Value {
330 if v.Op == OpInitMem || v.Op == OpPhi {
331 return nil
332 }
333 m := v.MemoryArg()
334 if m.Block != v.Block {
335 return nil
336 }
337 return m
338 }
339
340 for _, b := range f.Blocks {
341
342
343
344
345 for _, v := range b.Values {
346 if v.Type.IsMemory() {
347 last[v.ID] = true
348 }
349 }
350 for _, v := range b.Values {
351 if v.Type.IsMemory() {
352 if m := prevStore(v); m != nil {
353 last[m.ID] = false
354 }
355 }
356 }
357 var lastMem *Value
358 for _, v := range b.Values {
359 if last[v.ID] {
360 lastMem = v
361 break
362 }
363 }
364
365
366 memCheck:
367 for v := lastMem; v != nil; v = prevStore(v) {
368 info := pairableStores[v.Op]
369 if info.width == 0 {
370 continue
371 }
372 if !offsetOk(v.Aux, v.AuxInt, info.width) {
373 continue
374 }
375 ptr := v.Args[0]
376 val := v.Args[1]
377 mem := v.Args[2]
378 off := v.AuxInt
379 aux := v.Aux
380
381
382 lowerOk := true
383 higherOk := true
384 count := 10
385 for w := prevStore(v); w != nil; w = prevStore(w) {
386 if w.Uses != 1 {
387
388
389
390
391
392 continue memCheck
393 }
394 if w.Op == v.Op &&
395 w.Args[0] == ptr &&
396 w.Aux == aux &&
397 (lowerOk && w.AuxInt == off-info.width || higherOk && w.AuxInt == off+info.width) {
398
399
400
401
402
403 args := []*Value{ptr, val, w.Args[1], mem}
404 if w.AuxInt == off-info.width {
405 args[1], args[2] = args[2], args[1]
406 off -= info.width
407 }
408 v.reset(info.pair)
409 v.AddArgs(args...)
410 v.Aux = aux
411 v.AuxInt = off
412 v.Pos = w.Pos
413
414
415 wmem := w.MemoryArg()
416 w.reset(OpCopy)
417 w.SetArgs1(wmem)
418 continue memCheck
419 }
420 if count--; count == 0 {
421
422
423
424
425
426 continue memCheck
427 }
428
429
430
431
432
433 var width int64
434 switch w.Op {
435 case OpARM64MOVDstore, OpARM64FMOVDstore:
436 width = 8
437 case OpARM64MOVWstore, OpARM64FMOVSstore:
438 width = 4
439 case OpARM64MOVHstore:
440 width = 2
441 case OpARM64MOVBstore:
442 width = 1
443 case OpCopy:
444 continue
445 default:
446
447
448 continue memCheck
449 }
450
451
452
453
454 if w.Args[0] != ptr || w.Aux != aux {
455 continue memCheck
456 }
457 if overlap(w.AuxInt, width, off-info.width, info.width) {
458
459 lowerOk = false
460 }
461 if overlap(w.AuxInt, width, off+info.width, info.width) {
462
463 higherOk = false
464 }
465 if !higherOk && !lowerOk {
466 continue memCheck
467 }
468 }
469 }
470 }
471 }
472
View as plain text