1
2
3
4
5 package ssa
6
7 import (
8 "internal/goarch"
9 "slices"
10 )
11
12 var truthTableValues [3]uint8 = [3]uint8{0b1111_0000, 0b1100_1100, 0b1010_1010}
13
14 func (slop SIMDLogicalOP) String() string {
15 if slop == sloInterior {
16 return "leaf"
17 }
18 interior := ""
19 if slop&sloInterior != 0 {
20 interior = "+interior"
21 }
22 switch slop &^ sloInterior {
23 case sloAnd:
24 return "and" + interior
25 case sloXor:
26 return "xor" + interior
27 case sloOr:
28 return "or" + interior
29 case sloAndNot:
30 return "andNot" + interior
31 case sloNot:
32 return "not" + interior
33 }
34 return "wrong"
35 }
36
37 func rewriteTern(f *Func) {
38 if f.maxCPUFeatures == CPUNone {
39 return
40 }
41
42 arch := f.Config.Ctxt().Arch.Family
43
44 if arch != goarch.AMD64 {
45 return
46 }
47
48 boolExprTrees := make(map[*Value]SIMDLogicalOP)
49
50
51
52
53
54 for _, b := range f.Blocks {
55 for _, v := range b.Values {
56 slo := classifyBooleanSIMD(v)
57 switch slo {
58 case sloOr,
59 sloAndNot,
60 sloXor,
61 sloAnd:
62 boolExprTrees[v.Args[1]] |= sloInterior
63 fallthrough
64 case sloNot:
65 boolExprTrees[v.Args[0]] |= sloInterior
66 boolExprTrees[v] |= slo
67 }
68 }
69 }
70
71
72 var roots []*Value
73 for v, slo := range boolExprTrees {
74 if f.pass.debug > 1 {
75 f.Warnl(v.Pos, "%s has SLO %v", v.LongString(), slo)
76 }
77
78 if slo&sloInterior == 0 && v.Block.CPUfeatures.hasFeature(CPUavx512) {
79 roots = append(roots, v)
80 }
81 }
82 slices.SortFunc(roots, func(u, v *Value) int { return int(u.ID - v.ID) })
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104 var rewrite func(v *Value) [3]*Value
105
106
107
108
109
110
111
112
113
114 var computeTT func(v *Value, vars [3]*Value) uint8
115
116
117
118
119
120 combine := func(a, b [3]*Value) ([3]*Value, bool) {
121 var c [3]*Value
122 i := 0
123 for _, v := range a {
124 if v == nil {
125 break
126 }
127 c[i] = v
128 i++
129 }
130 bloop:
131 for _, v := range b {
132 if v == nil {
133 break
134 }
135 for _, u := range a {
136 if v == u {
137 continue bloop
138 }
139 }
140 if i == 3 {
141 return [3]*Value{}, false
142 }
143 c[i] = v
144 i++
145 }
146 return c, true
147 }
148
149 computeTT = func(v *Value, vars [3]*Value) uint8 {
150 i := 0
151 for ; i < len(vars); i++ {
152 if vars[i] == v {
153 return truthTableValues[i]
154 }
155 }
156 slo := boolExprTrees[v] &^ sloInterior
157 a := computeTT(v.Args[0], vars)
158 switch slo {
159 case sloNot:
160 return ^a
161 case sloAnd:
162 return a & computeTT(v.Args[1], vars)
163 case sloXor:
164 return a ^ computeTT(v.Args[1], vars)
165 case sloOr:
166 return a | computeTT(v.Args[1], vars)
167 case sloAndNot:
168 return a & ^computeTT(v.Args[1], vars)
169 }
170 panic("switch should have covered all cases, or unknown var in logical expression")
171 }
172
173 replace := func(a0 *Value, vars0 [3]*Value) {
174 imm := computeTT(a0, vars0)
175 op := ternOpForLogical(a0.Op)
176 if op == a0.Op {
177 if f.pass.debug > 0 {
178 f.Warnl(a0.Pos, "Skipping rewrite for %s, op=%v", a0.LongString(), op)
179 }
180 return
181 }
182 if f.pass.debug > 0 {
183 f.Warnl(a0.Pos, "Rewriting %s into %v of 0b%b %v %v %v", a0.LongString(), op, imm,
184 vars0[0], vars0[1], vars0[2])
185 }
186 a0.reset(op)
187 a0.SetArgs3(vars0[0], vars0[1], vars0[2])
188 a0.AuxInt = int64(int8(imm))
189 }
190
191
192
193
194
195 addOne := func(vars [3]*Value, v *Value) [3]*Value {
196 if vars[2] != nil {
197 panic("rewriteTern.addOne, vars[2] should be nil")
198 }
199 if v == vars[0] || v == vars[1] {
200 return vars
201 }
202 if vars[1] == nil {
203 vars[1] = v
204 } else {
205 vars[2] = v
206 }
207 return vars
208 }
209
210 rewrite = func(v *Value) [3]*Value {
211 slo := boolExprTrees[v]
212 if slo == sloInterior {
213 return [3]*Value{v, nil, nil}
214 }
215 var vars [3]*Value
216 hasFeature := v.Block.CPUfeatures.hasFeature(CPUavx512)
217 if slo&sloNot == sloNot {
218 vars = rewrite(v.Args[0])
219 if !hasFeature {
220 if vars[2] != nil {
221 replace(v.Args[0], vars)
222 return [3]*Value{v, nil, nil}
223 }
224 return vars
225 }
226 } else {
227 var ok bool
228 a0, a1 := v.Args[0], v.Args[1]
229 vars0 := rewrite(a0)
230 vars1 := rewrite(a1)
231 vars, ok = combine(vars0, vars1)
232
233 if f.pass.debug > 1 {
234 f.Warnl(a0.Pos, "combine(%v, %v) -> %v, %v", vars0, vars1, vars, ok)
235 }
236
237 if !(ok && v.Block.CPUfeatures.hasFeature(CPUavx512)) {
238
239
240 if vars0[2] != nil && a0.Block.CPUfeatures.hasFeature(CPUavx512) {
241 replace(a0, vars0)
242 }
243 if vars1[2] != nil && a1.Block.CPUfeatures.hasFeature(CPUavx512) {
244 replace(a1, vars1)
245 }
246
247
248
249
250
251 if vars0[2] == nil {
252 if vars1[2] == nil {
253
254
255
256
257
258
259
260
261
262
263 return [3]*Value{a0, a1, nil}
264 } else {
265
266 vars = addOne(vars0, a1)
267 }
268 } else if vars1[2] == nil {
269
270 vars = addOne(vars1, a0)
271 } else if !ok {
272
273
274
275 return [3]*Value{a0, a1, nil}
276 }
277
278 }
279 }
280
281 if slo&sloInterior == 0 && vars[2] != nil && hasFeature {
282 replace(v, vars)
283 return [3]*Value{v, nil, nil}
284 }
285 return vars
286 }
287
288 for _, v := range roots {
289 if f.pass.debug > 1 {
290 f.Warnl(v.Pos, "SLO root %s", v.LongString())
291 }
292 rewrite(v)
293 }
294 }
295
View as plain text