Text file
src/math/exp_loong64.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
// Raw IEEE 754 double-precision bit patterns for use from integer registers.
#define NearZero	0x3e30000000000000	// 2**-28
#define PosInf		0x7ff0000000000000	// exponent field all ones, fraction zero
#define FracMask	0x000fffffffffffff	// low 52 bits: the fraction field
#define C1		0x3cb0000000000000	// 2**-52
11
// exprodata<> is a read-only table of twelve float64 constants (96 bytes).
// Code loads an entry by its byte offset from the table base.
DATA	exprodata<>+0(SB)/8,  $0.0
DATA	exprodata<>+8(SB)/8,  $0.5
DATA	exprodata<>+16(SB)/8, $1.0
DATA	exprodata<>+24(SB)/8, $2.0
DATA	exprodata<>+32(SB)/8, $6.93147180369123816490e-01	// Ln2Hi
DATA	exprodata<>+40(SB)/8, $1.90821492927058770002e-10	// Ln2Lo
DATA	exprodata<>+48(SB)/8, $1.44269504088896338700e+00	// Log2e
DATA	exprodata<>+56(SB)/8, $7.09782712893383973096e+02	// Overflow  (base-e threshold)
DATA	exprodata<>+64(SB)/8, $-7.45133219101941108420e+02	// Underflow (base-e threshold)
DATA	exprodata<>+72(SB)/8, $1.0239999999999999e+03		// Overflow2  (base-2 threshold)
DATA	exprodata<>+80(SB)/8, $-1.0740e+03			// Underflow2 (base-2 threshold)
DATA	exprodata<>+88(SB)/8, $3.7252902984619141e-09		// NearZero
GLOBL	exprodata<>+0(SB), NOPTR|RODATA, $96
25
// expmultirodata<> holds the five polynomial coefficients P1..P5 (40 bytes),
// stored in ascending order so that Pn lives at byte offset 8*(n-1).
DATA	expmultirodata<>+0(SB)/8,  $1.66666666666666657415e-01	// P1
DATA	expmultirodata<>+8(SB)/8,  $-2.77777777770155933842e-03	// P2
DATA	expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05	// P3
DATA	expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06	// P4
DATA	expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08	// P5
GLOBL	expmultirodata<>+0(SB), NOPTR|RODATA, $40
32
// 2**-53 as a float64 bit pattern: scale factor applied by archExp's inline
// Ldexp when the result is subnormal.
#define ExpDenormScale	0x3ca0000000000000

// archExp returns e**x, the base-e exponential of x.
// This is an assembly implementation of the method used for function Exp in file exp.go.
//
// Early exits, checked in this order:
//	x != x (NaN)       -> x is stored back unchanged
//	x > Overflow       -> the PosInf bit pattern
//	x < Underflow      -> 0
//	|x| < NearZero     -> 1 + x
//
// Otherwise x is split as x = k*ln2 + r, a rational approximation of
// e**r gives y, and the result is y scaled by 2**k (inline Ldexp).
//
// func archExp(x float64) float64
TEXT ·archExp(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x

	MOVV	$exprodata<>+0(SB), R10
	MOVD	56(R10), F1	// Overflow
	MOVD	64(R10), F2	// Underflow
	MOVD	88(R10), F3	// NearZero
	MOVD	16(R10), F17	// 1.0

	CMPEQD	F0, F0, FCC0
	BFPF	isNaN		// x = NaN, return NaN

	CMPGTD	F0, F1, FCC0
	BFPT	overflow	// x > Overflow, return PosInf

	CMPGTD	F2, F0, FCC0
	BFPT	underflow	// x < Underflow, return 0

	ABSD	F0, F5
	CMPGTD	F3, F5, FCC0
	BFPT	nearzero	// fabs(x) < NearZero, return 1 + x

	// argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
	// computed as r = hi - lo for extra precision.
	MOVD	0(R10), F5	// 0.0
	MOVD	8(R10), F3	// 0.5
	MOVD	48(R10), F2	// Log2e
	CMPGTD	F0, F5, FCC0	// FCC0 = (x > 0)
	FMSUBD	F3, F2, F0, F4	// Log2e*x - 0.5
	FMADDD	F3, F2, F0, F3	// Log2e*x + 0.5
	FSEL	FCC0, F3, F4, F3	// pick the +0.5 form when x > 0, else the -0.5 form
	FTINTRZVD	F3, F4	// float64 -> int64
	MOVV	F4, R5		// R5 = int(k)
	FFINTDV	F4, F3		// int64 -> float64

	MOVD	32(R10), F4	// Ln2Hi
	MOVD	40(R10), F5	// Ln2Lo
	FNMSUBD	F0, F3, F4, F4	// hi = x - k*Ln2Hi
	MULD	F3, F5, F5	// lo = k*Ln2Lo
	SUBD	F5, F4, F6	// r = hi - lo
	MULD	F6, F6, F7	// t = r*r

	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
	MOVV	$expmultirodata<>+0(SB), R11
	MOVD	32(R11), F8	// P5
	MOVD	24(R11), F9	// P4
	FMADDD	F9, F8, F7, F13	// P4 + t*P5
	MOVD	16(R11), F10	// P3
	FMADDD	F10, F13, F7, F13
	MOVD	8(R11), F11	// P2
	FMADDD	F11, F13, F7, F13
	MOVD	0(R11), F12	// P1
	FMADDD	F12, F13, F7, F13
	FNMSUBD	F6, F13, F7, F13	// c = r - t*(...)

	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
	MOVD	24(R10), F14	// 2.0
	SUBD	F13, F14, F14	// 2 - c
	MULD	F6, F13, F15	// r*c
	DIVD	F14, F15, F15	// (r*c)/(2-c)
	SUBD	F15, F5, F15	// lo - (r*c)/(2-c)
	SUBD	F4, F15, F15	// ... - hi
	SUBD	F15, F17, F16	// y = 1 - ...

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVV	F16, R4
	MOVV	$FracMask, R9
	AND	R9, R4, R6	// fraction
	SRLV	$52, R4, R7	// exponent
	ADDV	R5, R7		// biased exponent of y*2**k
	MOVV	$1, R12
	BGE	R7, R12, normal
	// Subnormal result. For the most negative in-range x (k = -1075 with
	// y in [1, 2)) the sum in R7 is -52. Biasing by 52 would leave an
	// exponent field of 0, turning the bits into a subnormal whose product
	// with 2**-52 rounds to 0 instead of the smallest subnormal. Biasing
	// by 53 keeps the field >= 1 so the multiply below is the only
	// rounding step; this matches the 53 / 2**-53 scaling used by Ldexp.
	ADDV	$53, R7		// denormal
	MOVV	$ExpDenormScale, R8
	MOVV	R8, F17		// m = 2**-53 (replaces the 1.0 loaded earlier)
normal:
	SLLV	$52, R7
	OR	R7, R6, R4	// reassemble exponent | fraction
	MOVV	R4, F0
	MULD	F17, F0		// return m * x
	MOVD	F0, ret+8(FP)
	RET
nearzero:
	ADDD	F17, F0, F0	// 1 + x, then fall through to store it
isNaN:
	MOVD	F0, ret+8(FP)
	RET
underflow:
	MOVV	R0, ret+8(FP)
	RET
overflow:
	MOVV	$PosInf, R4
	MOVV	R4, ret+8(FP)
	RET
133
134
// archExp2 returns 2**x, the base-2 exponential of x.
// This is an assembly implementation of the method used for function Exp2 in file exp.go.
//
// Early exits, checked in this order:
//	x != x (NaN)       -> x is stored back unchanged
//	x > Overflow2      -> the PosInf bit pattern
//	x < Underflow2     -> 0
//
// Otherwise x is split into an integer k and a remainder t = x - k, the
// remainder is converted to base e as hi - lo, and the same rational
// approximation and inline Ldexp as in the base-e routine are applied.
//
// func archExp2(x float64) float64
TEXT ·archExp2(SB),$0-16
	MOVD	x+0(FP), F0	// F0 = x

	MOVV	$exprodata<>+0(SB), R10
	MOVD	72(R10), F1	// Overflow2
	MOVD	80(R10), F2	// Underflow2

	CMPEQD	F0, F0, FCC0
	BFPF	isNaN		// x = NaN, return NaN

	CMPGTD	F0, F1, FCC0
	BFPT	overflow	// x > Overflow, return PosInf

	CMPGTD	F2, F0, FCC0
	BFPT	underflow	// x < Underflow, return 0

	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
	// computed as r = hi - lo for extra precision.
	MOVD	0(R10), F10	// 0.0
	MOVD	8(R10), F2	// 0.5
	CMPGTD	F0, F10, FCC0	// FCC0 = (x > 0)
	SUBD	F2, F0, F4	// x - 0.5
	ADDD	F2, F0, F3	// x + 0.5
	FSEL	FCC0, F3, F4, F3	// pick x + 0.5 when x > 0, else x - 0.5
	FTINTRZVD	F3, F4	// float64 -> int64
	MOVV	F4, R5		// R5 = int(k)
	FFINTDV	F4, F3		// int64 -> float64

	MOVD	32(R10), F4	// Ln2Hi
	MOVD	40(R10), F5	// Ln2Lo
	SUBD	F3, F0, F3	// t = x - k
	MULD	F3, F4		// hi = t*Ln2Hi
	FNMSUBD	F10, F3, F5, F5	// lo = 0 - t*Ln2Lo (F10 still holds 0.0 here)
	SUBD	F5, F4, F6	// r = hi - lo
	MULD	F6, F6, F7	// t = r*r

	// compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5))))
	MOVV	$expmultirodata<>+0(SB), R11
	MOVD	32(R11), F8	// P5
	MOVD	24(R11), F9	// P4
	FMADDD	F9, F8, F7, F13	// P4 + t*P5
	MOVD	16(R11), F10	// P3 (F10 is reused; the 0.0 is no longer needed)
	FMADDD	F10, F13, F7, F13
	MOVD	8(R11), F11	// P2
	FMADDD	F11, F13, F7, F13
	MOVD	0(R11), F12	// P1
	FMADDD	F12, F13, F7, F13
	FNMSUBD	F6, F13, F7, F13	// c = r - t*(...)

	// compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
	MOVD	24(R10), F14	// 2.0
	SUBD	F13, F14, F14	// 2 - c
	MULD	F6, F13, F15	// r*c
	DIVD	F14, F15	// (r*c)/(2-c)

	MOVD	16(R10), F17	// 1.0
	SUBD	F15, F5, F15	// lo - (r*c)/(2-c)
	SUBD	F4, F15, F15	// ... - hi
	SUBD	F15, F17, F16	// y = 1 - ...

	// inline Ldexp(y, k), benefit:
	// 1, no parameter pass overhead.
	// 2, skip unnecessary checks for Inf/NaN/Zero
	MOVV	F16, R4
	MOVV	$FracMask, R9
	SRLV	$52, R4, R7	// exponent
	AND	R9, R4, R6	// fraction
	ADDV	R5, R7		// biased exponent of y*2**k
	MOVV	$1, R12
	BGE	R7, R12, normal

	// NOTE(review): with x >= Underflow2 (-1074) the sum in R7 appears to
	// stay >= -51, so adding 52 keeps the exponent field >= 1 - confirm.
	ADDV	$52, R7		// denormal
	MOVV	$C1, R8
	MOVV	R8, F17		// m = 2**-52 (replaces the 1.0 loaded above)
normal:
	SLLV	$52, R7
	OR	R7, R6, R4	// reassemble exponent | fraction
	MOVV	R4, F0
	MULD	F17, F0		// m * x, then fall through to store it
isNaN:
	MOVD	F0, ret+8(FP)
	RET
underflow:
	MOVV	R0, ret+8(FP)
	RET
overflow:
	MOVV	$PosInf, R4
	MOVV	R4, ret+8(FP)
	RET
229
View as plain text