Text file
src/crypto/sha1/sha1block_loong64.s
1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA-1 block routine. See sha1block.go for Go equivalent.
10 //
11 // There are 80 rounds of 4 types:
12 // - rounds 0-15 are type 1 and load data (ROUND1 macro).
13 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
14 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
15 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
16 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
17 //
18 // Each round loads or shuffles the data, then computes a per-round
19 // function of b, c, d, and then mixes the result into and rotates the
20 // five registers a, b, c, d, e holding the intermediate results.
21 //
22 // The register rotation is implemented by rotating the arguments to
23 // the round macros instead of by explicit move instructions.
24
// Registers that hold the four per-round-type constants. ·block loads them
// once from ·_K, and ROUND1..ROUND4 hand them to MIX as its key argument.
#define KEYREG1	R25
#define KEYREG2	R26
#define KEYREG3	R27
#define KEYREG4	R28

// Scratch registers shared by the LOAD*, FUNC* and MIX macros.
// By the time MIX runs, REGTMP1 holds the round function value f and
// REGTMP3 holds the message word w[i]; REGTMP and REGTMP2 are plain
// temporaries.
#define REGTMP	R30
#define REGTMP1	R17
#define REGTMP2	R18
#define REGTMP3	R19
33
// WOFF is the byte offset from the hardware stack pointer (R3) to w[0] of
// the 16-word message schedule kept in ·block's 64 bytes of locals.
//
// On loong64 the Go assembler's prologue for a function with a non-empty
// frame stores the return address at 0(R3), and the declared locals start
// directly above it at 8(R3). Keeping the schedule at 0(R3) would overwrite
// that saved return address; the function would still return correctly
// (a leaf returns through the link register), but anything that unwinds the
// stack while ·block is running would read message data instead of a
// return PC. Biasing every access by WOFF keeps the slot intact, and the
// schedule then occupies 8(R3)..71(R3), which is exactly the locals area of
// a $64 frame.
#define WOFF	8

// LOAD1(index) is the message load for rounds 0-15. It reads the 32-bit
// word number index from the input block at R5, reverses its byte order
// with REVB2W, leaves the result in REGTMP3 and stores it as w[index].
#define LOAD1(index) \
	MOVW	((index)*4)(R5), REGTMP3; \
	REVB2W	REGTMP3, REGTMP3; \
	MOVW	REGTMP3, (WOFF+((index)*4))(R3)

// LOAD(index) is the message expansion for rounds 16-79. The schedule is
// used as a 16-entry ring, so
//	w[i&15] = rotl(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15], 1)
// (ROTR by 31 is a 32-bit left rotation by 1). The new word is left in
// REGTMP3 and written back to the ring. Clobbers REGTMP, REGTMP1, REGTMP2.
#define LOAD(index) \
	MOVW	(WOFF+(((index)&0xf)*4))(R3), REGTMP3; \
	MOVW	(WOFF+((((index)-3)&0xf)*4))(R3), REGTMP; \
	MOVW	(WOFF+((((index)-8)&0xf)*4))(R3), REGTMP1; \
	MOVW	(WOFF+((((index)-14)&0xf)*4))(R3), REGTMP2; \
	XOR	REGTMP, REGTMP3; \
	XOR	REGTMP1, REGTMP3; \
	XOR	REGTMP2, REGTMP3; \
	ROTR	$31, REGTMP3; \
	MOVW	REGTMP3, (WOFF+(((index)&0xf)*4))(R3)
49
// FUNC1 is the round function used by ROUND1 and ROUND1x (rounds 0-19):
//	f = d ^ (b & (c ^ d))
// The value is left in REGTMP1. Arguments a and e are not used.
#define FUNC1(a, b, c, d, e) \
	XOR	c, d, REGTMP1; \
	AND	b, REGTMP1, REGTMP1; \
	XOR	d, REGTMP1, REGTMP1
55
// FUNC2 is the parity round function used by ROUND2 (rounds 20-39) and,
// through FUNC4, by ROUND4 (rounds 60-79):
//	f = b ^ c ^ d
// The value is left in REGTMP1. Arguments a and e are not used.
#define FUNC2(a, b, c, d, e) \
	XOR	b, c, REGTMP1; \
	XOR	d, REGTMP1, REGTMP1
60
// FUNC3 is the majority round function used by ROUND3 (rounds 40-59):
//	f = (b & c) | ((b | c) & d)
// The value is left in REGTMP1; REGTMP and REGTMP2 are used as temporaries
// and are overwritten. Arguments a and e are not used.
#define FUNC3(a, b, c, d, e) \
	AND	b, c, REGTMP; \
	OR	b, c, REGTMP2; \
	AND	d, REGTMP2, REGTMP2; \
	OR	REGTMP, REGTMP2, REGTMP1
67
// FUNC4 (rounds 60-79) computes the same function as FUNC2: f = b ^ c ^ d.
#define FUNC4(a, b, c, d, e) FUNC2(a, b, c, d, e)
69
// MIX folds one round into the working state. On entry REGTMP1 holds the
// round function value f and REGTMP3 holds the message word w[i]. In order:
//	b = rotl(b, 30)		(ROTR by 2)
//	e = e + f
//	REGTMP2 = rotl(a, 5)	(ROTR by 27)
//	e = e + w[i]
//	e = e + key
//	e = e + rotl(a, 5)
// Both rotations are rotates, not plain shifts. REGTMP2 is overwritten;
// arguments c and d are not used.
#define MIX(a, b, c, d, e, key) \
	ROTR	$2, b, b; \
	ADD	REGTMP1, e, e; \
	ROTR	$27, a, REGTMP2; \
	ADD	REGTMP3, e, e; \
	ADDV	key, e, e; \
	ADD	REGTMP2, e, e
77
// ROUND1 is one round of type 1 that also loads its message word from the
// input block (rounds 0-15): LOAD1, then FUNC1, then MIX with KEYREG1.
// i is the round number.
#define ROUND1(a, b, c, d, e, i) \
	LOAD1(i); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, KEYREG1)
82
// ROUND1x is one round of type 1 whose message word comes from the
// schedule expansion instead of the input block (rounds 16-19): LOAD, then
// FUNC1, then MIX with KEYREG1. i is the round number.
#define ROUND1x(a, b, c, d, e, i) \
	LOAD(i); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, KEYREG1)
87
// ROUND2 is one round of type 2 (rounds 20-39): LOAD, then FUNC2, then MIX
// with KEYREG2. i is the round number.
#define ROUND2(a, b, c, d, e, i) \
	LOAD(i); \
	FUNC2(a, b, c, d, e); \
	MIX(a, b, c, d, e, KEYREG2)
92
// ROUND3 is one round of type 3 (rounds 40-59): LOAD, then FUNC3, then MIX
// with KEYREG3. i is the round number.
#define ROUND3(a, b, c, d, e, i) \
	LOAD(i); \
	FUNC3(a, b, c, d, e); \
	MIX(a, b, c, d, e, KEYREG3)
97
// ROUND4 is one round of type 4 (rounds 60-79): LOAD, then FUNC4, then MIX
// with KEYREG4. i is the round number.
#define ROUND4(a, b, c, d, e, i) \
	LOAD(i); \
	FUNC4(a, b, c, d, e); \
	MIX(a, b, c, d, e, KEYREG4)
102
// block hashes every complete 64-byte block of p into the state at dig.
// Bytes past the last complete block are ignored; if p holds no complete
// block, nothing is read or written.
//
// Arguments, as referenced through FP below:
//	dig    +0   pointer to the five 32-bit state words
//	p_base +8   start of the input
//	p_len  +16  length of the input in bytes
//
// The 64 bytes of locals hold the 16-word message schedule that the LOAD1
// and LOAD macros address through R3 (16 words * 4 bytes); compare the
// local variable w in the generic implementation (sha1block.go).
//
// Register use:
//	R4       dig
//	R5       pointer to the current input block
//	R6       p_len rounded down to a multiple of 64
//	R24      address just past the last complete block
//	R7-R11   working variables a, b, c, d, e
//	R12-R16  copy of the state taken at the start of each block
//	R21      address of ·_K
//	KEYREG1-KEYREG4  the four round constants
TEXT ·block(SB),NOSPLIT,$64-32
	MOVV	dig+0(FP), R4
	MOVV	p_base+8(FP), R5
	MOVV	p_len+16(FP), R6
	AND	$~63, R6, R6		// keep whole blocks only
	BEQ	R6, done		// no complete block in p

	// At least one complete block from here on.
	ADDV	R5, R6, R24		// R24 = p_base + rounded length

	// Round constants.
	MOVV	$·_K(SB), R21
	MOVW	(0*4)(R21), KEYREG1
	MOVW	(1*4)(R21), KEYREG2
	MOVW	(2*4)(R21), KEYREG3
	MOVW	(3*4)(R21), KEYREG4

	// Current hash state.
	MOVW	(0*4)(R4), R7
	MOVW	(1*4)(R4), R8
	MOVW	(2*4)(R4), R9
	MOVW	(3*4)(R4), R10
	MOVW	(4*4)(R4), R11

nextblock:
	// Remember the state this block starts from; it is added back after
	// the 80 rounds.
	MOVW	R7, R12
	MOVW	R8, R13
	MOVW	R9, R14
	MOVW	R10, R15
	MOVW	R11, R16

	// Rounds 0-15: type 1, message words loaded from the input.
	// The a..e roles rotate through R7-R11 by permuting the arguments.
	ROUND1(R7,  R8,  R9,  R10, R11, 0)
	ROUND1(R11, R7,  R8,  R9,  R10, 1)
	ROUND1(R10, R11, R7,  R8,  R9,  2)
	ROUND1(R9,  R10, R11, R7,  R8,  3)
	ROUND1(R8,  R9,  R10, R11, R7,  4)
	ROUND1(R7,  R8,  R9,  R10, R11, 5)
	ROUND1(R11, R7,  R8,  R9,  R10, 6)
	ROUND1(R10, R11, R7,  R8,  R9,  7)
	ROUND1(R9,  R10, R11, R7,  R8,  8)
	ROUND1(R8,  R9,  R10, R11, R7,  9)
	ROUND1(R7,  R8,  R9,  R10, R11, 10)
	ROUND1(R11, R7,  R8,  R9,  R10, 11)
	ROUND1(R10, R11, R7,  R8,  R9,  12)
	ROUND1(R9,  R10, R11, R7,  R8,  13)
	ROUND1(R8,  R9,  R10, R11, R7,  14)
	ROUND1(R7,  R8,  R9,  R10, R11, 15)

	// Rounds 16-19: type 1, message words from the schedule.
	ROUND1x(R11, R7,  R8,  R9,  R10, 16)
	ROUND1x(R10, R11, R7,  R8,  R9,  17)
	ROUND1x(R9,  R10, R11, R7,  R8,  18)
	ROUND1x(R8,  R9,  R10, R11, R7,  19)

	// Rounds 20-39: type 2.
	ROUND2(R7,  R8,  R9,  R10, R11, 20)
	ROUND2(R11, R7,  R8,  R9,  R10, 21)
	ROUND2(R10, R11, R7,  R8,  R9,  22)
	ROUND2(R9,  R10, R11, R7,  R8,  23)
	ROUND2(R8,  R9,  R10, R11, R7,  24)
	ROUND2(R7,  R8,  R9,  R10, R11, 25)
	ROUND2(R11, R7,  R8,  R9,  R10, 26)
	ROUND2(R10, R11, R7,  R8,  R9,  27)
	ROUND2(R9,  R10, R11, R7,  R8,  28)
	ROUND2(R8,  R9,  R10, R11, R7,  29)
	ROUND2(R7,  R8,  R9,  R10, R11, 30)
	ROUND2(R11, R7,  R8,  R9,  R10, 31)
	ROUND2(R10, R11, R7,  R8,  R9,  32)
	ROUND2(R9,  R10, R11, R7,  R8,  33)
	ROUND2(R8,  R9,  R10, R11, R7,  34)
	ROUND2(R7,  R8,  R9,  R10, R11, 35)
	ROUND2(R11, R7,  R8,  R9,  R10, 36)
	ROUND2(R10, R11, R7,  R8,  R9,  37)
	ROUND2(R9,  R10, R11, R7,  R8,  38)
	ROUND2(R8,  R9,  R10, R11, R7,  39)

	// Rounds 40-59: type 3.
	ROUND3(R7,  R8,  R9,  R10, R11, 40)
	ROUND3(R11, R7,  R8,  R9,  R10, 41)
	ROUND3(R10, R11, R7,  R8,  R9,  42)
	ROUND3(R9,  R10, R11, R7,  R8,  43)
	ROUND3(R8,  R9,  R10, R11, R7,  44)
	ROUND3(R7,  R8,  R9,  R10, R11, 45)
	ROUND3(R11, R7,  R8,  R9,  R10, 46)
	ROUND3(R10, R11, R7,  R8,  R9,  47)
	ROUND3(R9,  R10, R11, R7,  R8,  48)
	ROUND3(R8,  R9,  R10, R11, R7,  49)
	ROUND3(R7,  R8,  R9,  R10, R11, 50)
	ROUND3(R11, R7,  R8,  R9,  R10, 51)
	ROUND3(R10, R11, R7,  R8,  R9,  52)
	ROUND3(R9,  R10, R11, R7,  R8,  53)
	ROUND3(R8,  R9,  R10, R11, R7,  54)
	ROUND3(R7,  R8,  R9,  R10, R11, 55)
	ROUND3(R11, R7,  R8,  R9,  R10, 56)
	ROUND3(R10, R11, R7,  R8,  R9,  57)
	ROUND3(R9,  R10, R11, R7,  R8,  58)
	ROUND3(R8,  R9,  R10, R11, R7,  59)

	// Rounds 60-79: type 4.
	ROUND4(R7,  R8,  R9,  R10, R11, 60)
	ROUND4(R11, R7,  R8,  R9,  R10, 61)
	ROUND4(R10, R11, R7,  R8,  R9,  62)
	ROUND4(R9,  R10, R11, R7,  R8,  63)
	ROUND4(R8,  R9,  R10, R11, R7,  64)
	ROUND4(R7,  R8,  R9,  R10, R11, 65)
	ROUND4(R11, R7,  R8,  R9,  R10, 66)
	ROUND4(R10, R11, R7,  R8,  R9,  67)
	ROUND4(R9,  R10, R11, R7,  R8,  68)
	ROUND4(R8,  R9,  R10, R11, R7,  69)
	ROUND4(R7,  R8,  R9,  R10, R11, 70)
	ROUND4(R11, R7,  R8,  R9,  R10, 71)
	ROUND4(R10, R11, R7,  R8,  R9,  72)
	ROUND4(R9,  R10, R11, R7,  R8,  73)
	ROUND4(R8,  R9,  R10, R11, R7,  74)
	ROUND4(R7,  R8,  R9,  R10, R11, 75)
	ROUND4(R11, R7,  R8,  R9,  R10, 76)
	ROUND4(R10, R11, R7,  R8,  R9,  77)
	ROUND4(R9,  R10, R11, R7,  R8,  78)
	ROUND4(R8,  R9,  R10, R11, R7,  79)

	// Add the block's starting state back into the working variables.
	ADD	R12, R7, R7
	ADD	R13, R8, R8
	ADD	R14, R9, R9
	ADD	R15, R10, R10
	ADD	R16, R11, R11

	// Advance to the next block; stop once the end pointer is reached.
	ADDV	$64, R5, R5
	BNE	R5, R24, nextblock

	// Write the updated state back to dig.
	MOVW	R7, (0*4)(R4)
	MOVW	R8, (1*4)(R4)
	MOVW	R9, (2*4)(R4)
	MOVW	R10, (3*4)(R4)
	MOVW	R11, (4*4)(R4)
done:
	RET
237
// ·_K holds the four 32-bit round constants as a 16-byte read-only table.
// ·block loads them, in this order, into KEYREG1..KEYREG4.
DATA	·_K+0(SB)/4, $0x5A827999	// KEYREG1: rounds 0-19
DATA	·_K+4(SB)/4, $0x6ED9EBA1	// KEYREG2: rounds 20-39
DATA	·_K+8(SB)/4, $0x8F1BBCDC	// KEYREG3: rounds 40-59
DATA	·_K+12(SB)/4, $0xCA62C1D6	// KEYREG4: rounds 60-79
GLOBL	·_K(SB),RODATA,$16
243
View as plain text