Text file src/crypto/sha1/sha1block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA-1 block routine. See sha1block.go for Go equivalent.
    10  //
    11  // There are 80 rounds of 4 types:
    12  //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13  //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14  //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15  //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16  //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17  //
    18  // Each round loads or shuffles the data, then computes a per-round
    19  // function of b, c, d, and then mixes the result into and rotates the
    20  // five registers a, b, c, d, e holding the intermediate results.
    21  //
    22  // The register rotation is implemented by rotating the arguments to
    23  // the round macros instead of by explicit move instructions.
    24  
    25  #define REGTMP	R30	// scratch: w[i-3] in LOAD, b&c in FUNC3
    26  #define REGTMP1	R17	// scratch: w[i-8] in LOAD, then f (the FUNCn result that MIX reads)
    27  #define REGTMP2	R18	// scratch: w[i-14] in LOAD, (b|c)&d in FUNC3, rotated a in MIX
    28  #define REGTMP3	R19	// message word w[i]: written by LOAD1/LOAD, read by MIX
    29  #define KEYREG1	R25	// round constant used by ROUND1 and ROUND1x
    30  #define KEYREG2	R26	// round constant used by ROUND2
    31  #define KEYREG3	R27	// round constant used by ROUND3
    32  #define KEYREG4	R28	// round constant used by ROUND4
    33  
    34  #define LOAD1(index) \	// rounds 0-15: w[index] = 32-bit word `index` of the block at R5, byte-reversed; the value stays in REGTMP3 for MIX
    35  	MOVW	(index*4)(R5), REGTMP3; \
    36  	REVB2W	REGTMP3, REGTMP3; \	// reverse byte order: SHA-1 message words are big-endian
    37  	MOVW	REGTMP3, ((index*4)+8)(R3)	// w[] lives at 8(R3)..71(R3); 0(R3) holds the return address saved by the prologue and must not be overwritten
    38  
    39  #define LOAD(index) \	// rounds 16-79: w[i&15] = rotl1(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15]); the value stays in REGTMP3 for MIX
    40  	MOVW	((((index)&0xf)*4)+8)(R3), REGTMP3; \	// the +8 skips the saved return address at 0(R3), matching LOAD1
    41  	MOVW	((((index-3)&0xf)*4)+8)(R3), REGTMP; \
    42  	MOVW	((((index-8)&0xf)*4)+8)(R3), REGTMP1; \
    43  	MOVW	((((index-14)&0xf)*4)+8)(R3), REGTMP2; \
    44  	XOR	REGTMP, REGTMP3; \
    45  	XOR	REGTMP1, REGTMP3; \
    46  	XOR	REGTMP2, REGTMP3; \
    47  	ROTR	$31, REGTMP3; \	// rotate right by 31 == rotate left by 1
    48  	MOVW	REGTMP3, ((((index)&0xf)*4)+8)(R3)	// store back into the 16-entry circular schedule
    49  
    50  // FUNC1: f = d ^ (b & (c ^ d)), left in REGTMP1 for MIX. Arguments a and e are unused.
    51  #define FUNC1(a, b, c, d, e) \
    52  	XOR	c, d, REGTMP1; \	// REGTMP1 = c ^ d
    53  	AND	b, REGTMP1; \	// REGTMP1 = b & (c ^ d)
    54  	XOR	d, REGTMP1	// REGTMP1 = d ^ (b & (c ^ d))
    55  
    56  // FUNC2: f = b ^ c ^ d, left in REGTMP1 for MIX. Arguments a and e are unused.
    57  #define FUNC2(a, b, c, d, e) \
    58  	XOR	b, c, REGTMP1; \	// REGTMP1 = b ^ c
    59  	XOR	d, REGTMP1	// REGTMP1 = b ^ c ^ d
    60  
    61  // FUNC3: f = (b & c) | ((b | c) & d), left in REGTMP1 for MIX. Clobbers REGTMP and REGTMP2; a and e are unused.
    62  #define FUNC3(a, b, c, d, e) \
    63  	OR	b, c, REGTMP2; \	// REGTMP2 = b | c
    64  	AND	b, c, REGTMP; \	// REGTMP = b & c
    65  	AND	d, REGTMP2; \	// REGTMP2 = (b | c) & d
    66  	OR	REGTMP, REGTMP2, REGTMP1	// REGTMP1 = (b & c) | ((b | c) & d)
    67  
    68  #define FUNC4 FUNC2	// ROUND4 uses the same f as ROUND2 (b ^ c ^ d); only the round constant differs
    69  
    70  #define MIX(a, b, c, d, e, key) \	// e += f + w[i] + key + rotl(a, 5); b = rotl(b, 30). Expects f in REGTMP1 and w[i] in REGTMP3; clobbers REGTMP2; c and d are unused
    71  	ROTR	$2, b; \	// b = b rotated left by 30 (rotate right by 2)
    72  	ADD	REGTMP1, e; \	// e = e + f
    73  	ROTR	$27, a, REGTMP2; \	// REGTMP2 = a rotated left by 5 (rotate right by 27)
    74  	ADD	REGTMP3, e; \	// e = e + w[i]
    75  	ADDV	key, e; \	// e = e + k. NOTE(review): the only ADDV in this macro; presumably harmless because the ADD below follows it - confirm
    76  	ADD	REGTMP2, e	// e = e + (a rotated left by 5)
    77  
    78  #define ROUND1(a, b, c, d, e, index) \	// rounds 0-15: take w[index] from the input block, f = FUNC1, constant KEYREG1
    79  	LOAD1(index); \	// REGTMP3 = w[index]
    80  	FUNC1(a, b, c, d, e); \	// REGTMP1 = f
    81  	MIX(a, b, c, d, e, KEYREG1)	// update e and rotate b in place
    82  
    83  #define ROUND1x(a, b, c, d, e, index) \	// rounds 16-19: same f and constant as ROUND1, but w[index] is derived from earlier schedule words
    84  	LOAD(index); \	// REGTMP3 = w[index]
    85  	FUNC1(a, b, c, d, e); \	// REGTMP1 = f
    86  	MIX(a, b, c, d, e, KEYREG1)	// update e and rotate b in place
    87  
    88  #define ROUND2(a, b, c, d, e, index) \	// rounds 20-39: derived w[index], f = FUNC2, constant KEYREG2
    89  	LOAD(index); \	// REGTMP3 = w[index]
    90  	FUNC2(a, b, c, d, e); \	// REGTMP1 = f
    91  	MIX(a, b, c, d, e, KEYREG2)	// update e and rotate b in place
    92  
    93  #define ROUND3(a, b, c, d, e, index) \	// rounds 40-59: derived w[index], f = FUNC3, constant KEYREG3
    94  	LOAD(index); \	// REGTMP3 = w[index]
    95  	FUNC3(a, b, c, d, e); \	// REGTMP1 = f
    96  	MIX(a, b, c, d, e, KEYREG3)	// update e and rotate b in place
    97  
    98  #define ROUND4(a, b, c, d, e, index) \	// rounds 60-79: derived w[index], f = FUNC4 (same as FUNC2), constant KEYREG4
    99  	LOAD(index); \	// REGTMP3 = w[index]
   100  	FUNC4(a, b, c, d, e); \	// REGTMP1 = f
   101  	MIX(a, b, c, d, e, KEYREG4)	// update e and rotate b in place
   102  
   103  // block folds every complete 64-byte chunk of p into the five 32-bit state words at dig
   104  // (Go declaration is presumably func block(dig *digest, p []byte) - confirm in sha1block.go).
   105  // The 64-byte frame holds the 16-entry, 4-byte-per-entry message schedule accessed by the
   106  // LOAD1 and LOAD macros; it plays the role of the local w in the generic sha1block.go.
   107  TEXT ·block(SB),NOSPLIT,$64-32
   108  	MOVV	dig+0(FP),	R4	// R4 = dig, base of the five state words
   109  	MOVV	p_base+8(FP),	R5	// R5 = current input pointer
   110  	MOVV	p_len+16(FP),	R6	// R6 = len(p)
   111  	AND	$~63, R6	// round the length down to a multiple of 64
   112  	BEQ	R6, zero	// no complete block: return without touching dig
   113  
   114  	// p_len >= 64
   115  	ADDV	R5, R6, R24	// R24 = end pointer (one past the last complete block)
   116  	MOVW	(0*4)(R4), R7	// R7..R11 = state words 0..4 (a, b, c, d, e)
   117  	MOVW	(1*4)(R4), R8
   118  	MOVW	(2*4)(R4), R9
   119  	MOVW	(3*4)(R4), R10
   120  	MOVW	(4*4)(R4), R11
   121  
   122  	MOVV	$·_K(SB), R21	// R21 = address of the round-constant table
   123  	MOVW	(0*4)(R21), KEYREG1	// keep all four constants in registers for the whole loop
   124  	MOVW	(1*4)(R21), KEYREG2
   125  	MOVW	(2*4)(R21), KEYREG3
   126  	MOVW	(3*4)(R21), KEYREG4
   127  
   128  loop:	// one iteration per 64-byte block
   129  	MOVW	R7,	R12	// R12..R16 = copy of the incoming state, added back after round 79
   130  	MOVW	R8,	R13
   131  	MOVW	R9,	R14
   132  	MOVW	R10,	R15
   133  	MOVW	R11,	R16
   134  
   135  	ROUND1(R7,  R8,  R9,  R10, R11, 0)	// rounds 0-15: arguments rotate one position per round instead of moving registers
   136  	ROUND1(R11, R7,  R8,  R9,  R10, 1)
   137  	ROUND1(R10, R11, R7,  R8,  R9,  2)
   138  	ROUND1(R9,  R10, R11, R7,  R8,  3)
   139  	ROUND1(R8,  R9,  R10, R11, R7,  4)
   140  	ROUND1(R7,  R8,  R9,  R10, R11, 5)
   141  	ROUND1(R11, R7,  R8,  R9,  R10, 6)
   142  	ROUND1(R10, R11, R7,  R8,  R9,  7)
   143  	ROUND1(R9,  R10, R11, R7,  R8,  8)
   144  	ROUND1(R8,  R9,  R10, R11, R7,  9)
   145  	ROUND1(R7,  R8,  R9,  R10, R11, 10)
   146  	ROUND1(R11, R7,  R8,  R9,  R10, 11)
   147  	ROUND1(R10, R11, R7,  R8,  R9,  12)
   148  	ROUND1(R9,  R10, R11, R7,  R8,  13)
   149  	ROUND1(R8,  R9,  R10, R11, R7,  14)
   150  	ROUND1(R7,  R8,  R9,  R10, R11, 15)
   151  
   152  	ROUND1x(R11, R7,  R8,  R9,  R10, 16)	// rounds 16-19
   153  	ROUND1x(R10, R11, R7,  R8,  R9,  17)
   154  	ROUND1x(R9,  R10, R11, R7,  R8,  18)
   155  	ROUND1x(R8,  R9,  R10, R11, R7,  19)
   156  
   157  	ROUND2(R7,  R8,  R9,  R10, R11, 20)	// rounds 20-39
   158  	ROUND2(R11, R7,  R8,  R9,  R10, 21)
   159  	ROUND2(R10, R11, R7,  R8,  R9,  22)
   160  	ROUND2(R9,  R10, R11, R7,  R8,  23)
   161  	ROUND2(R8,  R9,  R10, R11, R7,  24)
   162  	ROUND2(R7,  R8,  R9,  R10, R11, 25)
   163  	ROUND2(R11, R7,  R8,  R9,  R10, 26)
   164  	ROUND2(R10, R11, R7,  R8,  R9,  27)
   165  	ROUND2(R9,  R10, R11, R7,  R8,  28)
   166  	ROUND2(R8,  R9,  R10, R11, R7,  29)
   167  	ROUND2(R7,  R8,  R9,  R10, R11, 30)
   168  	ROUND2(R11, R7,  R8,  R9,  R10, 31)
   169  	ROUND2(R10, R11, R7,  R8,  R9,  32)
   170  	ROUND2(R9,  R10, R11, R7,  R8,  33)
   171  	ROUND2(R8,  R9,  R10, R11, R7,  34)
   172  	ROUND2(R7,  R8,  R9,  R10, R11, 35)
   173  	ROUND2(R11, R7,  R8,  R9,  R10, 36)
   174  	ROUND2(R10, R11, R7,  R8,  R9,  37)
   175  	ROUND2(R9,  R10, R11, R7,  R8,  38)
   176  	ROUND2(R8,  R9,  R10, R11, R7,  39)
   177  
   178  	ROUND3(R7,  R8,  R9,  R10, R11, 40)	// rounds 40-59
   179  	ROUND3(R11, R7,  R8,  R9,  R10, 41)
   180  	ROUND3(R10, R11, R7,  R8,  R9,  42)
   181  	ROUND3(R9,  R10, R11, R7,  R8,  43)
   182  	ROUND3(R8,  R9,  R10, R11, R7,  44)
   183  	ROUND3(R7,  R8,  R9,  R10, R11, 45)
   184  	ROUND3(R11, R7,  R8,  R9,  R10, 46)
   185  	ROUND3(R10, R11, R7,  R8,  R9,  47)
   186  	ROUND3(R9,  R10, R11, R7,  R8,  48)
   187  	ROUND3(R8,  R9,  R10, R11, R7,  49)
   188  	ROUND3(R7,  R8,  R9,  R10, R11, 50)
   189  	ROUND3(R11, R7,  R8,  R9,  R10, 51)
   190  	ROUND3(R10, R11, R7,  R8,  R9,  52)
   191  	ROUND3(R9,  R10, R11, R7,  R8,  53)
   192  	ROUND3(R8,  R9,  R10, R11, R7,  54)
   193  	ROUND3(R7,  R8,  R9,  R10, R11, 55)
   194  	ROUND3(R11, R7,  R8,  R9,  R10, 56)
   195  	ROUND3(R10, R11, R7,  R8,  R9,  57)
   196  	ROUND3(R9,  R10, R11, R7,  R8,  58)
   197  	ROUND3(R8,  R9,  R10, R11, R7,  59)
   198  
   199  	ROUND4(R7,  R8,  R9,  R10, R11, 60)	// rounds 60-79
   200  	ROUND4(R11, R7,  R8,  R9,  R10, 61)
   201  	ROUND4(R10, R11, R7,  R8,  R9,  62)
   202  	ROUND4(R9,  R10, R11, R7,  R8,  63)
   203  	ROUND4(R8,  R9,  R10, R11, R7,  64)
   204  	ROUND4(R7,  R8,  R9,  R10, R11, 65)
   205  	ROUND4(R11, R7,  R8,  R9,  R10, 66)
   206  	ROUND4(R10, R11, R7,  R8,  R9,  67)
   207  	ROUND4(R9,  R10, R11, R7,  R8,  68)
   208  	ROUND4(R8,  R9,  R10, R11, R7,  69)
   209  	ROUND4(R7,  R8,  R9,  R10, R11, 70)
   210  	ROUND4(R11, R7,  R8,  R9,  R10, 71)
   211  	ROUND4(R10, R11, R7,  R8,  R9,  72)
   212  	ROUND4(R9,  R10, R11, R7,  R8,  73)
   213  	ROUND4(R8,  R9,  R10, R11, R7,  74)
   214  	ROUND4(R7,  R8,  R9,  R10, R11, 75)
   215  	ROUND4(R11, R7,  R8,  R9,  R10, 76)
   216  	ROUND4(R10, R11, R7,  R8,  R9,  77)
   217  	ROUND4(R9,  R10, R11, R7,  R8,  78)
   218  	ROUND4(R8,  R9,  R10, R11, R7,  79)
   219  
   220  	ADD	R12, R7	// add the state saved at the top of the loop back into a..e
   221  	ADD	R13, R8
   222  	ADD	R14, R9
   223  	ADD	R15, R10
   224  	ADD	R16, R11
   225  
   226  	ADDV	$64, R5	// advance to the next 64-byte block
   227  	BNE	R5, R24, loop	// repeat until the input pointer reaches the end pointer
   228  
   229  end:	// reached only by falling out of the loop
   230  	MOVW	R7, (0*4)(R4)	// write the updated state words back to dig
   231  	MOVW	R8, (1*4)(R4)
   232  	MOVW	R9, (2*4)(R4)
   233  	MOVW	R10, (3*4)(R4)
   234  	MOVW	R11, (4*4)(R4)
   235  zero:	// target of the early exit when there is no complete block
   236  	RET
   237  
   238  GLOBL	·_K(SB),RODATA,$16	// read-only table of the four 32-bit round constants loaded by block
   239  DATA	·_K+0(SB)/4, $0x5A827999	// loaded into KEYREG1 (ROUND1/ROUND1x, rounds 0-19)
   240  DATA	·_K+4(SB)/4, $0x6ED9EBA1	// loaded into KEYREG2 (ROUND2, rounds 20-39)
   241  DATA	·_K+8(SB)/4, $0x8F1BBCDC	// loaded into KEYREG3 (ROUND3, rounds 40-59)
   242  DATA	·_K+12(SB)/4, $0xCA62C1D6	// loaded into KEYREG4 (ROUND4, rounds 60-79)
   243  

View as plain text