Source file test/codegen/mathbits.go
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file collects small wrappers around math/bits functions. Inside each
// wrapper, the comments that begin with an architecture name list the
// instruction patterns associated with the statement that follows them;
// presumably they are consumed by the asmcheck test mode named on the
// first line of this file.

package codegen

import (
	"math/bits"
	"unsafe"
)

// ----------------------- //
//    bits.LeadingZeros    //
// ----------------------- //

// LeadingZeros returns bits.LeadingZeros(n) for a uint operand.
func LeadingZeros(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV" -"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " -"SUB"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros(n)
}

// LeadingZeros64 returns bits.LeadingZeros64(n).
func LeadingZeros64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3:"LZCNTQ", -"BSRQ"
	// arm:"CLZ"
	// arm64:"CLZ"
	// loong64:"CLZV" -"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " -"ADDI"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros64(n)
}

// LeadingZeros32 returns bits.LeadingZeros32(n).
func LeadingZeros32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ" "LEAQ" -"CMOVQEQ"
	// amd64/v3: "LZCNTL" - "BSRL"
	// arm:"CLZ"
	// arm64:"CLZW"
	// loong64:"CLZW" -"SUB"
	// mips:"CLZ"
	// ppc64x:"CNTLZW"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZW" -"ADDI"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros32(n)
}

// LeadingZeros16 returns bits.LeadingZeros16(n).
func LeadingZeros16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL" "LEAL" -"CMOVQEQ"
	// amd64/v3: "LZCNTL" - "BSRL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-48" -"NEG"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros16(n)
}

// LeadingZeros8 returns bits.LeadingZeros8(n).
func LeadingZeros8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL" "LEAL" -"CMOVQEQ"
	// amd64/v3: "LZCNTL" - "BSRL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-56" -"NEG"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.LeadingZeros8(n)
}

// --------------- //
//    bits.Len*    //
// --------------- //

// Len returns bits.Len(n) for a uint operand.
func Len(n uint) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC" "CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len(n)
}

// Len64 returns bits.Len64(n).
func Len64(n uint64) int {
	// amd64/v1,amd64/v2:"BSRQ"
	// amd64/v3: "LZCNTQ"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC" "CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len64(n)
}

// SubFromLen64 returns 64 minus bits.Len64(n).
func SubFromLen64(n uint64) int {
	// loong64:"CLZV" -"ADD"
	// ppc64x:"CNTLZD" -"SUBC"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " -"ADDI" -"NEG"
	return 64 - bits.Len64(n)
}

// CompareWithLen64 reports whether bits.Len64(n) is less than 9.
func CompareWithLen64(n uint64) bool {
	// loong64:"CLZV" -"ADD" -"[$]64" -"[$]9"
	return bits.Len64(n) < 9
}

// Len32 returns bits.Len32(n).
func Len32(n uint32) int {
	// amd64/v1,amd64/v2:"BSRQ" "LEAQ" -"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZW"
	// mips:"CLZ"
	// ppc64x: "CNTLZW"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZW" "ADDI [$]-32"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len32(n)
}

// Len16 returns bits.Len16(n).
func Len16(n uint16) int {
	// amd64/v1,amd64/v2:"BSRL" "LEAL" -"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC" "CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len16(n)
}

// Len8 returns bits.Len8(n).
func Len8(n uint8) int {
	// amd64/v1,amd64/v2:"BSRL" "LEAL" -"CMOVQEQ"
	// amd64/v3: "LZCNTL"
	// arm64:"CLZ"
	// arm:"CLZ"
	// loong64:"CLZV"
	// mips:"CLZ"
	// ppc64x:"SUBC" "CNTLZD"
	// riscv64/rva22u64,riscv64/rva23u64:"CLZ " "ADDI [$]-64"
	// s390x:"FLOGR"
	// wasm:"I64Clz"
	return bits.Len8(n)
}

// -------------------- //
//    bits.OnesCount    //
// -------------------- //

// OnesCount returns bits.OnesCount(n) for a uint operand.
//
// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
func OnesCount(n uint) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT" "VUADDLV"
	// loong64:"VPCNTV"
	// ppc64x:"POPCNTD"
	// riscv64:"CPOP "
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}

// OnesCount64 returns bits.OnesCount64(n).
func OnesCount64(n uint64) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTQ"
	// arm64:"VCNT" "VUADDLV"
	// loong64:"VPCNTV"
	// ppc64x:"POPCNTD"
	// riscv64:"CPOP "
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}

// OnesCount32 returns bits.OnesCount32(n).
func OnesCount32(n uint32) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT" "VUADDLV"
	// loong64:"VPCNTW"
	// ppc64x:"POPCNTW"
	// riscv64:"CPOPW"
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}

// OnesCount16 returns bits.OnesCount16(n).
func OnesCount16(n uint16) int {
	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
	// amd64:"POPCNTL"
	// arm64:"VCNT" "VUADDLV"
	// loong64:"VPCNTH"
	// ppc64x:"POPCNTW"
	// riscv64:"CPOP "
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}

// OnesCount8 returns bits.OnesCount8(n).
func OnesCount8(n uint8) int {
	// ppc64x:"POPCNTB"
	// riscv64/rva22u64,riscv64/rva23u64:"CPOP "
	// s390x:"POPCNT"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}

// ------------------ //
//    bits.Reverse    //
// ------------------ //

// Reverse returns bits.Reverse(n) for a uint operand.
func Reverse(n uint) uint {
	// loong64:"BITREVV"
	return bits.Reverse(n)
}

// Reverse64 returns bits.Reverse64(n).
func Reverse64(n uint64) uint64 {
	// loong64:"BITREVV"
	return bits.Reverse64(n)
}

// Reverse32 returns bits.Reverse32(n).
func Reverse32(n uint32) uint32 {
	// loong64:"BITREVW"
	return bits.Reverse32(n)
}

// Reverse16 returns bits.Reverse16(n).
func Reverse16(n uint16) uint16 {
	// loong64:"BITREV4B" "REVB2H"
	return bits.Reverse16(n)
}

// Reverse8 returns bits.Reverse8(n).
func Reverse8(n uint8) uint8 {
	// loong64:"BITREV4B"
	return bits.Reverse8(n)
}

// ----------------------- //
//    bits.ReverseBytes    //
// ----------------------- //

// ReverseBytes returns bits.ReverseBytes(n) for a uint operand.
func ReverseBytes(n uint) uint {
	// 386:"BSWAPL"
	// amd64:"BSWAPQ"
	// arm64:"REV"
	// loong64:"REVBV"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
	// s390x:"MOVDBR"
	return bits.ReverseBytes(n)
}

// ReverseBytes64 returns bits.ReverseBytes64(n).
func ReverseBytes64(n uint64) uint64 {
	// 386:"BSWAPL"
	// amd64:"BSWAPQ"
	// arm64:"REV"
	// loong64:"REVBV"
	// ppc64x/power10: "BRD"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
	// s390x:"MOVDBR"
	return bits.ReverseBytes64(n)
}

// ReverseBytes32 returns bits.ReverseBytes32(n).
func ReverseBytes32(n uint32) uint32 {
	// 386:"BSWAPL"
	// amd64:"BSWAPL"
	// arm64:"REVW"
	// loong64:"REVB2W"
	// ppc64x/power10: "BRW"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8" "SRLI [$]32"
	// s390x:"MOVWBR"
	return bits.ReverseBytes32(n)
}

// ReverseBytes16 returns bits.ReverseBytes16(n).
func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm/5:"SLL" "SRL" "ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	// arm64:"REV16W" -"UBFX" -"ORR"
	// loong64:"REVB2H"
	// ppc64x/power10: "BRH"
	// riscv64/rva22u64,riscv64/rva23u64:"REV8" "SRLI [$]48"
	return bits.ReverseBytes16(n)
}

// --------------------- //
//    bits.RotateLeft    //
// --------------------- //

// RotateLeft64 rotates n left by the constant 37 via bits.RotateLeft64.
func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"RORI"
	// s390x:"RISBGZ [$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}

// RotateLeft32 rotates n left by the constant 9 via bits.RotateLeft32.
func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW R[0-9]+@>23`
	// arm64:"RORW"
	// loong64:"ROTR "
	// ppc64x:"ROTLW"
	// riscv64:"RORIW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}

// RotateLeft16 rotates n left by the variable amount s via bits.RotateLeft16.
func RotateLeft16(n uint16, s int) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	// arm64:"RORW" -"CSEL"
	// loong64:"ROTR " "SLLV"
	return bits.RotateLeft16(n, s)
}

// RotateLeft8 rotates n left by the variable amount s via bits.RotateLeft8.
func RotateLeft8(n uint8, s int) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	// arm64:"LSL" "LSR" -"CSEL"
	// loong64:"OR" "SLLV" "SRLV"
	return bits.RotateLeft8(n, s)
}

// RotateLeftVariable rotates n left by the variable amount m via bits.RotateLeft.
func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}

// RotateLeftVariable64 rotates n left by the variable amount m via bits.RotateLeft64.
func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// loong64:"ROTRV"
	// ppc64x:"ROTL"
	// riscv64:"ROL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}

// RotateLeftVariable32 rotates n left by the variable amount m via bits.RotateLeft32.
func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW R[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// loong64:"ROTR "
	// ppc64x:"ROTLW"
	// riscv64:"ROLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}

// ------------------------ //
//    bits.TrailingZeros    //
// ------------------------ //

// TrailingZeros returns bits.TrailingZeros(n) for a uint operand.
func TrailingZeros(n uint) int {
	// 386:"BSFL"
	// amd64/v1,amd64/v2:"BSFQ" "MOVL [$]64" "CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm:"CLZ"
	// arm64:"RBIT" "CLZ"
	// loong64:"CTZV"
	// ppc64x/power8:"ANDN" "POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZ "
	// s390x:"FLOGR"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}

// TrailingZeros64 returns bits.TrailingZeros64(n).
func TrailingZeros64(n uint64) int {
	// 386:"BSFL" "JNE"
	// amd64/v1,amd64/v2:"BSFQ" "MOVL [$]64" "CMOVQEQ"
	// amd64/v3:"TZCNTQ"
	// arm64:"RBIT" "CLZ"
	// loong64:"CTZV"
	// ppc64x/power8:"ANDN" "POPCNTD"
	// ppc64x/power9: "CNTTZD"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZ "
	// s390x:"FLOGR"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}

// TrailingZeros64Subtract returns bits.TrailingZeros64 of the expression 1 - n.
func TrailingZeros64Subtract(n uint64) int {
	// ppc64x/power8:"NEG" "SUBC" "ANDN" "POPCNTD"
	// ppc64x/power9:"SUBC" "CNTTZD"
	return bits.TrailingZeros64(1 - n)
}

// TrailingZeros32 returns bits.TrailingZeros32(n).
func TrailingZeros32(n uint32) int {
	// 386:"BSFL"
	// amd64/v1,amd64/v2:"BTSQ [$]32" "BSFQ"
	// amd64/v3:"TZCNTL"
	// arm:"CLZ"
	// arm64:"RBITW" "CLZW"
	// loong64:"CTZW"
	// ppc64x/power8:"ANDN" "POPCNTW"
	// ppc64x/power9: "CNTTZW"
	// riscv64/rva22u64,riscv64/rva23u64: "CTZW"
	// s390x:"FLOGR" "MOVWZ"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}

// TrailingZeros16 returns bits.TrailingZeros16(n).
func TrailingZeros16(n uint16) int {
	// 386:"BSFL "
	// amd64:"BSFL" "ORL [$]65536"
	// arm:"ORR [$]65536" "CLZ" -"MOVHU R"
	// arm64:"ORR [$]65536" "RBITW" "CLZW" -"MOVHU R" -"RBIT " -"CLZ "
	// loong64:"CTZV"
	// ppc64x/power8:"POPCNTW" "ADD [$]-1"
	// ppc64x/power9:"CNTTZD" "ORIS [$]1"
	// riscv64/rva22u64,riscv64/rva23u64: "ORI [$]65536" "CTZW"
	// s390x:"FLOGR" "OR [$]65536"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}

// TrailingZeros8 returns bits.TrailingZeros8(n).
func TrailingZeros8(n uint8) int {
	// 386:"BSFL"
	// amd64:"BSFL" "ORL [$]256"
	// arm:"ORR [$]256" "CLZ" -"MOVBU R"
	// arm64:"ORR [$]256" "RBITW" "CLZW" -"MOVBU R" -"RBIT " -"CLZ "
	// loong64:"CTZV"
	// ppc64x/power8:"POPCNTB" "ADD [$]-1"
	// ppc64x/power9:"CNTTZD" "OR [$]256"
	// riscv64/rva22u64,riscv64/rva23u64: "ORI [$]256" "CTZW"
	// s390x:"FLOGR" "OR [$]256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}

// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be
// non-zero.

// IterateBits sums bits.TrailingZeros(n) over successive values of n,
// clearing the lowest set bit with n &= n - 1 until n reaches zero.
func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ" -"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		i += bits.TrailingZeros(n)
		n &= n - 1
	}
	return i
}

// IterateBits64 is the uint64 form of the same loop, using bits.TrailingZeros64.
func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFQ" -"CMOVEQ"
		// amd64/v3:"TZCNTQ"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ "
		i += bits.TrailingZeros64(n)
		n &= n - 1
	}
	return i
}

// IterateBits32 is the uint32 form of the same loop, using bits.TrailingZeros32.
func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL" -"BTSQ"
		// amd64/v3:"TZCNTL"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ "
		i += bits.TrailingZeros32(n)
		n &= n - 1
	}
	return i
}

// IterateBits16 is the uint16 form of the same loop, using bits.TrailingZeros16.
func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL" -"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW" "CLZW" -"ORR"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ " -"ORR"
		i += bits.TrailingZeros16(n)
		n &= n - 1
	}
	return i
}

// IterateBits8 is the uint8 form of the same loop, using bits.TrailingZeros8.
func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64/v1,amd64/v2:"BSFL" -"BTSL"
		// amd64/v3:"TZCNTL"
		// arm64:"RBITW" "CLZW" -"ORR"
		// riscv64/rva22u64,riscv64/rva23u64: "CTZ " -"ORR"
		i += bits.TrailingZeros8(n)
		n &= n - 1
	}
	return i
}

// --------------- //
//    bits.Add*    //
// --------------- //

// Add returns both results of bits.Add(x, y, ci).
func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// riscv64: "ADD" "SLTU"
	return bits.Add(x, y, ci)
}

// AddC calls bits.Add with the constant 7 as the second operand.
func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// mips64:"ADDV" "SGTU"
	// riscv64: "ADD" "SLTU"
	return bits.Add(x, 7, ci)
}

// AddZ calls bits.Add with a constant zero carry-in.
func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
	// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC" -"ADDC [$]-1,"
	// mips64:"ADDV" "SGTU"
	// riscv64: "ADD" "SLTU"
	return bits.Add(x, y, 0)
}

// AddR calls bits.Add and returns only the sum, discarding the carry-out.
func AddR(x, y, ci uint) uint {
	// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// mips64:"ADDV" -"SGTU"
	// riscv64: "ADD" -"SLTU"
	r, _ := bits.Add(x, y, ci)
	return r
}

// AddM adds the three-element arrays p and q into r with three chained
// bits.Add calls, feeding each carry-out into the next call. The
// expectations are attached to the middle call only.
func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS" -"ADD " -"CMP"
	// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
	// s390x:"ADDE" -"ADDC [$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}

// Add64 returns both results of bits.Add64(x, y, ci).
func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// mips64:"ADDV" "SGTU"
	// riscv64: "ADD" "SLTU"
	return bits.Add64(x, y, ci)
}

// Add64C calls bits.Add64 with the constant 7 as the second operand.
func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// mips64:"ADDV" "SGTU"
	// riscv64: "ADD" "SLTU"
	return bits.Add64(x, 7, ci)
}

// Add64Z calls bits.Add64 with a constant zero carry-in.
func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
	// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
	// loong64: "ADDV", "SGTU"
	// ppc64x: "ADDC", -"ADDE", "ADDZE"
	// s390x:"ADDC" -"ADDC [$]-1,"
	// mips64:"ADDV" "SGTU"
	// riscv64: "ADD" "SLTU"
	return bits.Add64(x, y, 0)
}

// Add64R calls bits.Add64 and returns only the sum, discarding the carry-out.
func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
	// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
	// loong64: "ADDV", -"SGTU"
	// ppc64x: "ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE" "ADDC [$]-1,"
	// mips64:"ADDV" -"SGTU"
	// riscv64: "ADD" -"SLTU"
	r, _ := bits.Add64(x, y, ci)
	return r
}

// Add64M adds the three-element arrays p and q into r with three chained
// bits.Add64 calls, feeding each carry-out into the next call. The
// expectations are attached to the middle call only.
func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS" -"ADD " -"CMP"
	// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	// s390x:"ADDE" -"ADDC [$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}

// Add64M0 chains three bits.Add64 calls where the first uses a constant zero
// carry-in, the second adds the constant 0 to p[1] plus the carry, and the
// third adds p[2] to itself plus the carry.
func Add64M0(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], 0)
	// ppc64x: -"ADDC", -"ADDE", "ADDZE R[1-9]"
	r[1], c = bits.Add64(p[1], 0, c)
	// ppc64x: -"ADDC", "ADDE", -"ADDZE"
	r[2], c = bits.Add64(p[2], p[2], c)
}

// Add64MSaveC performs a two-limb add and stores each carry-out into c;
// the second call reads its carry-in back from c[0].
func Add64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x: "ADDC R", "ADDZE"
	r[0], c[0] = bits.Add64(p[0], q[0], 0)
	// ppc64x: "ADDC [$]-1", "ADDE", "ADDZE"
	r[1], c[1] = bits.Add64(p[1], q[1], c[0])
}

// Add64PanicOnOverflowEQ adds a and b and panics when the carry-out equals 1.
func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC [$]3," -"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Add64PanicOnOverflowNE adds a and b and panics when the carry-out is not 0.
func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC [$]3," -"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Add64PanicOnOverflowGT adds a and b and panics when the carry-out is greater than 0.
func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC [$]3," -"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowEQ adds two-limb values a and b with chained
// bits.Add64 calls and panics when the final carry-out equals 1.
func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC [$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowNE adds two-limb values a and b with chained
// bits.Add64 calls and panics when the final carry-out is not 0.
func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC [$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Add64MPanicOnOverflowGT adds two-limb values a and b with chained
// bits.Add64 calls and panics when the final carry-out is greater than 0.
func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC [$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// Verify independent carry chain operations are scheduled efficiently
// and do not cause unnecessary save/restore of the CA bit.
//
// This is an example of why CarryChainTail priority must be lower
// (earlier in the block) than Memory. f[0]=f1 could be scheduled
// after the first two lower 64 bit limb adds, but before either
// high 64 bit limbs are added.
//
// This is what happened on PPC64 when compiling
// crypto/internal/edwards25519/field.feMulGeneric.
func Add64MultipleChains(a, b, c, d [2]uint64) [2]uint64 {
	var cx, d1, d2 uint64
	a1, a2 := a[0], a[1]
	b1, b2 := b[0], b[1]
	c1, c2 := c[0], c[1]

	// First carry chain, computing the two-limb sum a + b into d1 and d2.
	// ppc64x: "ADDC R\\d+,", -"ADDE", -"MOVD XER"
	d1, cx = bits.Add64(a1, b1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD .*, XER"
	d2, _ = bits.Add64(a2, b2, cx)

	// Second carry chain, adding c to the running two-limb sum.
	// ppc64x: "ADDC R\\d+,", -"ADDE", -"MOVD XER"
	d1, cx = bits.Add64(c1, d1, 0)
	// ppc64x: "ADDE", -"ADDC", -"MOVD .*, XER"
	d2, _ = bits.Add64(c2, d2, cx)
	d[0] = d1
	d[1] = d2
	return d
}

// --------------- //
//    bits.Sub*    //
// --------------- //

// Sub returns both results of bits.Sub(x, y, ci).
func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL" "SBBQ" "NEGQ"
	// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub(x, y, ci)
}

// SubC calls bits.Sub with the constant 7 as the subtrahend.
func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL" "SBBQ" "NEGQ"
	// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub(x, 7, ci)
}

// SubZ calls bits.Sub with a constant zero borrow-in.
func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
	// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub(x, y, 0)
}

// SubR calls bits.Sub and returns only the difference, discarding the borrow-out.
func SubR(x, y, ci uint) uint {
	// amd64:"NEGL" "SBBQ" -"NEGQ"
	// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" -"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB" -"SLTU"
	r, _ := bits.Sub(x, y, ci)
	return r
}

// SubM subtracts the three-element array q from p into r with three chained
// bits.Sub calls, feeding each borrow-out into the next call. The
// expectations are attached to the middle call only.
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ" -"NEGL" -"NEGQ"
	// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
	// ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}

// Sub64 returns both results of bits.Sub64(x, y, ci).
func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL" "SBBQ" "NEGQ"
	// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub64(x, y, ci)
}

// Sub64C calls bits.Sub64 with the constant 7 as the subtrahend.
func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL" "SBBQ" "NEGQ"
	// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
	// s390x:"SUBE"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub64(x, 7, ci)
}

// Sub64Z calls bits.Sub64 with a constant zero borrow-in.
func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
	// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
	// loong64:"SUBV" "SGTU"
	// ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
	// s390x:"SUBC"
	// mips64:"SUBV" "SGTU"
	// riscv64: "SUB" "SLTU"
	return bits.Sub64(x, y, 0)
}

// Sub64R calls bits.Sub64 and returns only the difference, discarding the borrow-out.
func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL" "SBBQ" -"NEGQ"
	// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
	// loong64:"SUBV" -"SGTU"
	// ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
	// s390x:"SUBE"
	// riscv64: "SUB" -"SLTU"
	r, _ := bits.Sub64(x, y, ci)
	return r
}

// Sub64M subtracts the three-element array q from p into r with three chained
// bits.Sub64 calls, feeding each borrow-out into the next call. The
// expectations are attached to the middle call only.
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ" -"NEGL" -"NEGQ"
	// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}

// Sub64MSaveC performs a two-limb subtract and stores each borrow-out into c;
// the second call reads its borrow-in back from c[0].
func Sub64MSaveC(p, q, r, c *[2]uint64) {
	// ppc64x:"SUBC R\\d+, R\\d+,", "SUBZE", "NEG"
	r[0], c[0] = bits.Sub64(p[0], q[0], 0)
	// ppc64x:"SUBC R\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
	r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
}

// Sub64PanicOnOverflowEQ subtracts b from a, stores the borrow-out back into
// the parameter b, and panics when that borrow equals 1.
func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC [$]12," -"ADDE" -"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}

// Sub64PanicOnOverflowNE subtracts b from a, stores the borrow-out back into
// the parameter b, and panics when that borrow is not 0.
func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC [$]12," -"ADDE" -"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}

// Sub64PanicOnOverflowGT subtracts b from a, stores the borrow-out back into
// the parameter b, and panics when that borrow is greater than 0.
func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC [$]12," -"ADDE" -"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowEQ subtracts two-limb value b from a with chained
// bits.Sub64 calls and panics when the final borrow-out equals 1.
func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC [$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowNE subtracts two-limb value b from a with chained
// bits.Sub64 calls and panics when the final borrow-out is not 0.
func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC [$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}

// Sub64MPanicOnOverflowGT subtracts two-limb value b from a with chained
// bits.Sub64 calls and panics when the final borrow-out is greater than 0.
func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC [$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}

// --------------- //
//    bits.Mul*    //
// --------------- //

// Mul returns both halves of bits.Mul(x, y).
func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH" "MUL"
	// loong64:"MULV" "MULHVU"
	// ppc64x:"MULHDU" "MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU" "MUL"
	return bits.Mul(x, y)
}

// Mul64 returns both halves of bits.Mul64(x, y).
func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH" "MUL"
	// loong64:"MULV" "MULHVU"
	// ppc64x:"MULHDU" "MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	// riscv64:"MULHU" "MUL"
	return bits.Mul64(x, y)
}

// Mul64HiOnly returns only the high half of bits.Mul64(x, y).
func Mul64HiOnly(x, y uint64) uint64 {
	// arm64:"UMULH" -"MUL"
	// loong64:"MULHVU" -"MULV"
	// riscv64:"MULHU" -"MUL "
	hi, _ := bits.Mul64(x, y)
	return hi
}

// Mul64LoOnly returns only the low half of bits.Mul64(x, y).
func Mul64LoOnly(x, y uint64) uint64 {
	// arm64:"MUL" -"UMULH"
	// loong64:"MULV" -"MULHVU"
	// riscv64:"MUL " -"MULHU"
	_, lo := bits.Mul64(x, y)
	return lo
}

// Mul64Const calls bits.Mul64 on two constant operands, 99+88<<8 and 1<<56,
// whose product has high half 88 and low half 99<<56.
func Mul64Const() (uint64, uint64) {
	// 7133701809754865664 == 99<<56
	// arm64:"MOVD [$]7133701809754865664, R1", "MOVD [$]88, R0"
	// loong64:"MOVV [$]88, R4" "MOVV [$]7133701809754865664, R5" -"MUL"
	return bits.Mul64(99+88<<8, 1<<56)
}

// MulUintOverflow returns unsafe.Slice(p, 9), a nine-element slice of uint64
// starting at p. It makes no explicit math/bits call.
func MulUintOverflow(p *uint64) []uint64 {
	// arm64:"CMP [$]72"
	return unsafe.Slice(p, 9)
}

// --------------- //
//    bits.Div*    //
// --------------- //

// Div returns both results of bits.Div(hi, lo, x).
func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}

// Div32 returns both results of bits.Div32(hi, lo, x).
func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR" "UDIV" "MSUB" -"UREM"
	return bits.Div32(hi, lo, x)
}

// Div64 returns both results of bits.Div64(hi, lo, x).
func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}

// Div64degenerate calls bits.Div64 with the high word fixed at 0 and the
// constant divisor 5.
func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}