Source file src/cmd/compile/internal/ssa/_gen/S390XOps.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import "strings"
     8  
     9  // Notes:
    10  //  - Integer types live in the low portion of registers. Upper portions are junk.
    11  //  - Boolean types use the low-order byte of a register. 0=false, 1=true.
    12  //    Upper bytes are junk.
    13  //  - When doing sub-register operations, we try to write the whole
    14  //    destination register to avoid a partial-register write.
    15  //  - Unused portions of AuxInt (or the Val portion of ValAndOff) are
    16  //    filled by sign-extending the used portion. Users of AuxInt which interpret
    17  //    AuxInt as unsigned (e.g. shifts) must be careful.
    18  //  - The SB 'register' is implemented using instruction-relative addressing. This
    19  //    places some limitations on when and how memory operands that are addressed
    20  //    relative to SB can be used:
    21  //
    22  //     1. Pseudo-instructions do not always map to a single machine instruction when
    23  //        using the SB 'register' to address data. This is because many machine
    24  //        instructions do not have relative long (RL suffix) equivalents. For example,
    25  //        ADDload, which is assembled as AG.
    26  //
    27  //     2. Loads and stores using relative addressing require the data be aligned
    28  //        according to its size (8-bytes for double words, 4-bytes for words
    29  //        and so on).
    30  //
    31  //    We can always work around these by inserting LARL instructions (load address
    32  //    relative long) in the assembler, but typically this results in worse code
    33  //    generation because the address can't be re-used. Inserting instructions in the
    34  //    assembler also means clobbering the temp register and it is a long-term goal
    35  //    to prevent the compiler doing this so that it can be allocated as a normal
    36  //    register.
    37  //
    38  // For more information about the z/Architecture, the instruction set and the
    39  // addressing modes it supports take a look at the z/Architecture Principles of
    40  // Operation: http://publibfp.boulder.ibm.com/epubs/pdf/dz9zr010.pdf
    41  //
    42  // Suffixes encode the bit width of pseudo-instructions.
    43  // D (double word)  = 64 bit (frequently omitted)
    44  // W (word)         = 32 bit
    45  // H (half word)    = 16 bit
    46  // B (byte)         = 8 bit
    47  // S (single prec.) = 32 bit (double precision is omitted)
    48  
    49  // copied from ../../s390x/reg.go
    50  var regNamesS390X = []string{
    51  	"R0",
    52  	"R1",
    53  	"R2",
    54  	"R3",
    55  	"R4",
    56  	"R5",
    57  	"R6",
    58  	"R7",
    59  	"R8",
    60  	"R9",
    61  	"R10",
    62  	"R11",
    63  	"R12",
    64  	"g", // R13
    65  	"R14",
    66  	"SP", // R15
    67  	"F0",
    68  	"F1",
    69  	"F2",
    70  	"F3",
    71  	"F4",
    72  	"F5",
    73  	"F6",
    74  	"F7",
    75  	"F8",
    76  	"F9",
    77  	"F10",
    78  	"F11",
    79  	"F12",
    80  	"F13",
    81  	"F14",
    82  	"F15",
    83  
    84  	// If you add registers, update asyncPreempt in runtime.
    85  
    86  	//pseudo-registers
    87  	"SB",
    88  }
    89  
    90  func init() {
    91  	// Make map from reg names to reg integers.
    92  	if len(regNamesS390X) > 64 {
    93  		panic("too many registers")
    94  	}
    95  	num := map[string]int{}
    96  	for i, name := range regNamesS390X {
    97  		num[name] = i
    98  	}
    99  	buildReg := func(s string) regMask {
   100  		m := regMask(0)
   101  		for _, r := range strings.Split(s, " ") {
   102  			if n, ok := num[r]; ok {
   103  				m |= regMask(1) << uint(n)
   104  				continue
   105  			}
   106  			panic("register " + r + " not found")
   107  		}
   108  		return m
   109  	}
   110  
   111  	// Common individual register masks
   112  	var (
   113  		sp  = buildReg("SP")
   114  		sb  = buildReg("SB")
   115  		r0  = buildReg("R0")
   116  		tmp = buildReg("R11") // R11 is used as a temporary in a small number of instructions.
   117  		lr  = buildReg("R14")
   118  
   119  		// R10 is reserved by the assembler.
   120  		gp   = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14")
   121  		gpg  = gp | buildReg("g")
   122  		gpsp = gp | sp
   123  
   124  		// R0 is considered to contain the value 0 in address calculations.
   125  		ptr     = gp &^ r0
   126  		ptrsp   = ptr | sp
   127  		ptrspsb = ptrsp | sb
   128  
   129  		fp         = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
   130  		callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
   131  		r1         = buildReg("R1")
   132  		r2         = buildReg("R2")
   133  		r3         = buildReg("R3")
   134  		r9         = buildReg("R9")
   135  	)
   136  	// Common slices of register masks
   137  	var (
   138  		gponly = []regMask{gp}
   139  		fponly = []regMask{fp}
   140  	)
   141  
   142  	// Common regInfo
   143  	var (
   144  		gp01    = regInfo{inputs: []regMask{}, outputs: gponly}
   145  		gp11    = regInfo{inputs: []regMask{gp}, outputs: gponly}
   146  		gp11sp  = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
   147  		gp21    = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   148  		gp21sp  = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
   149  		gp21tmp = regInfo{inputs: []regMask{gp &^ tmp, gp &^ tmp}, outputs: []regMask{gp &^ tmp}, clobbers: tmp}
   150  
   151  		// R0 evaluates to 0 when used as the number of bits to shift
   152  		// so we need to exclude it from that operand.
   153  		sh21 = regInfo{inputs: []regMask{gp, ptr}, outputs: gponly}
   154  
   155  		addr    = regInfo{inputs: []regMask{sp | sb}, outputs: gponly}
   156  		addridx = regInfo{inputs: []regMask{sp | sb, ptrsp}, outputs: gponly}
   157  
   158  		gp2flags       = regInfo{inputs: []regMask{gpsp, gpsp}}
   159  		gp1flags       = regInfo{inputs: []regMask{gpsp}}
   160  		gp2flags1      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   161  		gp11flags      = regInfo{inputs: []regMask{gp}, outputs: gponly}
   162  		gp21flags      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   163  		gp2flags1flags = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
   164  
   165  		gpload       = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: gponly}
   166  		gploadidx    = regInfo{inputs: []regMask{ptrspsb, ptrsp, 0}, outputs: gponly}
   167  		gpopload     = regInfo{inputs: []regMask{gp, ptrsp, 0}, outputs: gponly}
   168  		gpstore      = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}}
   169  		gpstoreconst = regInfo{inputs: []regMask{ptrspsb, 0}}
   170  		gpstoreidx   = regInfo{inputs: []regMask{ptrsp, ptrsp, gpsp, 0}}
   171  		gpstorebr    = regInfo{inputs: []regMask{ptrsp, gpsp, 0}}
   172  		gpstorelaa   = regInfo{inputs: []regMask{ptrspsb, gpsp, 0}, outputs: gponly}
   173  		gpstorelab   = regInfo{inputs: []regMask{r1, gpsp, 0}, clobbers: r1}
   174  
   175  		gpmvc = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}}
   176  
   177  		fp01        = regInfo{inputs: []regMask{}, outputs: fponly}
   178  		fp21        = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   179  		fp31        = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
   180  		fp21clobber = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
   181  		fpgp        = regInfo{inputs: fponly, outputs: gponly}
   182  		gpfp        = regInfo{inputs: gponly, outputs: fponly}
   183  		fp11        = regInfo{inputs: fponly, outputs: fponly}
   184  		fp1flags    = regInfo{inputs: []regMask{fp}}
   185  		fp11clobber = regInfo{inputs: fponly, outputs: fponly}
   186  		fp2flags    = regInfo{inputs: []regMask{fp, fp}}
   187  
   188  		fpload    = regInfo{inputs: []regMask{ptrspsb, 0}, outputs: fponly}
   189  		fploadidx = regInfo{inputs: []regMask{ptrsp, ptrsp, 0}, outputs: fponly}
   190  
   191  		fpstore    = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
   192  		fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
   193  
   194  		sync = regInfo{inputs: []regMask{0}}
   195  
   196  		// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
   197  		cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
   198  
   199  		// LoweredAtomicExchange overwrites the output before executing
   200  		// CS{,G}, so the output register must not be the same as the
   201  		// input register. For now we just force the output register to
   202  		// R0.
   203  		exchange = regInfo{inputs: []regMask{ptrsp, gpsp &^ r0, 0}, outputs: []regMask{r0, 0}}
   204  	)
   205  
   206  	var S390Xops = []opData{
   207  		// fp ops
   208  		{name: "FADDS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FADDS", commutative: true, resultInArg0: true}, // fp32 arg0 + arg1
   209  		{name: "FADD", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FADD", commutative: true, resultInArg0: true},   // fp64 arg0 + arg1
   210  		{name: "FSUBS", argLength: 2, reg: fp21clobber, typ: "(Float32,Flags)", asm: "FSUBS", resultInArg0: true},                    // fp32 arg0 - arg1
   211  		{name: "FSUB", argLength: 2, reg: fp21clobber, typ: "(Float64,Flags)", asm: "FSUB", resultInArg0: true},                      // fp64 arg0 - arg1
   212  		{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true, resultInArg0: true},                                // fp32 arg0 * arg1
   213  		{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true, resultInArg0: true},                                  // fp64 arg0 * arg1
   214  		{name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS", resultInArg0: true},                                                   // fp32 arg0 / arg1
   215  		{name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV", resultInArg0: true},                                                     // fp64 arg0 / arg1
   216  		{name: "FNEGS", argLength: 1, reg: fp11clobber, asm: "FNEGS", clobberFlags: true},                                            // fp32 -arg0
   217  		{name: "FNEG", argLength: 1, reg: fp11clobber, asm: "FNEG", clobberFlags: true},                                              // fp64 -arg0
   218  		{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS", resultInArg0: true},                                                 // fp32 arg1 * arg2 + arg0
   219  		{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true},                                                   // fp64 arg1 * arg2 + arg0
   220  		{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true},                                                 // fp32 arg1 * arg2 - arg0
   221  		{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true},                                                   // fp64 arg1 * arg2 - arg0
   222  		{name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"},                                                                       // fp64/fp32 clear sign bit (load positive)
   223  		{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"},                                                                       // fp64/fp32 set sign bit (load negative)
   224  		{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"},                                                                       // fp64/fp32 copy arg1 sign bit to arg0
   225  
   226  		// Single element vector floating point min / max instructions
   227  		{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
   228  		{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
   229  		{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
   230  		{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
   231  
   232  		// Round to integer, float64 only.
   233  		//
   234  		// aux | rounding mode
   235  		// ----+-----------------------------------
   236  		//   1 | round to nearest, ties away from 0
   237  		//   4 | round to nearest, ties to even
   238  		//   5 | round toward 0
   239  		//   6 | round toward +∞
   240  		//   7 | round toward -∞
   241  		{name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
   242  
   243  		{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
   244  		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
   245  		{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true},                               // fp32 constant
   246  		{name: "FMOVDconst", reg: fp01, asm: "FMOVD", aux: "Float64", rematerializeable: true},                               // fp64 constant
   247  		{name: "FMOVSloadidx", argLength: 3, reg: fploadidx, asm: "FMOVS", aux: "SymOff", symEffect: "Read"},                 // fp32 load indexed by i
   248  		{name: "FMOVDloadidx", argLength: 3, reg: fploadidx, asm: "FMOVD", aux: "SymOff", symEffect: "Read"},                 // fp64 load indexed by i
   249  
   250  		{name: "FMOVSstore", argLength: 3, reg: fpstore, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp32 store
   251  		{name: "FMOVDstore", argLength: 3, reg: fpstore, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // fp64 store
   252  		{name: "FMOVSstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVS", aux: "SymOff", symEffect: "Write"},                 // fp32 indexed by i store
   253  		{name: "FMOVDstoreidx", argLength: 4, reg: fpstoreidx, asm: "FMOVD", aux: "SymOff", symEffect: "Write"},                 // fp64 indexed by i store
   254  
   255  		// binary ops
   256  		{name: "ADD", argLength: 2, reg: gp21sp, asm: "ADD", commutative: true, clobberFlags: true},                                                                  // arg0 + arg1
   257  		{name: "ADDW", argLength: 2, reg: gp21sp, asm: "ADDW", commutative: true, clobberFlags: true},                                                                // arg0 + arg1
   258  		{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int32", typ: "UInt64", clobberFlags: true},                                                   // arg0 + auxint
   259  		{name: "ADDWconst", argLength: 1, reg: gp11sp, asm: "ADDW", aux: "Int32", clobberFlags: true},                                                                // arg0 + auxint
   260  		{name: "ADDload", argLength: 3, reg: gpopload, asm: "ADD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 + *arg1. arg2=mem
   261  		{name: "ADDWload", argLength: 3, reg: gpopload, asm: "ADDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + *arg1. arg2=mem
   262  
   263  		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB", clobberFlags: true},                                                                                       // arg0 - arg1
   264  		{name: "SUBW", argLength: 2, reg: gp21, asm: "SUBW", clobberFlags: true},                                                                                     // arg0 - arg1
   265  		{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                // arg0 - auxint
   266  		{name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 - auxint
   267  		{name: "SUBload", argLength: 3, reg: gpopload, asm: "SUB", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 - *arg1. arg2=mem
   268  		{name: "SUBWload", argLength: 3, reg: gpopload, asm: "SUBW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - *arg1. arg2=mem
   269  
   270  		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   271  		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true, resultInArg0: true, clobberFlags: true},                                // arg0 * arg1
   272  		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   273  		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int32", resultInArg0: true, clobberFlags: true},                                // arg0 * auxint
   274  		{name: "MULLDload", argLength: 3, reg: gpopload, asm: "MULLD", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   275  		{name: "MULLWload", argLength: 3, reg: gpopload, asm: "MULLW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * *arg1. arg2=mem
   276  
   277  		{name: "MULHD", argLength: 2, reg: gp21tmp, asm: "MULHD", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true},   // (arg0 * arg1) >> width
   278  		{name: "MULHDU", argLength: 2, reg: gp21tmp, asm: "MULHDU", typ: "Int64", commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 * arg1) >> width
   279  
   280  		{name: "DIVD", argLength: 2, reg: gp21tmp, asm: "DIVD", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   281  		{name: "DIVW", argLength: 2, reg: gp21tmp, asm: "DIVW", resultInArg0: true, clobberFlags: true},   // arg0 / arg1
   282  		{name: "DIVDU", argLength: 2, reg: gp21tmp, asm: "DIVDU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   283  		{name: "DIVWU", argLength: 2, reg: gp21tmp, asm: "DIVWU", resultInArg0: true, clobberFlags: true}, // arg0 / arg1
   284  
   285  		{name: "MODD", argLength: 2, reg: gp21tmp, asm: "MODD", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   286  		{name: "MODW", argLength: 2, reg: gp21tmp, asm: "MODW", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   287  
   288  		{name: "MODDU", argLength: 2, reg: gp21tmp, asm: "MODDU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   289  		{name: "MODWU", argLength: 2, reg: gp21tmp, asm: "MODWU", resultInArg0: true, clobberFlags: true}, // arg0 % arg1
   290  
   291  		{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true, clobberFlags: true},                                                                    // arg0 & arg1
   292  		{name: "ANDW", argLength: 2, reg: gp21, asm: "ANDW", commutative: true, clobberFlags: true},                                                                  // arg0 & arg1
   293  		{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 & auxint
   294  		{name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 & auxint
   295  		{name: "ANDload", argLength: 3, reg: gpopload, asm: "AND", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 & *arg1. arg2=mem
   296  		{name: "ANDWload", argLength: 3, reg: gpopload, asm: "ANDW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & *arg1. arg2=mem
   297  
   298  		{name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true, clobberFlags: true},                                                                    // arg0 | arg1
   299  		{name: "ORW", argLength: 2, reg: gp21, asm: "ORW", commutative: true, clobberFlags: true},                                                                  // arg0 | arg1
   300  		{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 | auxint
   301  		{name: "ORWconst", argLength: 1, reg: gp11, asm: "ORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 | auxint
   302  		{name: "ORload", argLength: 3, reg: gpopload, asm: "OR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 | *arg1. arg2=mem
   303  		{name: "ORWload", argLength: 3, reg: gpopload, asm: "ORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | *arg1. arg2=mem
   304  
   305  		{name: "XOR", argLength: 2, reg: gp21, asm: "XOR", commutative: true, clobberFlags: true},                                                                    // arg0 ^ arg1
   306  		{name: "XORW", argLength: 2, reg: gp21, asm: "XORW", commutative: true, clobberFlags: true},                                                                  // arg0 ^ arg1
   307  		{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64", resultInArg0: true, clobberFlags: true},                                                // arg0 ^ auxint
   308  		{name: "XORWconst", argLength: 1, reg: gp11, asm: "XORW", aux: "Int32", resultInArg0: true, clobberFlags: true},                                              // arg0 ^ auxint
   309  		{name: "XORload", argLength: 3, reg: gpopload, asm: "XOR", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 ^ *arg1. arg2=mem
   310  		{name: "XORWload", argLength: 3, reg: gpopload, asm: "XORW", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ *arg1. arg2=mem
   311  
   312  		// Arithmetic ops with carry/borrow chain.
   313  		//
   314  		// A carry is represented by a condition code of 2 or 3 (GT or OV).
   315  		// A borrow is represented by a condition code of 0 or 1 (EQ or LT).
   316  		{name: "ADDC", argLength: 2, reg: gp21flags, asm: "ADDC", typ: "(UInt64,Flags)", commutative: true},                          // (arg0 + arg1, carry out)
   317  		{name: "ADDCconst", argLength: 1, reg: gp11flags, asm: "ADDC", typ: "(UInt64,Flags)", aux: "Int16"},                          // (arg0 + auxint, carry out)
   318  		{name: "ADDE", argLength: 3, reg: gp2flags1flags, asm: "ADDE", typ: "(UInt64,Flags)", commutative: true, resultInArg0: true}, // (arg0 + arg1 + arg2 (carry in), carry out)
   319  		{name: "SUBC", argLength: 2, reg: gp21flags, asm: "SUBC", typ: "(UInt64,Flags)"},                                             // (arg0 - arg1, borrow out)
   320  		{name: "SUBE", argLength: 3, reg: gp2flags1flags, asm: "SUBE", typ: "(UInt64,Flags)", resultInArg0: true},                    // (arg0 - arg1 - arg2 (borrow in), borrow out)
   321  
   322  		// Comparisons.
   323  		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"},   // arg0 compare to arg1
   324  		{name: "CMPW", argLength: 2, reg: gp2flags, asm: "CMPW", typ: "Flags"}, // arg0 compare to arg1
   325  
   326  		{name: "CMPU", argLength: 2, reg: gp2flags, asm: "CMPU", typ: "Flags"},   // arg0 compare to arg1
   327  		{name: "CMPWU", argLength: 2, reg: gp2flags, asm: "CMPWU", typ: "Flags"}, // arg0 compare to arg1
   328  
   329  		{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", typ: "Flags", aux: "Int32"},     // arg0 compare to auxint
   330  		{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   331  		{name: "CMPUconst", argLength: 1, reg: gp1flags, asm: "CMPU", typ: "Flags", aux: "Int32"},   // arg0 compare to auxint
   332  		{name: "CMPWUconst", argLength: 1, reg: gp1flags, asm: "CMPWU", typ: "Flags", aux: "Int32"}, // arg0 compare to auxint
   333  
   334  		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "CEBR", typ: "Flags"},  // arg0 compare to arg1, f32
   335  		{name: "FCMP", argLength: 2, reg: fp2flags, asm: "FCMPU", typ: "Flags"},  // arg0 compare to arg1, f64
   336  		{name: "LTDBR", argLength: 1, reg: fp1flags, asm: "LTDBR", typ: "Flags"}, // arg0 compare to 0, f64
   337  		{name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32
   338  
   339  		{name: "SLD", argLength: 2, reg: sh21, asm: "SLD"},                    // arg0 << arg1, shift amount is mod 64
   340  		{name: "SLW", argLength: 2, reg: sh21, asm: "SLW"},                    // arg0 << arg1, shift amount is mod 64
   341  		{name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "UInt8"}, // arg0 << auxint, shift amount 0-63
   342  		{name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "UInt8"}, // arg0 << auxint, shift amount 0-31
   343  
   344  		{name: "SRD", argLength: 2, reg: sh21, asm: "SRD"},                    // unsigned arg0 >> arg1, shift amount is mod 64
   345  		{name: "SRW", argLength: 2, reg: sh21, asm: "SRW"},                    // unsigned uint32(arg0) >> arg1, shift amount is mod 64
   346  		{name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "UInt8"}, // unsigned arg0 >> auxint, shift amount 0-63
   347  		{name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "UInt8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31
   348  
   349  		// Arithmetic shifts clobber flags.
   350  		{name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true},                    // signed arg0 >> arg1, shift amount is mod 64
   351  		{name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true},                    // signed int32(arg0) >> arg1, shift amount is mod 64
   352  		{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "UInt8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63
   353  		{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "UInt8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31
   354  
   355  		// Rotate instructions.
   356  		// Note: no RLLGconst - use RISBGZ instead.
   357  		{name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"},                  // arg0 rotate left arg1, rotate amount 0-63
   358  		{name: "RLL", argLength: 2, reg: sh21, asm: "RLL"},                    // arg0 rotate left arg1, rotate amount 0-31
   359  		{name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "UInt8"}, // arg0 rotate left auxint, rotate amount 0-31
   360  
   361  		// Rotate then (and|or|xor|insert) selected bits instructions.
   362  		//
   363  		// Aux is an s390x.RotateParams struct containing Start, End and rotation
   364  		// Amount fields.
   365  		//
   366  		// arg1 is rotated left by the rotation amount then the bits from the start
   367  		// bit to the end bit (inclusive) are combined with arg0 using the logical
   368  		// operation specified. Bit indices are specified from left to right - the
   369  		// MSB is 0 and the LSB is 63.
   370  		//
   371  		// Examples:
   372  		//               |          aux         |
   373  		// | instruction | start | end | amount |          arg0         |          arg1         |         result        |
   374  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   375  		// | RXSBG (XOR) |     0 |   1 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0x3fff_ffff_ffff_ffff |
   376  		// | RXSBG (XOR) |    62 |  63 |      0 | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_ffff | 0xffff_ffff_ffff_fffc |
   377  		// | RXSBG (XOR) |     0 |  47 |     16 | 0xffff_ffff_ffff_ffff | 0x0000_0000_0000_ffff | 0xffff_ffff_0000_ffff |
   378  		// +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+
   379  		//
   380  		{name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits
   381  		{name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true},                   // rotate then insert selected bits [into zero]
   382  
   383  		// unary ops
   384  		{name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true},   // -arg0
   385  		{name: "NEGW", argLength: 1, reg: gp11, asm: "NEGW", clobberFlags: true}, // -arg0
   386  
   387  		{name: "NOT", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true},  // ^arg0
   388  		{name: "NOTW", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0
   389  
   390  		{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"},   // sqrt(arg0)
   391  		{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
   392  
   393  		// Conditional register-register moves.
   394  		// The aux for these values is an s390x.CCMask value representing the condition code mask.
   395  		{name: "LOCGR", argLength: 3, reg: gp2flags1, resultInArg0: true, asm: "LOCGR", aux: "S390XCCMask"}, // load arg1 into arg0 if the condition code in arg2 matches a masked bit in aux.
   396  
   397  		{name: "MOVBreg", argLength: 1, reg: gp11sp, asm: "MOVB", typ: "Int64"},    // sign extend arg0 from int8 to int64
   398  		{name: "MOVBZreg", argLength: 1, reg: gp11sp, asm: "MOVBZ", typ: "UInt64"}, // zero extend arg0 from int8 to int64
   399  		{name: "MOVHreg", argLength: 1, reg: gp11sp, asm: "MOVH", typ: "Int64"},    // sign extend arg0 from int16 to int64
   400  		{name: "MOVHZreg", argLength: 1, reg: gp11sp, asm: "MOVHZ", typ: "UInt64"}, // zero extend arg0 from int16 to int64
   401  		{name: "MOVWreg", argLength: 1, reg: gp11sp, asm: "MOVW", typ: "Int64"},    // sign extend arg0 from int32 to int64
   402  		{name: "MOVWZreg", argLength: 1, reg: gp11sp, asm: "MOVWZ", typ: "UInt64"}, // zero extend arg0 from int32 to int64
   403  
   404  		{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
   405  
   406  		{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
   407  		{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
   408  
   409  		{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA", clobberFlags: true}, // convert float64 to int32
   410  		{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA", clobberFlags: true}, // convert float64 to int64
   411  		{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA", clobberFlags: true}, // convert float32 to int32
   412  		{name: "CGEBRA", argLength: 1, reg: fpgp, asm: "CGEBRA", clobberFlags: true}, // convert float32 to int64
   413  		{name: "CEFBRA", argLength: 1, reg: gpfp, asm: "CEFBRA", clobberFlags: true}, // convert int32 to float32
   414  		{name: "CDFBRA", argLength: 1, reg: gpfp, asm: "CDFBRA", clobberFlags: true}, // convert int32 to float64
   415  		{name: "CEGBRA", argLength: 1, reg: gpfp, asm: "CEGBRA", clobberFlags: true}, // convert int64 to float32
   416  		{name: "CDGBRA", argLength: 1, reg: gpfp, asm: "CDGBRA", clobberFlags: true}, // convert int64 to float64
   417  		{name: "CLFEBR", argLength: 1, reg: fpgp, asm: "CLFEBR", clobberFlags: true}, // convert float32 to uint32
   418  		{name: "CLFDBR", argLength: 1, reg: fpgp, asm: "CLFDBR", clobberFlags: true}, // convert float64 to uint32
   419  		{name: "CLGEBR", argLength: 1, reg: fpgp, asm: "CLGEBR", clobberFlags: true}, // convert float32 to uint64
   420  		{name: "CLGDBR", argLength: 1, reg: fpgp, asm: "CLGDBR", clobberFlags: true}, // convert float64 to uint64
   421  		{name: "CELFBR", argLength: 1, reg: gpfp, asm: "CELFBR", clobberFlags: true}, // convert uint32 to float32
   422  		{name: "CDLFBR", argLength: 1, reg: gpfp, asm: "CDLFBR", clobberFlags: true}, // convert uint32 to float64
   423  		{name: "CELGBR", argLength: 1, reg: gpfp, asm: "CELGBR", clobberFlags: true}, // convert uint64 to float32
   424  		{name: "CDLGBR", argLength: 1, reg: gpfp, asm: "CDLGBR", clobberFlags: true}, // convert uint64 to float64
   425  
   426  		{name: "LEDBR", argLength: 1, reg: fp11, asm: "LEDBR"}, // convert float64 to float32
   427  		{name: "LDEBR", argLength: 1, reg: fp11, asm: "LDEBR"}, // convert float32 to float64
   428  
   429  		{name: "MOVDaddr", argLength: 1, reg: addr, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // arg0 + auxint + offset encoded in aux
   430  		{name: "MOVDaddridx", argLength: 2, reg: addridx, aux: "SymOff", symEffect: "Addr"},                    // arg0 + arg1 + auxint + aux
   431  
   432  		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
   433  		{name: "MOVBZload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"},  // load byte from arg0+auxint+aux. arg1=mem.  Zero extend.
   434  		{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVB", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   435  		{name: "MOVHZload", argLength: 2, reg: gpload, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   436  		{name: "MOVHload", argLength: 2, reg: gpload, asm: "MOVH", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   437  		{name: "MOVWZload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem.  Zero extend.
   438  		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},                  // ditto, sign extend to int64
   439  		{name: "MOVDload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"},   // load 8 bytes from arg0+auxint+aux. arg1=mem
   440  
   441  		{name: "MOVWBR", argLength: 1, reg: gp11, asm: "MOVWBR"}, // arg0 swap bytes
   442  		{name: "MOVDBR", argLength: 1, reg: gp11, asm: "MOVDBR"}, // arg0 swap bytes
   443  
   444  		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   445  		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   446  		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem. Reverse bytes.
   447  
   448  		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store byte in arg1 to arg0+auxint+aux. arg2=mem
   449  		{name: "MOVHstore", argLength: 3, reg: gpstore, asm: "MOVH", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
   450  		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
   451  		{name: "MOVDstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},       // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
   452  		{name: "MOVHBRstore", argLength: 3, reg: gpstorebr, asm: "MOVHBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   453  		{name: "MOVWBRstore", argLength: 3, reg: gpstorebr, asm: "MOVWBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   454  		{name: "MOVDBRstore", argLength: 3, reg: gpstorebr, asm: "MOVDBR", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem. Reverse bytes.
   455  
   456  		{name: "MVC", argLength: 3, reg: gpmvc, asm: "MVC", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, faultOnNilArg1: true, symEffect: "None"}, // arg0=destptr, arg1=srcptr, arg2=mem, auxint=size,off
   457  
   458  		// indexed loads/stores
   459  		{name: "MOVBZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBZ", aux: "SymOff", typ: "UInt8", symEffect: "Read"},   // load a byte from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   460  		{name: "MOVBloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVB", aux: "SymOff", typ: "Int8", symEffect: "Read"},      // load a byte from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   461  		{name: "MOVHZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHZ", aux: "SymOff", typ: "UInt16", symEffect: "Read"},  // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   462  		{name: "MOVHloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVH", aux: "SymOff", typ: "Int16", symEffect: "Read"},     // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   463  		{name: "MOVWZloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWZ", aux: "SymOff", typ: "UInt32", symEffect: "Read"},  // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
   464  		{name: "MOVWloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVW", aux: "SymOff", typ: "Int32", symEffect: "Read"},     // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Sign extend.
   465  		{name: "MOVDloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVD", aux: "SymOff", typ: "UInt64", symEffect: "Read"},    // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
   466  		{name: "MOVHBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVHBR", aux: "SymOff", typ: "Int16", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   467  		{name: "MOVWBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWBR", aux: "SymOff", typ: "Int32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   468  		{name: "MOVDBRloadidx", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVDBR", aux: "SymOff", typ: "Int64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem. Reverse bytes.
   469  		{name: "MOVBstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVB", aux: "SymOff", symEffect: "Write"},                // store byte in arg2 to arg0+arg1+auxint+aux. arg3=mem
   470  		{name: "MOVHstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVH", aux: "SymOff", symEffect: "Write"},                // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   471  		{name: "MOVWstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVW", aux: "SymOff", symEffect: "Write"},                // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   472  		{name: "MOVDstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVD", aux: "SymOff", symEffect: "Write"},                // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
   473  		{name: "MOVHBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVHBR", aux: "SymOff", symEffect: "Write"},            // store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   474  		{name: "MOVWBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVWBR", aux: "SymOff", symEffect: "Write"},            // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   475  		{name: "MOVDBRstoreidx", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVDBR", aux: "SymOff", symEffect: "Write"},            // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem. Reverse bytes.
   476  
   477  		// For storeconst ops, the AuxInt field encodes both
   478  		// the value to store and an address offset of the store.
   479  		// Cast AuxInt to a ValAndOff to extract Val and Off fields.
   480  		{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux.  arg1=mem
   481  		{name: "MOVHstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVH", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ...
   482  		{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
   483  		{name: "MOVDstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVD", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
   484  
   485  		{name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"},
   486  
   487  		{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                                                // call static function aux.(*obj.LSym).  last arg=mem, auxint=argsize, returns mem
   488  		{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},                                  // tail call static function aux.(*obj.LSym).  last arg=mem, auxint=argsize, returns mem
   489  		{name: "CALLtailinter", argLength: -1, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true},     // tail call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem
   490  		{name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure.  arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem
   491  		{name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true},                         // call fn by pointer.  arg0=codeptr, last arg=mem, auxint=argsize, returns mem
   492  
   493  		// (InvertFlags (CMP a b)) == (CMP b a)
   494  		// InvertFlags is a pseudo-op which can't appear in assembly output.
   495  		{name: "InvertFlags", argLength: 1}, // reverse direction of arg0
   496  
   497  		// Pseudo-ops
   498  		{name: "LoweredGetG", argLength: 1, reg: gp01}, // arg0=mem
   499  		// Scheduler ensures LoweredGetClosurePtr occurs only in entry block,
   500  		// and sorts it to the very beginning of the block to prevent other
   501  		// use of R12 (the closure pointer)
   502  		{name: "LoweredGetClosurePtr", reg: regInfo{outputs: []regMask{buildReg("R12")}}, zeroWidth: true},
   503  		// LoweredNilCheck (defined below): arg0=ptr, arg1=mem, returns void.  Faults if ptr is nil.
   504  		// LoweredGetCallerSP returns the SP of the caller of the current function. arg0=mem.
   505  		{name: "LoweredGetCallerSP", argLength: 1, reg: gp01, rematerializeable: true},
   506  		// LoweredGetCallerPC evaluates to the PC to which its "caller" will return.
   507  		// I.e., if f calls g and g "calls" sys.GetCallerPC,
   508  		// the result should be the PC within f that g will return to.
   509  		// See runtime/stubs.go for a more detailed discussion.
   510  		{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
   511  		{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{ptrsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
   512  		// Round ops to block fused-multiply-add extraction.
   513  		{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   514  		{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
   515  
   516  		// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
   517  		// It saves all GP registers if necessary,
   518  		// but clobbers R14 (LR) because it's a call,
   519  		// and also clobbers R1 as the PLT stub does.
   520  		// Returns a pointer to a write barrier buffer in R9.
   521  		{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R14") | r1, outputs: []regMask{r9}}, clobberFlags: true, aux: "Int64"},
   522  
   523  		// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
   524  		// the RC and CR versions are used when one of the arguments is a constant. CC is used
   525  		// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
   526  		// failure means the length must have also been 0).
   527  		// AuxInt contains a report code (see PanicBounds in genericOps.go).
   528  		{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp &^ lr, gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
   529  		{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true},    // arg0=x, arg1=mem, returns memory.
   530  		{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true},    // arg0=y, arg1=mem, returns memory.
   531  		{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true},                              // arg0=mem, returns memory.
   532  
   533  		// Constant condition code values. The condition code can be 0, 1, 2 or 3.
   534  		{name: "FlagEQ"}, // CC=0 (equal)
   535  		{name: "FlagLT"}, // CC=1 (less than)
   536  		{name: "FlagGT"}, // CC=2 (greater than)
   537  		{name: "FlagOV"}, // CC=3 (overflow)
   538  
   539  		// Fast-BCR-serialization to ensure store-load ordering.
   540  		{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
   541  
   542  		// Atomic loads. These are just normal loads but return <value,memory> tuples
   543  		// so they can be properly ordered with other loads.
   544  		// load from arg0+auxint+aux.  arg1=mem.
   545  		{name: "MOVBZatomicload", argLength: 2, reg: gpload, asm: "MOVBZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   546  		{name: "MOVWZatomicload", argLength: 2, reg: gpload, asm: "MOVWZ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   547  		{name: "MOVDatomicload", argLength: 2, reg: gpload, asm: "MOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
   548  
   549  		// Atomic stores. These are just normal stores.
   550  		// store arg1 to arg0+auxint+aux. arg2=mem.
   551  		{name: "MOVBatomicstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   552  		{name: "MOVWatomicstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   553  		{name: "MOVDatomicstore", argLength: 3, reg: gpstore, asm: "MOVD", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "Write"},
   554  
   555  		// Atomic adds.
   556  		// *(arg0+auxint+aux) += arg1.  arg2=mem.
   557  		// Returns a tuple of <old contents of *(arg0+auxint+aux), memory>.
   558  		{name: "LAA", argLength: 3, reg: gpstorelaa, asm: "LAA", typ: "(UInt32,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   559  		{name: "LAAG", argLength: 3, reg: gpstorelaa, asm: "LAAG", typ: "(UInt64,Mem)", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   560  		{name: "AddTupleFirst32", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   561  		{name: "AddTupleFirst64", argLength: 2}, // arg1=tuple <x,y>.  Returns <x+arg0,y>.
   562  
   563  		// Atomic bitwise operations.
   564  		// Note: 'floor' operations round the pointer down to the nearest word boundary
   565  		// which reflects how they are used in the runtime.
   566  		{name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 &= arg1. arg2 = mem.
   567  		{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
   568  		{name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 |= arg1. arg2 = mem.
   569  		{name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
   570  
   571  		// Compare and swap.
   572  		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
   573  		// if *(arg0+auxint+aux) == arg1 {
   574  		//   *(arg0+auxint+aux) = arg2
   575  		//   return (true, memory)
   576  		// } else {
   577  		//   return (false, memory)
   578  		// }
   579  		// Note that these instructions also return the old value in arg1, but we ignore it.
   580  		// TODO: have these return flags instead of bool.  The current system generates:
   581  		//    CS ...
   582  		//    MOVD  $0, ret
   583  		//    BNE   2(PC)
   584  		//    MOVD  $1, ret
   585  		//    CMPW  ret, $0
   586  		//    BNE ...
   587  		// instead of just
   588  		//    CS ...
   589  		//    BEQ ...
   590  		// but we can't do that because memory-using ops can't generate flags yet
   591  		// (flagalloc wants to move flag-generating instructions around).
   592  		{name: "LoweredAtomicCas32", argLength: 4, reg: cas, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   593  		{name: "LoweredAtomicCas64", argLength: 4, reg: cas, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   594  
   595  		// Lowered atomic swaps, emulated using compare-and-swap.
   596  		// store arg1 to arg0+auxint+aux, arg2=mem.
   597  		{name: "LoweredAtomicExchange32", argLength: 3, reg: exchange, asm: "CS", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   598  		{name: "LoweredAtomicExchange64", argLength: 3, reg: exchange, asm: "CSG", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"},
   599  
   600  		// find leftmost one
   601  		{
   602  			name:         "FLOGR",
   603  			argLength:    1,
   604  			reg:          regInfo{inputs: gponly, outputs: []regMask{buildReg("R0")}, clobbers: buildReg("R1")},
   605  			asm:          "FLOGR",
   606  			typ:          "UInt64",
   607  			clobberFlags: true,
   608  		},
   609  
   610  		// population count
   611  		//
   612  		// Counts the number of ones in each byte of arg0
   613  		// and places the result into the corresponding byte
   614  		// of the result.
   615  		{
   616  			name:         "POPCNT",
   617  			argLength:    1,
   618  			reg:          gp11,
   619  			asm:          "POPCNT",
   620  			typ:          "UInt64",
   621  			clobberFlags: true,
   622  		},
   623  
   624  		// unsigned multiplication (64x64 → 128)
   625  		//
   626  		// Multiply the two 64-bit input operands together and place the 128-bit result into
   627  		// an even-odd register pair. The second register in the target pair also contains
   628  		// one of the input operands. Since we don't currently have a way to specify an
   629  		// even-odd register pair we hardcode this register pair as R2:R3.
   630  		{
   631  			name:      "MLGR",
   632  			argLength: 2,
   633  			reg:       regInfo{inputs: []regMask{gp, r3}, outputs: []regMask{r2, r3}},
   634  			asm:       "MLGR",
   635  		},
   636  
   637  		// pseudo operations to sum the output of the POPCNT instruction
   638  		{name: "SumBytes2", argLength: 1, typ: "UInt8"}, // sum the rightmost 2 bytes in arg0 ignoring overflow
   639  		{name: "SumBytes4", argLength: 1, typ: "UInt8"}, // sum the rightmost 4 bytes in arg0 ignoring overflow
   640  		{name: "SumBytes8", argLength: 1, typ: "UInt8"}, // sum all the bytes in arg0 ignoring overflow
   641  
   642  		// store multiple
   643  		{
   644  			name:           "STMG2",
   645  			argLength:      4,
   646  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   647  			aux:            "SymOff",
   648  			typ:            "Mem",
   649  			asm:            "STMG",
   650  			faultOnNilArg0: true,
   651  			symEffect:      "Write",
   652  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   653  		},
   654  		{
   655  			name:           "STMG3",
   656  			argLength:      5,
   657  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   658  			aux:            "SymOff",
   659  			typ:            "Mem",
   660  			asm:            "STMG",
   661  			faultOnNilArg0: true,
   662  			symEffect:      "Write",
   663  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   664  		},
   665  		{
   666  			name:      "STMG4",
   667  			argLength: 6,
   668  			reg: regInfo{inputs: []regMask{
   669  				ptrsp,
   670  				buildReg("R1"),
   671  				buildReg("R2"),
   672  				buildReg("R3"),
   673  				buildReg("R4"),
   674  				0,
   675  			}},
   676  			aux:            "SymOff",
   677  			typ:            "Mem",
   678  			asm:            "STMG",
   679  			faultOnNilArg0: true,
   680  			symEffect:      "Write",
   681  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   682  		},
   683  		{
   684  			name:           "STM2",
   685  			argLength:      4,
   686  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), 0}},
   687  			aux:            "SymOff",
   688  			typ:            "Mem",
   689  			asm:            "STMY",
   690  			faultOnNilArg0: true,
   691  			symEffect:      "Write",
   692  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   693  		},
   694  		{
   695  			name:           "STM3",
   696  			argLength:      5,
   697  			reg:            regInfo{inputs: []regMask{ptrsp, buildReg("R1"), buildReg("R2"), buildReg("R3"), 0}},
   698  			aux:            "SymOff",
   699  			typ:            "Mem",
   700  			asm:            "STMY",
   701  			faultOnNilArg0: true,
   702  			symEffect:      "Write",
   703  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   704  		},
   705  		{
   706  			name:      "STM4",
   707  			argLength: 6,
   708  			reg: regInfo{inputs: []regMask{
   709  				ptrsp,
   710  				buildReg("R1"),
   711  				buildReg("R2"),
   712  				buildReg("R3"),
   713  				buildReg("R4"),
   714  				0,
   715  			}},
   716  			aux:            "SymOff",
   717  			typ:            "Mem",
   718  			asm:            "STMY",
   719  			faultOnNilArg0: true,
   720  			symEffect:      "Write",
   721  			clobberFlags:   true, // TODO(mundaym): currently uses AGFI to handle large offsets
   722  		},
   723  
   724  		// large move
   725  		// auxint = remaining bytes after loop (rem)
   726  		// arg0 = address of dst memory (in R1, changed as a side effect)
   727  		// arg1 = address of src memory (in R2, changed as a side effect)
   728  		// arg2 = pointer to last address to move in loop + 256
   729  		// arg3 = mem
   730  		// returns mem
   731  		//
   732  		// mvc: MVC  $256, 0(R2), 0(R1)
   733  		//      MOVD $256(R1), R1
   734  		//      MOVD $256(R2), R2
   735  		//      CMP  R2, Rarg2
   736  		//      BNE  mvc
   737  		//	MVC  $rem, 0(R2), 0(R1) // if rem > 0
   738  		{
   739  			name:      "LoweredMove",
   740  			aux:       "Int64",
   741  			argLength: 4,
   742  			reg: regInfo{
   743  				inputs:   []regMask{buildReg("R1"), buildReg("R2"), gpsp},
   744  				clobbers: buildReg("R1 R2"),
   745  			},
   746  			clobberFlags:   true,
   747  			typ:            "Mem",
   748  			faultOnNilArg0: true,
   749  			faultOnNilArg1: true,
   750  		},
   751  
   752  		// large clear
   753  		// auxint = remaining bytes after loop (rem)
   754  		// arg0 = address of dst memory (in R1, changed as a side effect)
   755  		// arg1 = pointer to last address to zero in loop + 256
   756  		// arg2 = mem
   757  		// returns mem
   758  		//
   759  		// clear: CLEAR $256, 0(R1)
   760  		//        MOVD  $256(R1), R1
   761  		//        CMP   R1, Rarg1
   762  		//        BNE   clear
   763  		//	  CLEAR $rem, 0(R1) // if rem > 0
   764  		{
   765  			name:      "LoweredZero",
   766  			aux:       "Int64",
   767  			argLength: 3,
   768  			reg: regInfo{
   769  				inputs:   []regMask{buildReg("R1"), gpsp},
   770  				clobbers: buildReg("R1"),
   771  			},
   772  			clobberFlags:   true,
   773  			typ:            "Mem",
   774  			faultOnNilArg0: true,
   775  		},
   776  	}
   777  
   778  	// All blocks on s390x have their condition code mask (s390x.CCMask) as the Aux value.
   779  	// The condition code mask is a 4-bit mask where each bit corresponds to a condition
   780  	// code value. If the value of the condition code matches a bit set in the condition
   781  	// code mask then the first successor is executed. Otherwise the second successor is
   782  	// executed.
   783  	//
   784  	// | condition code value |  mask bit  |
   785  	// +----------------------+------------+
   786  	// | 0 (equal)            | 0b1000 (8) |
   787  	// | 1 (less than)        | 0b0100 (4) |
   788  	// | 2 (greater than)     | 0b0010 (2) |
   789  	// | 3 (unordered)        | 0b0001 (1) |
   790  	//
   791  	// Note that compare-and-branch instructions must not have bit 3 (0b0001) set.
   792  	var S390Xblocks = []blockData{
   793  		// branch on condition
   794  		{name: "BRC", controls: 1, aux: "S390XCCMask"}, // condition code value (flags) is Controls[0]
   795  
   796  		// compare-and-branch (register-register)
   797  		//  - integrates comparison of Controls[0] with Controls[1]
   798  		//  - both control values must be in general purpose registers
   799  		{name: "CRJ", controls: 2, aux: "S390XCCMask"},   // signed 32-bit integer comparison
   800  		{name: "CGRJ", controls: 2, aux: "S390XCCMask"},  // signed 64-bit integer comparison
   801  		{name: "CLRJ", controls: 2, aux: "S390XCCMask"},  // unsigned 32-bit integer comparison
   802  		{name: "CLGRJ", controls: 2, aux: "S390XCCMask"}, // unsigned 64-bit integer comparison
   803  
   804  		// compare-and-branch (register-immediate)
   805  		//  - integrates comparison of Controls[0] with AuxInt
   806  		//  - control value must be in a general purpose register
   807  		//  - the AuxInt value is sign-extended for signed comparisons
   808  		//    and zero-extended for unsigned comparisons
   809  		{name: "CIJ", controls: 1, aux: "S390XCCMaskInt8"},    // signed 32-bit integer comparison
   810  		{name: "CGIJ", controls: 1, aux: "S390XCCMaskInt8"},   // signed 64-bit integer comparison
   811  		{name: "CLIJ", controls: 1, aux: "S390XCCMaskUint8"},  // unsigned 32-bit integer comparison
   812  		{name: "CLGIJ", controls: 1, aux: "S390XCCMaskUint8"}, // unsigned 64-bit integer comparison
   813  	}
   814  
   815  	archs = append(archs, arch{
   816  		name:               "S390X",
   817  		pkg:                "cmd/internal/obj/s390x",
   818  		genfile:            "../../s390x/ssa.go",
   819  		ops:                S390Xops,
   820  		blocks:             S390Xblocks,
   821  		regnames:           regNamesS390X,
   822  		ParamIntRegNames:   "R2 R3 R4 R5 R6 R7 R8 R9",
   823  		ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15",
   824  		gpregmask:          gp,
   825  		fpregmask:          fp,
   826  		framepointerreg:    -1,               // not used
   827  		linkreg:            int8(num["R14"]), // R14 (LR), the link register
   828  		imports: []string{
   829  			"cmd/internal/obj/s390x",
   830  		},
   831  	})
   832  }
   833  

View as plain text