Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  	"internal/abi"
    20  )
    21  
    22  // loadByType returns the load instruction of the given type.
    23  func loadByType(t *types.Type) obj.As {
    24  	if t.IsFloat() {
    25  		switch t.Size() {
    26  		case 4:
    27  			return arm64.AFMOVS
    28  		case 8:
    29  			return arm64.AFMOVD
    30  		}
    31  	} else {
    32  		switch t.Size() {
    33  		case 1:
    34  			if t.IsSigned() {
    35  				return arm64.AMOVB
    36  			} else {
    37  				return arm64.AMOVBU
    38  			}
    39  		case 2:
    40  			if t.IsSigned() {
    41  				return arm64.AMOVH
    42  			} else {
    43  				return arm64.AMOVHU
    44  			}
    45  		case 4:
    46  			if t.IsSigned() {
    47  				return arm64.AMOVW
    48  			} else {
    49  				return arm64.AMOVWU
    50  			}
    51  		case 8:
    52  			return arm64.AMOVD
    53  		}
    54  	}
    55  	panic("bad load type")
    56  }
    57  
    58  // storeByType returns the store instruction of the given type.
    59  func storeByType(t *types.Type) obj.As {
    60  	if t.IsFloat() {
    61  		switch t.Size() {
    62  		case 4:
    63  			return arm64.AFMOVS
    64  		case 8:
    65  			return arm64.AFMOVD
    66  		}
    67  	} else {
    68  		switch t.Size() {
    69  		case 1:
    70  			return arm64.AMOVB
    71  		case 2:
    72  			return arm64.AMOVH
    73  		case 4:
    74  			return arm64.AMOVW
    75  		case 8:
    76  			return arm64.AMOVD
    77  		}
    78  	}
    79  	panic("bad store type")
    80  }
    81  
    82  // loadByType2 returns an opcode that can load consecutive memory locations into 2 registers with type t.
    83  // returns obj.AXXX if no such opcode exists.
    84  func loadByType2(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return arm64.AFLDPS
    89  		case 8:
    90  			return arm64.AFLDPD
    91  		}
    92  	} else {
    93  		switch t.Size() {
    94  		case 4:
    95  			return arm64.ALDPW
    96  		case 8:
    97  			return arm64.ALDP
    98  		}
    99  	}
   100  	return obj.AXXX
   101  }
   102  
   103  // storeByType2 returns an opcode that can store registers with type t into 2 consecutive memory locations.
   104  // returns obj.AXXX if no such opcode exists.
   105  func storeByType2(t *types.Type) obj.As {
   106  	if t.IsFloat() {
   107  		switch t.Size() {
   108  		case 4:
   109  			return arm64.AFSTPS
   110  		case 8:
   111  			return arm64.AFSTPD
   112  		}
   113  	} else {
   114  		switch t.Size() {
   115  		case 4:
   116  			return arm64.ASTPW
   117  		case 8:
   118  			return arm64.ASTP
   119  		}
   120  	}
   121  	return obj.AXXX
   122  }
   123  
   124  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
   125  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
   126  	if s < 0 || s >= 64 {
   127  		v.Fatalf("shift out of range: %d", s)
   128  	}
   129  	return int64(reg&31)<<16 | typ | (s&63)<<10
   130  }
   131  
   132  // genshift generates a Prog for r = r0 op (r1 shifted by n).
   133  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
   134  	p := s.Prog(as)
   135  	p.From.Type = obj.TYPE_SHIFT
   136  	p.From.Offset = makeshift(v, r1, typ, n)
   137  	p.Reg = r0
   138  	if r != 0 {
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = r
   141  	}
   142  	return p
   143  }
   144  
   145  // generate the memory operand for the indexed load/store instructions.
   146  // base and idx are registers.
   147  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   148  	// Reg: base register, Index: (shifted) index register
   149  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   150  	switch op {
   151  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
   152  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   153  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   154  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
   155  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   156  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   157  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
   158  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   159  	default: // not shifted
   160  		mop.Index = idx
   161  	}
   162  	return mop
   163  }
   164  
   165  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   166  	switch v.Op {
   167  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   168  		if v.Type.IsMemory() {
   169  			return
   170  		}
   171  		x := v.Args[0].Reg()
   172  		y := v.Reg()
   173  		if x == y {
   174  			return
   175  		}
   176  		as := arm64.AMOVD
   177  		if v.Type.IsFloat() {
   178  			switch v.Type.Size() {
   179  			case 4:
   180  				as = arm64.AFMOVS
   181  			case 8:
   182  				as = arm64.AFMOVD
   183  			default:
   184  				panic("bad float size")
   185  			}
   186  		}
   187  		p := s.Prog(as)
   188  		p.From.Type = obj.TYPE_REG
   189  		p.From.Reg = x
   190  		p.To.Type = obj.TYPE_REG
   191  		p.To.Reg = y
   192  	case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
   193  		// nothing to do
   194  	case ssa.OpLoadReg:
   195  		if v.Type.IsFlags() {
   196  			v.Fatalf("load flags not implemented: %v", v.LongString())
   197  			return
   198  		}
   199  		p := s.Prog(loadByType(v.Type))
   200  		ssagen.AddrAuto(&p.From, v.Args[0])
   201  		p.To.Type = obj.TYPE_REG
   202  		p.To.Reg = v.Reg()
   203  	case ssa.OpStoreReg:
   204  		if v.Type.IsFlags() {
   205  			v.Fatalf("store flags not implemented: %v", v.LongString())
   206  			return
   207  		}
   208  		p := s.Prog(storeByType(v.Type))
   209  		p.From.Type = obj.TYPE_REG
   210  		p.From.Reg = v.Args[0].Reg()
   211  		ssagen.AddrAuto(&p.To, v)
   212  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   213  		ssagen.CheckArgReg(v)
   214  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   215  		// The loop only runs once.
   216  		args := v.Block.Func.RegArgs
   217  		if len(args) == 0 {
   218  			break
   219  		}
   220  		v.Block.Func.RegArgs = nil // prevent from running again
   221  
   222  		for i := 0; i < len(args); i++ {
   223  			a := args[i]
   224  			// Offset by size of the saved LR slot.
   225  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   226  			// Look for double-register operations if we can.
   227  			if i < len(args)-1 {
   228  				b := args[i+1]
   229  				if a.Type.Size() == b.Type.Size() &&
   230  					a.Type.IsFloat() == b.Type.IsFloat() &&
   231  					b.Offset == a.Offset+a.Type.Size() {
   232  					ld := loadByType2(a.Type)
   233  					st := storeByType2(a.Type)
   234  					if ld != obj.AXXX && st != obj.AXXX {
   235  						s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st})
   236  						i++ // b is done also, skip it.
   237  						continue
   238  					}
   239  				}
   240  			}
   241  			// Pass the spill/unspill information along to the assembler.
   242  			s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   243  		}
   244  
   245  	case ssa.OpARM64ADD,
   246  		ssa.OpARM64SUB,
   247  		ssa.OpARM64AND,
   248  		ssa.OpARM64OR,
   249  		ssa.OpARM64XOR,
   250  		ssa.OpARM64BIC,
   251  		ssa.OpARM64EON,
   252  		ssa.OpARM64ORN,
   253  		ssa.OpARM64MUL,
   254  		ssa.OpARM64MULW,
   255  		ssa.OpARM64MNEG,
   256  		ssa.OpARM64MNEGW,
   257  		ssa.OpARM64MULH,
   258  		ssa.OpARM64UMULH,
   259  		ssa.OpARM64MULL,
   260  		ssa.OpARM64UMULL,
   261  		ssa.OpARM64DIV,
   262  		ssa.OpARM64UDIV,
   263  		ssa.OpARM64DIVW,
   264  		ssa.OpARM64UDIVW,
   265  		ssa.OpARM64MOD,
   266  		ssa.OpARM64UMOD,
   267  		ssa.OpARM64MODW,
   268  		ssa.OpARM64UMODW,
   269  		ssa.OpARM64SLL,
   270  		ssa.OpARM64SRL,
   271  		ssa.OpARM64SRA,
   272  		ssa.OpARM64FADDS,
   273  		ssa.OpARM64FADDD,
   274  		ssa.OpARM64FSUBS,
   275  		ssa.OpARM64FSUBD,
   276  		ssa.OpARM64FMULS,
   277  		ssa.OpARM64FMULD,
   278  		ssa.OpARM64FNMULS,
   279  		ssa.OpARM64FNMULD,
   280  		ssa.OpARM64FDIVS,
   281  		ssa.OpARM64FDIVD,
   282  		ssa.OpARM64FMINS,
   283  		ssa.OpARM64FMIND,
   284  		ssa.OpARM64FMAXS,
   285  		ssa.OpARM64FMAXD,
   286  		ssa.OpARM64ROR,
   287  		ssa.OpARM64RORW:
   288  		r := v.Reg()
   289  		r1 := v.Args[0].Reg()
   290  		r2 := v.Args[1].Reg()
   291  		p := s.Prog(v.Op.Asm())
   292  		p.From.Type = obj.TYPE_REG
   293  		p.From.Reg = r2
   294  		p.Reg = r1
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = r
   297  	case ssa.OpARM64FMADDS,
   298  		ssa.OpARM64FMADDD,
   299  		ssa.OpARM64FNMADDS,
   300  		ssa.OpARM64FNMADDD,
   301  		ssa.OpARM64FMSUBS,
   302  		ssa.OpARM64FMSUBD,
   303  		ssa.OpARM64FNMSUBS,
   304  		ssa.OpARM64FNMSUBD,
   305  		ssa.OpARM64MADD,
   306  		ssa.OpARM64MADDW,
   307  		ssa.OpARM64MSUB,
   308  		ssa.OpARM64MSUBW:
   309  		rt := v.Reg()
   310  		ra := v.Args[0].Reg()
   311  		rm := v.Args[1].Reg()
   312  		rn := v.Args[2].Reg()
   313  		p := s.Prog(v.Op.Asm())
   314  		p.Reg = ra
   315  		p.From.Type = obj.TYPE_REG
   316  		p.From.Reg = rm
   317  		p.AddRestSourceReg(rn)
   318  		p.To.Type = obj.TYPE_REG
   319  		p.To.Reg = rt
   320  	case ssa.OpARM64ADDconst,
   321  		ssa.OpARM64SUBconst,
   322  		ssa.OpARM64ANDconst,
   323  		ssa.OpARM64ORconst,
   324  		ssa.OpARM64XORconst,
   325  		ssa.OpARM64SLLconst,
   326  		ssa.OpARM64SRLconst,
   327  		ssa.OpARM64SRAconst,
   328  		ssa.OpARM64RORconst,
   329  		ssa.OpARM64RORWconst:
   330  		p := s.Prog(v.Op.Asm())
   331  		p.From.Type = obj.TYPE_CONST
   332  		p.From.Offset = v.AuxInt
   333  		p.Reg = v.Args[0].Reg()
   334  		p.To.Type = obj.TYPE_REG
   335  		p.To.Reg = v.Reg()
   336  	case ssa.OpARM64ADDSconstflags:
   337  		p := s.Prog(v.Op.Asm())
   338  		p.From.Type = obj.TYPE_CONST
   339  		p.From.Offset = v.AuxInt
   340  		p.Reg = v.Args[0].Reg()
   341  		p.To.Type = obj.TYPE_REG
   342  		p.To.Reg = v.Reg0()
   343  	case ssa.OpARM64ADCzerocarry:
   344  		p := s.Prog(v.Op.Asm())
   345  		p.From.Type = obj.TYPE_REG
   346  		p.From.Reg = arm64.REGZERO
   347  		p.Reg = arm64.REGZERO
   348  		p.To.Type = obj.TYPE_REG
   349  		p.To.Reg = v.Reg()
   350  	case ssa.OpARM64ADCSflags,
   351  		ssa.OpARM64ADDSflags,
   352  		ssa.OpARM64SBCSflags,
   353  		ssa.OpARM64SUBSflags:
   354  		r := v.Reg0()
   355  		r1 := v.Args[0].Reg()
   356  		r2 := v.Args[1].Reg()
   357  		p := s.Prog(v.Op.Asm())
   358  		p.From.Type = obj.TYPE_REG
   359  		p.From.Reg = r2
   360  		p.Reg = r1
   361  		p.To.Type = obj.TYPE_REG
   362  		p.To.Reg = r
   363  	case ssa.OpARM64NEGSflags:
   364  		p := s.Prog(v.Op.Asm())
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = v.Args[0].Reg()
   367  		p.To.Type = obj.TYPE_REG
   368  		p.To.Reg = v.Reg0()
   369  	case ssa.OpARM64NGCzerocarry:
   370  		p := s.Prog(v.Op.Asm())
   371  		p.From.Type = obj.TYPE_REG
   372  		p.From.Reg = arm64.REGZERO
   373  		p.To.Type = obj.TYPE_REG
   374  		p.To.Reg = v.Reg()
   375  	case ssa.OpARM64EXTRconst,
   376  		ssa.OpARM64EXTRWconst:
   377  		p := s.Prog(v.Op.Asm())
   378  		p.From.Type = obj.TYPE_CONST
   379  		p.From.Offset = v.AuxInt
   380  		p.AddRestSourceReg(v.Args[0].Reg())
   381  		p.Reg = v.Args[1].Reg()
   382  		p.To.Type = obj.TYPE_REG
   383  		p.To.Reg = v.Reg()
   384  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   385  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   386  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   387  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   388  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   389  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   390  	case ssa.OpARM64MVNshiftRO:
   391  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   392  	case ssa.OpARM64ADDshiftLL,
   393  		ssa.OpARM64SUBshiftLL,
   394  		ssa.OpARM64ANDshiftLL,
   395  		ssa.OpARM64ORshiftLL,
   396  		ssa.OpARM64XORshiftLL,
   397  		ssa.OpARM64EONshiftLL,
   398  		ssa.OpARM64ORNshiftLL,
   399  		ssa.OpARM64BICshiftLL:
   400  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   401  	case ssa.OpARM64ADDshiftRL,
   402  		ssa.OpARM64SUBshiftRL,
   403  		ssa.OpARM64ANDshiftRL,
   404  		ssa.OpARM64ORshiftRL,
   405  		ssa.OpARM64XORshiftRL,
   406  		ssa.OpARM64EONshiftRL,
   407  		ssa.OpARM64ORNshiftRL,
   408  		ssa.OpARM64BICshiftRL:
   409  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   410  	case ssa.OpARM64ADDshiftRA,
   411  		ssa.OpARM64SUBshiftRA,
   412  		ssa.OpARM64ANDshiftRA,
   413  		ssa.OpARM64ORshiftRA,
   414  		ssa.OpARM64XORshiftRA,
   415  		ssa.OpARM64EONshiftRA,
   416  		ssa.OpARM64ORNshiftRA,
   417  		ssa.OpARM64BICshiftRA:
   418  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   419  	case ssa.OpARM64ANDshiftRO,
   420  		ssa.OpARM64ORshiftRO,
   421  		ssa.OpARM64XORshiftRO,
   422  		ssa.OpARM64EONshiftRO,
   423  		ssa.OpARM64ORNshiftRO,
   424  		ssa.OpARM64BICshiftRO:
   425  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   426  	case ssa.OpARM64MOVDconst:
   427  		p := s.Prog(v.Op.Asm())
   428  		p.From.Type = obj.TYPE_CONST
   429  		p.From.Offset = v.AuxInt
   430  		p.To.Type = obj.TYPE_REG
   431  		p.To.Reg = v.Reg()
   432  	case ssa.OpARM64FMOVSconst,
   433  		ssa.OpARM64FMOVDconst:
   434  		p := s.Prog(v.Op.Asm())
   435  		p.From.Type = obj.TYPE_FCONST
   436  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   437  		p.To.Type = obj.TYPE_REG
   438  		p.To.Reg = v.Reg()
   439  	case ssa.OpARM64FCMPS0,
   440  		ssa.OpARM64FCMPD0:
   441  		p := s.Prog(v.Op.Asm())
   442  		p.From.Type = obj.TYPE_FCONST
   443  		p.From.Val = math.Float64frombits(0)
   444  		p.Reg = v.Args[0].Reg()
   445  	case ssa.OpARM64CMP,
   446  		ssa.OpARM64CMPW,
   447  		ssa.OpARM64CMN,
   448  		ssa.OpARM64CMNW,
   449  		ssa.OpARM64TST,
   450  		ssa.OpARM64TSTW,
   451  		ssa.OpARM64FCMPS,
   452  		ssa.OpARM64FCMPD:
   453  		p := s.Prog(v.Op.Asm())
   454  		p.From.Type = obj.TYPE_REG
   455  		p.From.Reg = v.Args[1].Reg()
   456  		p.Reg = v.Args[0].Reg()
   457  	case ssa.OpARM64CMPconst,
   458  		ssa.OpARM64CMPWconst,
   459  		ssa.OpARM64CMNconst,
   460  		ssa.OpARM64CMNWconst,
   461  		ssa.OpARM64TSTconst,
   462  		ssa.OpARM64TSTWconst:
   463  		p := s.Prog(v.Op.Asm())
   464  		p.From.Type = obj.TYPE_CONST
   465  		p.From.Offset = v.AuxInt
   466  		p.Reg = v.Args[0].Reg()
   467  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   468  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   469  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   470  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   471  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   472  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   473  	case ssa.OpARM64TSTshiftRO:
   474  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   475  	case ssa.OpARM64MOVDaddr:
   476  		p := s.Prog(arm64.AMOVD)
   477  		p.From.Type = obj.TYPE_ADDR
   478  		p.From.Reg = v.Args[0].Reg()
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  		var wantreg string
   483  		// MOVD $sym+off(base), R
   484  		// the assembler expands it as the following:
   485  		// - base is SP: add constant offset to SP (R13)
   486  		//               when constant is large, tmp register (R11) may be used
   487  		// - base is SB: load external address from constant pool (use relocation)
   488  		switch v.Aux.(type) {
   489  		default:
   490  			v.Fatalf("aux is of unknown type %T", v.Aux)
   491  		case *obj.LSym:
   492  			wantreg = "SB"
   493  			ssagen.AddAux(&p.From, v)
   494  		case *ir.Name:
   495  			wantreg = "SP"
   496  			ssagen.AddAux(&p.From, v)
   497  		case nil:
   498  			// No sym, just MOVD $off(SP), R
   499  			wantreg = "SP"
   500  			p.From.Offset = v.AuxInt
   501  		}
   502  		if reg := v.Args[0].RegName(); reg != wantreg {
   503  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   504  		}
   505  	case ssa.OpARM64MOVBload,
   506  		ssa.OpARM64MOVBUload,
   507  		ssa.OpARM64MOVHload,
   508  		ssa.OpARM64MOVHUload,
   509  		ssa.OpARM64MOVWload,
   510  		ssa.OpARM64MOVWUload,
   511  		ssa.OpARM64MOVDload,
   512  		ssa.OpARM64FMOVSload,
   513  		ssa.OpARM64FMOVDload,
   514  		ssa.OpARM64FMOVQload:
   515  		p := s.Prog(v.Op.Asm())
   516  		p.From.Type = obj.TYPE_MEM
   517  		p.From.Reg = v.Args[0].Reg()
   518  		ssagen.AddAux(&p.From, v)
   519  		p.To.Type = obj.TYPE_REG
   520  		p.To.Reg = v.Reg()
   521  	case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ:
   522  		p := s.Prog(v.Op.Asm())
   523  		p.From.Type = obj.TYPE_MEM
   524  		p.From.Reg = v.Args[0].Reg()
   525  		ssagen.AddAux(&p.From, v)
   526  		p.To.Type = obj.TYPE_REGREG
   527  		p.To.Reg = v.Reg0()
   528  		p.To.Offset = int64(v.Reg1())
   529  	case ssa.OpARM64MOVBloadidx,
   530  		ssa.OpARM64MOVBUloadidx,
   531  		ssa.OpARM64MOVHloadidx,
   532  		ssa.OpARM64MOVHUloadidx,
   533  		ssa.OpARM64MOVWloadidx,
   534  		ssa.OpARM64MOVWUloadidx,
   535  		ssa.OpARM64MOVDloadidx,
   536  		ssa.OpARM64FMOVSloadidx,
   537  		ssa.OpARM64FMOVDloadidx,
   538  		ssa.OpARM64MOVHloadidx2,
   539  		ssa.OpARM64MOVHUloadidx2,
   540  		ssa.OpARM64MOVWloadidx4,
   541  		ssa.OpARM64MOVWUloadidx4,
   542  		ssa.OpARM64MOVDloadidx8,
   543  		ssa.OpARM64FMOVDloadidx8,
   544  		ssa.OpARM64FMOVSloadidx4:
   545  		p := s.Prog(v.Op.Asm())
   546  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   547  		p.To.Type = obj.TYPE_REG
   548  		p.To.Reg = v.Reg()
   549  	case ssa.OpARM64LDAR,
   550  		ssa.OpARM64LDARB,
   551  		ssa.OpARM64LDARW:
   552  		p := s.Prog(v.Op.Asm())
   553  		p.From.Type = obj.TYPE_MEM
   554  		p.From.Reg = v.Args[0].Reg()
   555  		ssagen.AddAux(&p.From, v)
   556  		p.To.Type = obj.TYPE_REG
   557  		p.To.Reg = v.Reg0()
   558  	case ssa.OpARM64MOVBstore,
   559  		ssa.OpARM64MOVHstore,
   560  		ssa.OpARM64MOVWstore,
   561  		ssa.OpARM64MOVDstore,
   562  		ssa.OpARM64FMOVSstore,
   563  		ssa.OpARM64FMOVDstore,
   564  		ssa.OpARM64FMOVQstore,
   565  		ssa.OpARM64STLRB,
   566  		ssa.OpARM64STLR,
   567  		ssa.OpARM64STLRW:
   568  		p := s.Prog(v.Op.Asm())
   569  		p.From.Type = obj.TYPE_REG
   570  		p.From.Reg = v.Args[1].Reg()
   571  		p.To.Type = obj.TYPE_MEM
   572  		p.To.Reg = v.Args[0].Reg()
   573  		ssagen.AddAux(&p.To, v)
   574  	case ssa.OpARM64MOVBstoreidx,
   575  		ssa.OpARM64MOVHstoreidx,
   576  		ssa.OpARM64MOVWstoreidx,
   577  		ssa.OpARM64MOVDstoreidx,
   578  		ssa.OpARM64FMOVSstoreidx,
   579  		ssa.OpARM64FMOVDstoreidx,
   580  		ssa.OpARM64MOVHstoreidx2,
   581  		ssa.OpARM64MOVWstoreidx4,
   582  		ssa.OpARM64FMOVSstoreidx4,
   583  		ssa.OpARM64MOVDstoreidx8,
   584  		ssa.OpARM64FMOVDstoreidx8:
   585  		p := s.Prog(v.Op.Asm())
   586  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   587  		p.From.Type = obj.TYPE_REG
   588  		p.From.Reg = v.Args[2].Reg()
   589  	case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ:
   590  		p := s.Prog(v.Op.Asm())
   591  		p.From.Type = obj.TYPE_REGREG
   592  		p.From.Reg = v.Args[1].Reg()
   593  		p.From.Offset = int64(v.Args[2].Reg())
   594  		p.To.Type = obj.TYPE_MEM
   595  		p.To.Reg = v.Args[0].Reg()
   596  		ssagen.AddAux(&p.To, v)
   597  	case ssa.OpARM64BFI,
   598  		ssa.OpARM64BFXIL:
   599  		p := s.Prog(v.Op.Asm())
   600  		p.From.Type = obj.TYPE_CONST
   601  		p.From.Offset = v.AuxInt >> 8
   602  		p.AddRestSourceConst(v.AuxInt & 0xff)
   603  		p.Reg = v.Args[1].Reg()
   604  		p.To.Type = obj.TYPE_REG
   605  		p.To.Reg = v.Reg()
   606  	case ssa.OpARM64SBFIZ,
   607  		ssa.OpARM64SBFX,
   608  		ssa.OpARM64UBFIZ,
   609  		ssa.OpARM64UBFX:
   610  		p := s.Prog(v.Op.Asm())
   611  		p.From.Type = obj.TYPE_CONST
   612  		p.From.Offset = v.AuxInt >> 8
   613  		p.AddRestSourceConst(v.AuxInt & 0xff)
   614  		p.Reg = v.Args[0].Reg()
   615  		p.To.Type = obj.TYPE_REG
   616  		p.To.Reg = v.Reg()
   617  	case ssa.OpARM64LoweredAtomicExchange64,
   618  		ssa.OpARM64LoweredAtomicExchange32,
   619  		ssa.OpARM64LoweredAtomicExchange8:
   620  		// LDAXR	(Rarg0), Rout
   621  		// STLXR	Rarg1, (Rarg0), Rtmp
   622  		// CBNZ		Rtmp, -2(PC)
   623  		var ld, st obj.As
   624  		switch v.Op {
   625  		case ssa.OpARM64LoweredAtomicExchange8:
   626  			ld = arm64.ALDAXRB
   627  			st = arm64.ASTLXRB
   628  		case ssa.OpARM64LoweredAtomicExchange32:
   629  			ld = arm64.ALDAXRW
   630  			st = arm64.ASTLXRW
   631  		case ssa.OpARM64LoweredAtomicExchange64:
   632  			ld = arm64.ALDAXR
   633  			st = arm64.ASTLXR
   634  		}
   635  		r0 := v.Args[0].Reg()
   636  		r1 := v.Args[1].Reg()
   637  		out := v.Reg0()
   638  		p := s.Prog(ld)
   639  		p.From.Type = obj.TYPE_MEM
   640  		p.From.Reg = r0
   641  		p.To.Type = obj.TYPE_REG
   642  		p.To.Reg = out
   643  		p1 := s.Prog(st)
   644  		p1.From.Type = obj.TYPE_REG
   645  		p1.From.Reg = r1
   646  		p1.To.Type = obj.TYPE_MEM
   647  		p1.To.Reg = r0
   648  		p1.RegTo2 = arm64.REGTMP
   649  		p2 := s.Prog(arm64.ACBNZ)
   650  		p2.From.Type = obj.TYPE_REG
   651  		p2.From.Reg = arm64.REGTMP
   652  		p2.To.Type = obj.TYPE_BRANCH
   653  		p2.To.SetTarget(p)
   654  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   655  		ssa.OpARM64LoweredAtomicExchange32Variant,
   656  		ssa.OpARM64LoweredAtomicExchange8Variant:
   657  		var swap obj.As
   658  		switch v.Op {
   659  		case ssa.OpARM64LoweredAtomicExchange8Variant:
   660  			swap = arm64.ASWPALB
   661  		case ssa.OpARM64LoweredAtomicExchange32Variant:
   662  			swap = arm64.ASWPALW
   663  		case ssa.OpARM64LoweredAtomicExchange64Variant:
   664  			swap = arm64.ASWPALD
   665  		}
   666  		r0 := v.Args[0].Reg()
   667  		r1 := v.Args[1].Reg()
   668  		out := v.Reg0()
   669  
   670  		// SWPALD	Rarg1, (Rarg0), Rout
   671  		p := s.Prog(swap)
   672  		p.From.Type = obj.TYPE_REG
   673  		p.From.Reg = r1
   674  		p.To.Type = obj.TYPE_MEM
   675  		p.To.Reg = r0
   676  		p.RegTo2 = out
   677  
   678  	case ssa.OpARM64LoweredAtomicAdd64,
   679  		ssa.OpARM64LoweredAtomicAdd32:
   680  		// LDAXR	(Rarg0), Rout
   681  		// ADD		Rarg1, Rout
   682  		// STLXR	Rout, (Rarg0), Rtmp
   683  		// CBNZ		Rtmp, -3(PC)
   684  		ld := arm64.ALDAXR
   685  		st := arm64.ASTLXR
   686  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   687  			ld = arm64.ALDAXRW
   688  			st = arm64.ASTLXRW
   689  		}
   690  		r0 := v.Args[0].Reg()
   691  		r1 := v.Args[1].Reg()
   692  		out := v.Reg0()
   693  		p := s.Prog(ld)
   694  		p.From.Type = obj.TYPE_MEM
   695  		p.From.Reg = r0
   696  		p.To.Type = obj.TYPE_REG
   697  		p.To.Reg = out
   698  		p1 := s.Prog(arm64.AADD)
   699  		p1.From.Type = obj.TYPE_REG
   700  		p1.From.Reg = r1
   701  		p1.To.Type = obj.TYPE_REG
   702  		p1.To.Reg = out
   703  		p2 := s.Prog(st)
   704  		p2.From.Type = obj.TYPE_REG
   705  		p2.From.Reg = out
   706  		p2.To.Type = obj.TYPE_MEM
   707  		p2.To.Reg = r0
   708  		p2.RegTo2 = arm64.REGTMP
   709  		p3 := s.Prog(arm64.ACBNZ)
   710  		p3.From.Type = obj.TYPE_REG
   711  		p3.From.Reg = arm64.REGTMP
   712  		p3.To.Type = obj.TYPE_BRANCH
   713  		p3.To.SetTarget(p)
   714  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   715  		ssa.OpARM64LoweredAtomicAdd32Variant:
   716  		// LDADDAL	Rarg1, (Rarg0), Rout
   717  		// ADD		Rarg1, Rout
   718  		op := arm64.ALDADDALD
   719  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   720  			op = arm64.ALDADDALW
   721  		}
   722  		r0 := v.Args[0].Reg()
   723  		r1 := v.Args[1].Reg()
   724  		out := v.Reg0()
   725  		p := s.Prog(op)
   726  		p.From.Type = obj.TYPE_REG
   727  		p.From.Reg = r1
   728  		p.To.Type = obj.TYPE_MEM
   729  		p.To.Reg = r0
   730  		p.RegTo2 = out
   731  		p1 := s.Prog(arm64.AADD)
   732  		p1.From.Type = obj.TYPE_REG
   733  		p1.From.Reg = r1
   734  		p1.To.Type = obj.TYPE_REG
   735  		p1.To.Reg = out
   736  	case ssa.OpARM64LoweredAtomicCas64,
   737  		ssa.OpARM64LoweredAtomicCas32:
   738  		// LDAXR	(Rarg0), Rtmp
   739  		// CMP		Rarg1, Rtmp
   740  		// BNE		3(PC)
   741  		// STLXR	Rarg2, (Rarg0), Rtmp
   742  		// CBNZ		Rtmp, -4(PC)
   743  		// CSET		EQ, Rout
   744  		ld := arm64.ALDAXR
   745  		st := arm64.ASTLXR
   746  		cmp := arm64.ACMP
   747  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   748  			ld = arm64.ALDAXRW
   749  			st = arm64.ASTLXRW
   750  			cmp = arm64.ACMPW
   751  		}
   752  		r0 := v.Args[0].Reg()
   753  		r1 := v.Args[1].Reg()
   754  		r2 := v.Args[2].Reg()
   755  		out := v.Reg0()
   756  		p := s.Prog(ld)
   757  		p.From.Type = obj.TYPE_MEM
   758  		p.From.Reg = r0
   759  		p.To.Type = obj.TYPE_REG
   760  		p.To.Reg = arm64.REGTMP
   761  		p1 := s.Prog(cmp)
   762  		p1.From.Type = obj.TYPE_REG
   763  		p1.From.Reg = r1
   764  		p1.Reg = arm64.REGTMP
   765  		p2 := s.Prog(arm64.ABNE)
   766  		p2.To.Type = obj.TYPE_BRANCH
   767  		p3 := s.Prog(st)
   768  		p3.From.Type = obj.TYPE_REG
   769  		p3.From.Reg = r2
   770  		p3.To.Type = obj.TYPE_MEM
   771  		p3.To.Reg = r0
   772  		p3.RegTo2 = arm64.REGTMP
   773  		p4 := s.Prog(arm64.ACBNZ)
   774  		p4.From.Type = obj.TYPE_REG
   775  		p4.From.Reg = arm64.REGTMP
   776  		p4.To.Type = obj.TYPE_BRANCH
   777  		p4.To.SetTarget(p)
   778  		p5 := s.Prog(arm64.ACSET)
   779  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   780  		p5.From.Offset = int64(arm64.SPOP_EQ)
   781  		p5.To.Type = obj.TYPE_REG
   782  		p5.To.Reg = out
   783  		p2.To.SetTarget(p5)
   784  	case ssa.OpARM64LoweredAtomicCas64Variant,
   785  		ssa.OpARM64LoweredAtomicCas32Variant:
   786  		// Rarg0: ptr
   787  		// Rarg1: old
   788  		// Rarg2: new
   789  		// MOV  	Rarg1, Rtmp
   790  		// CASAL	Rtmp, (Rarg0), Rarg2
   791  		// CMP  	Rarg1, Rtmp
   792  		// CSET 	EQ, Rout
   793  		cas := arm64.ACASALD
   794  		cmp := arm64.ACMP
   795  		mov := arm64.AMOVD
   796  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   797  			cas = arm64.ACASALW
   798  			cmp = arm64.ACMPW
   799  			mov = arm64.AMOVW
   800  		}
   801  		r0 := v.Args[0].Reg()
   802  		r1 := v.Args[1].Reg()
   803  		r2 := v.Args[2].Reg()
   804  		out := v.Reg0()
   805  
   806  		// MOV  	Rarg1, Rtmp
   807  		p := s.Prog(mov)
   808  		p.From.Type = obj.TYPE_REG
   809  		p.From.Reg = r1
   810  		p.To.Type = obj.TYPE_REG
   811  		p.To.Reg = arm64.REGTMP
   812  
   813  		// CASAL	Rtmp, (Rarg0), Rarg2
   814  		p1 := s.Prog(cas)
   815  		p1.From.Type = obj.TYPE_REG
   816  		p1.From.Reg = arm64.REGTMP
   817  		p1.To.Type = obj.TYPE_MEM
   818  		p1.To.Reg = r0
   819  		p1.RegTo2 = r2
   820  
   821  		// CMP  	Rarg1, Rtmp
   822  		p2 := s.Prog(cmp)
   823  		p2.From.Type = obj.TYPE_REG
   824  		p2.From.Reg = r1
   825  		p2.Reg = arm64.REGTMP
   826  
   827  		// CSET 	EQ, Rout
   828  		p3 := s.Prog(arm64.ACSET)
   829  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   830  		p3.From.Offset = int64(arm64.SPOP_EQ)
   831  		p3.To.Type = obj.TYPE_REG
   832  		p3.To.Reg = out
   833  
   834  	case ssa.OpARM64LoweredAtomicAnd64,
   835  		ssa.OpARM64LoweredAtomicOr64,
   836  		ssa.OpARM64LoweredAtomicAnd32,
   837  		ssa.OpARM64LoweredAtomicOr32,
   838  		ssa.OpARM64LoweredAtomicAnd8,
   839  		ssa.OpARM64LoweredAtomicOr8:
   840  		// LDAXR[BW] (Rarg0), Rout
   841  		// AND/OR	Rarg1, Rout, tmp1
   842  		// STLXR[BW] tmp1, (Rarg0), Rtmp
   843  		// CBNZ		Rtmp, -3(PC)
   844  		ld := arm64.ALDAXR
   845  		st := arm64.ASTLXR
   846  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   847  			ld = arm64.ALDAXRW
   848  			st = arm64.ASTLXRW
   849  		}
   850  		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
   851  			ld = arm64.ALDAXRB
   852  			st = arm64.ASTLXRB
   853  		}
   854  		r0 := v.Args[0].Reg()
   855  		r1 := v.Args[1].Reg()
   856  		out := v.Reg0()
   857  		tmp := v.RegTmp()
   858  		p := s.Prog(ld)
   859  		p.From.Type = obj.TYPE_MEM
   860  		p.From.Reg = r0
   861  		p.To.Type = obj.TYPE_REG
   862  		p.To.Reg = out
   863  		p1 := s.Prog(v.Op.Asm())
   864  		p1.From.Type = obj.TYPE_REG
   865  		p1.From.Reg = r1
   866  		p1.Reg = out
   867  		p1.To.Type = obj.TYPE_REG
   868  		p1.To.Reg = tmp
   869  		p2 := s.Prog(st)
   870  		p2.From.Type = obj.TYPE_REG
   871  		p2.From.Reg = tmp
   872  		p2.To.Type = obj.TYPE_MEM
   873  		p2.To.Reg = r0
   874  		p2.RegTo2 = arm64.REGTMP
   875  		p3 := s.Prog(arm64.ACBNZ)
   876  		p3.From.Type = obj.TYPE_REG
   877  		p3.From.Reg = arm64.REGTMP
   878  		p3.To.Type = obj.TYPE_BRANCH
   879  		p3.To.SetTarget(p)
   880  
   881  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   882  		ssa.OpARM64LoweredAtomicAnd32Variant,
   883  		ssa.OpARM64LoweredAtomicAnd64Variant:
   884  		atomic_clear := arm64.ALDCLRALD
   885  		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
   886  			atomic_clear = arm64.ALDCLRALW
   887  		}
   888  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   889  			atomic_clear = arm64.ALDCLRALB
   890  		}
   891  		r0 := v.Args[0].Reg()
   892  		r1 := v.Args[1].Reg()
   893  		out := v.Reg0()
   894  
   895  		// MNV       Rarg1 Rtemp
   896  		p := s.Prog(arm64.AMVN)
   897  		p.From.Type = obj.TYPE_REG
   898  		p.From.Reg = r1
   899  		p.To.Type = obj.TYPE_REG
   900  		p.To.Reg = arm64.REGTMP
   901  
   902  		// LDCLRAL[BDW]  Rtemp, (Rarg0), Rout
   903  		p1 := s.Prog(atomic_clear)
   904  		p1.From.Type = obj.TYPE_REG
   905  		p1.From.Reg = arm64.REGTMP
   906  		p1.To.Type = obj.TYPE_MEM
   907  		p1.To.Reg = r0
   908  		p1.RegTo2 = out
   909  
   910  	case ssa.OpARM64LoweredAtomicOr8Variant,
   911  		ssa.OpARM64LoweredAtomicOr32Variant,
   912  		ssa.OpARM64LoweredAtomicOr64Variant:
   913  		atomic_or := arm64.ALDORALD
   914  		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
   915  			atomic_or = arm64.ALDORALW
   916  		}
   917  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   918  			atomic_or = arm64.ALDORALB
   919  		}
   920  		r0 := v.Args[0].Reg()
   921  		r1 := v.Args[1].Reg()
   922  		out := v.Reg0()
   923  
   924  		// LDORAL[BDW]  Rarg1, (Rarg0), Rout
   925  		p := s.Prog(atomic_or)
   926  		p.From.Type = obj.TYPE_REG
   927  		p.From.Reg = r1
   928  		p.To.Type = obj.TYPE_MEM
   929  		p.To.Reg = r0
   930  		p.RegTo2 = out
   931  
   932  	case ssa.OpARM64MOVBreg,
   933  		ssa.OpARM64MOVBUreg,
   934  		ssa.OpARM64MOVHreg,
   935  		ssa.OpARM64MOVHUreg,
   936  		ssa.OpARM64MOVWreg,
   937  		ssa.OpARM64MOVWUreg:
   938  		a := v.Args[0]
   939  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   940  			a = a.Args[0]
   941  		}
   942  		if a.Op == ssa.OpLoadReg {
   943  			t := a.Type
   944  			switch {
   945  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   946  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   947  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   948  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   949  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   950  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   951  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   952  				if v.Reg() == v.Args[0].Reg() {
   953  					return
   954  				}
   955  				p := s.Prog(arm64.AMOVD)
   956  				p.From.Type = obj.TYPE_REG
   957  				p.From.Reg = v.Args[0].Reg()
   958  				p.To.Type = obj.TYPE_REG
   959  				p.To.Reg = v.Reg()
   960  				return
   961  			default:
   962  			}
   963  		}
   964  		fallthrough
   965  	case ssa.OpARM64MVN,
   966  		ssa.OpARM64NEG,
   967  		ssa.OpARM64FABSD,
   968  		ssa.OpARM64FABSS,
   969  		ssa.OpARM64FMOVDfpgp,
   970  		ssa.OpARM64FMOVDgpfp,
   971  		ssa.OpARM64FMOVSfpgp,
   972  		ssa.OpARM64FMOVSgpfp,
   973  		ssa.OpARM64FNEGS,
   974  		ssa.OpARM64FNEGD,
   975  		ssa.OpARM64FSQRTS,
   976  		ssa.OpARM64FSQRTD,
   977  		ssa.OpARM64FCVTZSSW,
   978  		ssa.OpARM64FCVTZSDW,
   979  		ssa.OpARM64FCVTZUSW,
   980  		ssa.OpARM64FCVTZUDW,
   981  		ssa.OpARM64FCVTZSS,
   982  		ssa.OpARM64FCVTZSD,
   983  		ssa.OpARM64FCVTZUS,
   984  		ssa.OpARM64FCVTZUD,
   985  		ssa.OpARM64SCVTFWS,
   986  		ssa.OpARM64SCVTFWD,
   987  		ssa.OpARM64SCVTFS,
   988  		ssa.OpARM64SCVTFD,
   989  		ssa.OpARM64UCVTFWS,
   990  		ssa.OpARM64UCVTFWD,
   991  		ssa.OpARM64UCVTFS,
   992  		ssa.OpARM64UCVTFD,
   993  		ssa.OpARM64FCVTSD,
   994  		ssa.OpARM64FCVTDS,
   995  		ssa.OpARM64REV,
   996  		ssa.OpARM64REVW,
   997  		ssa.OpARM64REV16,
   998  		ssa.OpARM64REV16W,
   999  		ssa.OpARM64RBIT,
  1000  		ssa.OpARM64RBITW,
  1001  		ssa.OpARM64CLZ,
  1002  		ssa.OpARM64CLZW,
  1003  		ssa.OpARM64FRINTAD,
  1004  		ssa.OpARM64FRINTMD,
  1005  		ssa.OpARM64FRINTND,
  1006  		ssa.OpARM64FRINTPD,
  1007  		ssa.OpARM64FRINTZD,
  1008  		ssa.OpARM64FRINTAS,
  1009  		ssa.OpARM64FRINTMS,
  1010  		ssa.OpARM64FRINTNS,
  1011  		ssa.OpARM64FRINTPS,
  1012  		ssa.OpARM64FRINTZS:
  1013  		p := s.Prog(v.Op.Asm())
  1014  		p.From.Type = obj.TYPE_REG
  1015  		p.From.Reg = v.Args[0].Reg()
  1016  		p.To.Type = obj.TYPE_REG
  1017  		p.To.Reg = v.Reg()
  1018  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
  1019  		// input is already rounded
  1020  	case ssa.OpARM64VCNT:
  1021  		p := s.Prog(v.Op.Asm())
  1022  		p.From.Type = obj.TYPE_REG
  1023  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1024  		p.To.Type = obj.TYPE_REG
  1025  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1026  	case ssa.OpARM64VUADDLV:
  1027  		p := s.Prog(v.Op.Asm())
  1028  		p.From.Type = obj.TYPE_REG
  1029  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1030  		p.To.Type = obj.TYPE_REG
  1031  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
  1032  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
  1033  		r1 := int16(arm64.REGZERO)
  1034  		if v.Op != ssa.OpARM64CSEL0 {
  1035  			r1 = v.Args[1].Reg()
  1036  		}
  1037  		p := s.Prog(v.Op.Asm())
  1038  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1039  		condCode := condBits[ssa.Op(v.AuxInt)]
  1040  		p.From.Offset = int64(condCode)
  1041  		p.Reg = v.Args[0].Reg()
  1042  		p.AddRestSourceReg(r1)
  1043  		p.To.Type = obj.TYPE_REG
  1044  		p.To.Reg = v.Reg()
  1045  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
  1046  		p := s.Prog(v.Op.Asm())
  1047  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1048  		condCode := condBits[ssa.Op(v.AuxInt)]
  1049  		p.From.Offset = int64(condCode)
  1050  		p.Reg = v.Args[0].Reg()
  1051  		p.AddRestSourceReg(v.Args[1].Reg())
  1052  		p.To.Type = obj.TYPE_REG
  1053  		p.To.Reg = v.Reg()
  1054  	case ssa.OpARM64CSETM:
  1055  		p := s.Prog(arm64.ACSETM)
  1056  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1057  		condCode := condBits[ssa.Op(v.AuxInt)]
  1058  		p.From.Offset = int64(condCode)
  1059  		p.To.Type = obj.TYPE_REG
  1060  		p.To.Reg = v.Reg()
  1061  	case ssa.OpARM64CCMP,
  1062  		ssa.OpARM64CCMN,
  1063  		ssa.OpARM64CCMPconst,
  1064  		ssa.OpARM64CCMNconst,
  1065  		ssa.OpARM64CCMPW,
  1066  		ssa.OpARM64CCMNW,
  1067  		ssa.OpARM64CCMPWconst,
  1068  		ssa.OpARM64CCMNWconst:
  1069  		p := s.Prog(v.Op.Asm())
  1070  		p.Reg = v.Args[0].Reg()
  1071  		params := v.AuxArm64ConditionalParams()
  1072  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1073  		p.From.Offset = int64(condBits[params.Cond()])
  1074  		constValue, ok := params.ConstValue()
  1075  		if ok {
  1076  			p.AddRestSourceConst(constValue)
  1077  		} else {
  1078  			p.AddRestSourceReg(v.Args[1].Reg())
  1079  		}
  1080  		p.To.Type = obj.TYPE_CONST
  1081  		p.To.Offset = params.Nzcv()
  1082  	case ssa.OpARM64LoweredZero:
  1083  		ptrReg := v.Args[0].Reg()
  1084  		n := v.AuxInt
  1085  		if n < 16 {
  1086  			v.Fatalf("Zero too small %d", n)
  1087  		}
  1088  
  1089  		// Generate zeroing instructions.
  1090  		var off int64
  1091  		for n >= 16 {
  1092  			//  STP     (ZR, ZR), off(ptrReg)
  1093  			zero16(s, ptrReg, off, false)
  1094  			off += 16
  1095  			n -= 16
  1096  		}
  1097  		// Write any fractional portion.
  1098  		// An overlapping 16-byte write can't be used here
  1099  		// because STP's offsets must be a multiple of 8.
  1100  		if n > 8 {
  1101  			//  MOVD    ZR, off(ptrReg)
  1102  			zero8(s, ptrReg, off)
  1103  			off += 8
  1104  			n -= 8
  1105  		}
  1106  		if n != 0 {
  1107  			//  MOVD    ZR, off+n-8(ptrReg)
  1108  			// TODO: for n<=4 we could use a smaller write.
  1109  			zero8(s, ptrReg, off+n-8)
  1110  		}
  1111  	case ssa.OpARM64LoweredZeroLoop:
  1112  		ptrReg := v.Args[0].Reg()
  1113  		countReg := v.RegTmp()
  1114  		n := v.AuxInt
  1115  		loopSize := int64(64)
  1116  		if n < 3*loopSize {
  1117  			// - a loop count of 0 won't work.
  1118  			// - a loop count of 1 is useless.
  1119  			// - a loop count of 2 is a code size ~tie
  1120  			//     3 instructions to implement the loop
  1121  			//     4 instructions in the loop body
  1122  			//   vs
  1123  			//     8 instructions in the straightline code
  1124  			//   Might as well use straightline code.
  1125  			v.Fatalf("ZeroLoop size too small %d", n)
  1126  		}
  1127  
  1128  		// Put iteration count in a register.
  1129  		//   MOVD    $n, countReg
  1130  		p := s.Prog(arm64.AMOVD)
  1131  		p.From.Type = obj.TYPE_CONST
  1132  		p.From.Offset = n / loopSize
  1133  		p.To.Type = obj.TYPE_REG
  1134  		p.To.Reg = countReg
  1135  		cntInit := p
  1136  
  1137  		// Zero loopSize bytes starting at ptrReg.
  1138  		// Increment ptrReg by loopSize as a side effect.
  1139  		for range loopSize / 16 {
  1140  			//  STP.P   (ZR, ZR), 16(ptrReg)
  1141  			zero16(s, ptrReg, 0, true)
  1142  			// TODO: should we use the postincrement form,
  1143  			// or use a separate += 64 instruction?
  1144  			// postincrement saves an instruction, but maybe
  1145  			// it requires more integer units to do the +=16s.
  1146  		}
  1147  		// Decrement loop count.
  1148  		//   SUB     $1, countReg
  1149  		p = s.Prog(arm64.ASUB)
  1150  		p.From.Type = obj.TYPE_CONST
  1151  		p.From.Offset = 1
  1152  		p.To.Type = obj.TYPE_REG
  1153  		p.To.Reg = countReg
  1154  		// Jump to loop header if we're not done yet.
  1155  		//   CBNZ    head
  1156  		p = s.Prog(arm64.ACBNZ)
  1157  		p.From.Type = obj.TYPE_REG
  1158  		p.From.Reg = countReg
  1159  		p.To.Type = obj.TYPE_BRANCH
  1160  		p.To.SetTarget(cntInit.Link)
  1161  
  1162  		// Multiples of the loop size are now done.
  1163  		n %= loopSize
  1164  
  1165  		// Write any fractional portion.
  1166  		var off int64
  1167  		for n >= 16 {
  1168  			//  STP     (ZR, ZR), off(ptrReg)
  1169  			zero16(s, ptrReg, off, false)
  1170  			off += 16
  1171  			n -= 16
  1172  		}
  1173  		if n > 8 {
  1174  			// Note: an overlapping 16-byte write can't be used
  1175  			// here because STP's offsets must be a multiple of 8.
  1176  			//  MOVD    ZR, off(ptrReg)
  1177  			zero8(s, ptrReg, off)
  1178  			off += 8
  1179  			n -= 8
  1180  		}
  1181  		if n != 0 {
  1182  			//  MOVD    ZR, off+n-8(ptrReg)
  1183  			// TODO: for n<=4 we could use a smaller write.
  1184  			zero8(s, ptrReg, off+n-8)
  1185  		}
  1186  		// TODO: maybe we should use the count register to instead
  1187  		// hold an end pointer and compare against that?
  1188  		//   ADD $n, ptrReg, endReg
  1189  		// then
  1190  		//   CMP ptrReg, endReg
  1191  		//   BNE loop
  1192  		// There's a past-the-end pointer here, any problem with that?
  1193  
  1194  	case ssa.OpARM64LoweredMove:
  1195  		dstReg := v.Args[0].Reg()
  1196  		srcReg := v.Args[1].Reg()
  1197  		if dstReg == srcReg {
  1198  			break
  1199  		}
  1200  		tmpReg1 := int16(arm64.REG_R25)
  1201  		tmpFReg1 := int16(arm64.REG_F16)
  1202  		tmpFReg2 := int16(arm64.REG_F17)
  1203  		n := v.AuxInt
  1204  		if n < 16 {
  1205  			v.Fatalf("Move too small %d", n)
  1206  		}
  1207  
  1208  		// Generate copying instructions.
  1209  		var off int64
  1210  		for n >= 32 {
  1211  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1212  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1213  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1214  			off += 32
  1215  			n -= 32
  1216  		}
  1217  		for n >= 16 {
  1218  			//  FMOVQ   off(src), tmpFReg1
  1219  			//  FMOVQ   tmpFReg1, off(dst)
  1220  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1221  			off += 16
  1222  			n -= 16
  1223  		}
  1224  		if n > 8 {
  1225  			//  MOVD    off(srcReg), tmpReg1
  1226  			//  MOVD    tmpReg1, off(dstReg)
  1227  			move8(s, srcReg, dstReg, tmpReg1, off)
  1228  			off += 8
  1229  			n -= 8
  1230  		}
  1231  		if n != 0 {
  1232  			//  MOVD    off+n-8(srcReg), tmpReg1
  1233  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1234  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1235  		}
  1236  	case ssa.OpARM64LoweredMoveLoop:
  1237  		dstReg := v.Args[0].Reg()
  1238  		srcReg := v.Args[1].Reg()
  1239  		if dstReg == srcReg {
  1240  			break
  1241  		}
  1242  		countReg := int16(arm64.REG_R24)
  1243  		tmpReg1 := int16(arm64.REG_R25)
  1244  		tmpFReg1 := int16(arm64.REG_F16)
  1245  		tmpFReg2 := int16(arm64.REG_F17)
  1246  		n := v.AuxInt
  1247  		loopSize := int64(64)
  1248  		if n < 3*loopSize {
  1249  			// - a loop count of 0 won't work.
  1250  			// - a loop count of 1 is useless.
  1251  			// - a loop count of 2 is a code size ~tie
  1252  			//     3 instructions to implement the loop
  1253  			//     4 instructions in the loop body
  1254  			//   vs
  1255  			//     8 instructions in the straightline code
  1256  			//   Might as well use straightline code.
  1257  			v.Fatalf("ZeroLoop size too small %d", n)
  1258  		}
  1259  
  1260  		// Put iteration count in a register.
  1261  		//   MOVD    $n, countReg
  1262  		p := s.Prog(arm64.AMOVD)
  1263  		p.From.Type = obj.TYPE_CONST
  1264  		p.From.Offset = n / loopSize
  1265  		p.To.Type = obj.TYPE_REG
  1266  		p.To.Reg = countReg
  1267  		cntInit := p
  1268  
  1269  		// Move loopSize bytes starting at srcReg to dstReg.
  1270  		// Increment srcReg and destReg by loopSize as a side effect.
  1271  		for range loopSize / 32 {
  1272  			// FLDPQ.P 32(srcReg), (tmpFReg1, tmpFReg2)
  1273  			// FSTPQ.P (tmpFReg1, tmpFReg2), 32(dstReg)
  1274  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, 0, true)
  1275  		}
  1276  		// Decrement loop count.
  1277  		//   SUB     $1, countReg
  1278  		p = s.Prog(arm64.ASUB)
  1279  		p.From.Type = obj.TYPE_CONST
  1280  		p.From.Offset = 1
  1281  		p.To.Type = obj.TYPE_REG
  1282  		p.To.Reg = countReg
  1283  		// Jump to loop header if we're not done yet.
  1284  		//   CBNZ    head
  1285  		p = s.Prog(arm64.ACBNZ)
  1286  		p.From.Type = obj.TYPE_REG
  1287  		p.From.Reg = countReg
  1288  		p.To.Type = obj.TYPE_BRANCH
  1289  		p.To.SetTarget(cntInit.Link)
  1290  
  1291  		// Multiples of the loop size are now done.
  1292  		n %= loopSize
  1293  
  1294  		// Copy any fractional portion.
  1295  		var off int64
  1296  		for n >= 32 {
  1297  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1298  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1299  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1300  			off += 32
  1301  			n -= 32
  1302  		}
  1303  		for n >= 16 {
  1304  			//  FMOVQ   off(src), tmpFReg1
  1305  			//  FMOVQ   tmpFReg1, off(dst)
  1306  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1307  			off += 16
  1308  			n -= 16
  1309  		}
  1310  		if n > 8 {
  1311  			//  MOVD    off(srcReg), tmpReg1
  1312  			//  MOVD    tmpReg1, off(dstReg)
  1313  			move8(s, srcReg, dstReg, tmpReg1, off)
  1314  			off += 8
  1315  			n -= 8
  1316  		}
  1317  		if n != 0 {
  1318  			//  MOVD    off+n-8(srcReg), tmpReg1
  1319  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1320  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1321  		}
  1322  
  1323  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1324  		s.Call(v)
  1325  	case ssa.OpARM64CALLtail, ssa.OpARM64CALLtailinter:
  1326  		s.TailCall(v)
  1327  	case ssa.OpARM64LoweredWB:
  1328  		p := s.Prog(obj.ACALL)
  1329  		p.To.Type = obj.TYPE_MEM
  1330  		p.To.Name = obj.NAME_EXTERN
  1331  		// AuxInt encodes how many buffer entries we need.
  1332  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1333  	case ssa.OpARM64LoweredMemEq:
  1334  		p := s.Prog(obj.ACALL)
  1335  		p.To.Type = obj.TYPE_MEM
  1336  		p.To.Name = obj.NAME_EXTERN
  1337  		p.To.Sym = ir.Syms.Memequal
  1338  
  1339  	case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
  1340  		// Compute the constant we put in the PCData entry for this call.
  1341  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1342  		xIsReg := false
  1343  		yIsReg := false
  1344  		xVal := 0
  1345  		yVal := 0
  1346  		switch v.Op {
  1347  		case ssa.OpARM64LoweredPanicBoundsRR:
  1348  			xIsReg = true
  1349  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1350  			yIsReg = true
  1351  			yVal = int(v.Args[1].Reg() - arm64.REG_R0)
  1352  		case ssa.OpARM64LoweredPanicBoundsRC:
  1353  			xIsReg = true
  1354  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1355  			c := v.Aux.(ssa.PanicBoundsC).C
  1356  			if c >= 0 && c <= abi.BoundsMaxConst {
  1357  				yVal = int(c)
  1358  			} else {
  1359  				// Move constant to a register
  1360  				yIsReg = true
  1361  				if yVal == xVal {
  1362  					yVal = 1
  1363  				}
  1364  				p := s.Prog(arm64.AMOVD)
  1365  				p.From.Type = obj.TYPE_CONST
  1366  				p.From.Offset = c
  1367  				p.To.Type = obj.TYPE_REG
  1368  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1369  			}
  1370  		case ssa.OpARM64LoweredPanicBoundsCR:
  1371  			yIsReg = true
  1372  			yVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1373  			c := v.Aux.(ssa.PanicBoundsC).C
  1374  			if c >= 0 && c <= abi.BoundsMaxConst {
  1375  				xVal = int(c)
  1376  			} else {
  1377  				// Move constant to a register
  1378  				if xVal == yVal {
  1379  					xVal = 1
  1380  				}
  1381  				p := s.Prog(arm64.AMOVD)
  1382  				p.From.Type = obj.TYPE_CONST
  1383  				p.From.Offset = c
  1384  				p.To.Type = obj.TYPE_REG
  1385  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1386  			}
  1387  		case ssa.OpARM64LoweredPanicBoundsCC:
  1388  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1389  			if c >= 0 && c <= abi.BoundsMaxConst {
  1390  				xVal = int(c)
  1391  			} else {
  1392  				// Move constant to a register
  1393  				xIsReg = true
  1394  				p := s.Prog(arm64.AMOVD)
  1395  				p.From.Type = obj.TYPE_CONST
  1396  				p.From.Offset = c
  1397  				p.To.Type = obj.TYPE_REG
  1398  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1399  			}
  1400  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1401  			if c >= 0 && c <= abi.BoundsMaxConst {
  1402  				yVal = int(c)
  1403  			} else {
  1404  				// Move constant to a register
  1405  				yIsReg = true
  1406  				yVal = 1
  1407  				p := s.Prog(arm64.AMOVD)
  1408  				p.From.Type = obj.TYPE_CONST
  1409  				p.From.Offset = c
  1410  				p.To.Type = obj.TYPE_REG
  1411  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1412  			}
  1413  		}
  1414  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1415  
  1416  		p := s.Prog(obj.APCDATA)
  1417  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1418  		p.To.SetConst(int64(c))
  1419  		p = s.Prog(obj.ACALL)
  1420  		p.To.Type = obj.TYPE_MEM
  1421  		p.To.Name = obj.NAME_EXTERN
  1422  		p.To.Sym = ir.Syms.PanicBounds
  1423  
  1424  	case ssa.OpARM64LoweredNilCheck:
  1425  		// Issue a load which will fault if arg is nil.
  1426  		p := s.Prog(arm64.AMOVB)
  1427  		p.From.Type = obj.TYPE_MEM
  1428  		p.From.Reg = v.Args[0].Reg()
  1429  		ssagen.AddAux(&p.From, v)
  1430  		p.To.Type = obj.TYPE_REG
  1431  		p.To.Reg = arm64.REGTMP
  1432  		if logopt.Enabled() {
  1433  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1434  		}
  1435  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1436  			base.WarnfAt(v.Pos, "generated nil check")
  1437  		}
  1438  	case ssa.OpARM64Equal,
  1439  		ssa.OpARM64NotEqual,
  1440  		ssa.OpARM64LessThan,
  1441  		ssa.OpARM64LessEqual,
  1442  		ssa.OpARM64GreaterThan,
  1443  		ssa.OpARM64GreaterEqual,
  1444  		ssa.OpARM64LessThanU,
  1445  		ssa.OpARM64LessEqualU,
  1446  		ssa.OpARM64GreaterThanU,
  1447  		ssa.OpARM64GreaterEqualU,
  1448  		ssa.OpARM64LessThanF,
  1449  		ssa.OpARM64LessEqualF,
  1450  		ssa.OpARM64GreaterThanF,
  1451  		ssa.OpARM64GreaterEqualF,
  1452  		ssa.OpARM64NotLessThanF,
  1453  		ssa.OpARM64NotLessEqualF,
  1454  		ssa.OpARM64NotGreaterThanF,
  1455  		ssa.OpARM64NotGreaterEqualF,
  1456  		ssa.OpARM64LessThanNoov,
  1457  		ssa.OpARM64GreaterEqualNoov:
  1458  		// generate boolean values using CSET
  1459  		p := s.Prog(arm64.ACSET)
  1460  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1461  		condCode := condBits[v.Op]
  1462  		p.From.Offset = int64(condCode)
  1463  		p.To.Type = obj.TYPE_REG
  1464  		p.To.Reg = v.Reg()
  1465  	case ssa.OpARM64PRFM:
  1466  		p := s.Prog(v.Op.Asm())
  1467  		p.From.Type = obj.TYPE_MEM
  1468  		p.From.Reg = v.Args[0].Reg()
  1469  		p.To.Type = obj.TYPE_CONST
  1470  		p.To.Offset = v.AuxInt
  1471  	case ssa.OpARM64LoweredGetClosurePtr:
  1472  		// Closure pointer is R26 (arm64.REGCTXT).
  1473  		ssagen.CheckLoweredGetClosurePtr(v)
  1474  	case ssa.OpARM64LoweredGetCallerSP:
  1475  		// caller's SP is FixedFrameSize below the address of the first arg
  1476  		p := s.Prog(arm64.AMOVD)
  1477  		p.From.Type = obj.TYPE_ADDR
  1478  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1479  		p.From.Name = obj.NAME_PARAM
  1480  		p.To.Type = obj.TYPE_REG
  1481  		p.To.Reg = v.Reg()
  1482  	case ssa.OpARM64LoweredGetCallerPC:
  1483  		p := s.Prog(obj.AGETCALLERPC)
  1484  		p.To.Type = obj.TYPE_REG
  1485  		p.To.Reg = v.Reg()
  1486  	case ssa.OpARM64DMB:
  1487  		p := s.Prog(v.Op.Asm())
  1488  		p.From.Type = obj.TYPE_CONST
  1489  		p.From.Offset = v.AuxInt
  1490  	case ssa.OpARM64FlagConstant:
  1491  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1492  	case ssa.OpARM64InvertFlags:
  1493  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1494  	case ssa.OpClobber:
  1495  		// MOVW	$0xdeaddead, REGTMP
  1496  		// MOVW	REGTMP, (slot)
  1497  		// MOVW	REGTMP, 4(slot)
  1498  		p := s.Prog(arm64.AMOVW)
  1499  		p.From.Type = obj.TYPE_CONST
  1500  		p.From.Offset = 0xdeaddead
  1501  		p.To.Type = obj.TYPE_REG
  1502  		p.To.Reg = arm64.REGTMP
  1503  		p = s.Prog(arm64.AMOVW)
  1504  		p.From.Type = obj.TYPE_REG
  1505  		p.From.Reg = arm64.REGTMP
  1506  		p.To.Type = obj.TYPE_MEM
  1507  		p.To.Reg = arm64.REGSP
  1508  		ssagen.AddAux(&p.To, v)
  1509  		p = s.Prog(arm64.AMOVW)
  1510  		p.From.Type = obj.TYPE_REG
  1511  		p.From.Reg = arm64.REGTMP
  1512  		p.To.Type = obj.TYPE_MEM
  1513  		p.To.Reg = arm64.REGSP
  1514  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1515  	case ssa.OpClobberReg:
  1516  		x := uint64(0xdeaddeaddeaddead)
  1517  		p := s.Prog(arm64.AMOVD)
  1518  		p.From.Type = obj.TYPE_CONST
  1519  		p.From.Offset = int64(x)
  1520  		p.To.Type = obj.TYPE_REG
  1521  		p.To.Reg = v.Reg()
  1522  	default:
  1523  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1524  	}
  1525  }
  1526  
// condBits maps an SSA comparison op to the arm64 condition code used
// to test that comparison. The value is placed in Prog.From.Offset with
// From.Type = TYPE_SPECIAL (see the CSEL/CSET/CCMP cases in genValue);
// the assembler encodes it into the instruction's condition field.
var condBits = map[ssa.Op]arm64.SpecialOperand{
	// Integer comparisons: signed use LT/LE/GT/GE, unsigned use the
	// carry-based LO/LS/HI/HS.
	ssa.OpARM64Equal:         arm64.SPOP_EQ,
	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
	ssa.OpARM64LessThan:      arm64.SPOP_LT,
	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to

	// The following condition codes have unordered to handle comparisons related to NaN.
	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered

	ssa.OpARM64LessThanNoov:     arm64.SPOP_MI, // Less than but without honoring overflow
	ssa.OpARM64GreaterEqualNoov: arm64.SPOP_PL, // Greater than or equal to but without honoring overflow
}
  1552  
// blockJump gives, for each conditional block kind, the branch
// instruction taken when the condition holds (asm) and the inverted
// branch (invasm) used when the code falls through to the true
// successor instead. See ssaGenBlock for how the pair is consumed.
var blockJump = map[ssa.BlockKind]struct {
	asm, invasm obj.As
}{
	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
	// Unsigned comparisons use the carry-based branches.
	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
	// Z/NZ and TBZ/TBNZ kinds test a register (or a bit of it)
	// directly rather than the flags.
	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
	// Floating-point branches. Note FLT/FLE use MI/LS rather than
	// LT/LE — matching the float entries in condBits; presumably to
	// get the intended unordered (NaN) behavior.
	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
}
  1579  
// To model a 'LEnoov' ('<=' without overflow checking) branching.
// LEnoov has no single arm64 branch, so it is emitted as a pair of
// conditional jumps via s.CombJump: the outer index selects the layout
// by which successor is the fallthrough block, and each IndexJump's
// Index says which of b.Succs that jump targets.
var leJumps = [2][2]ssagen.IndexJump{
	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
}
  1585  
// To model a 'GTnoov' ('>' without overflow checking) branching.
// Like leJumps, this is a two-jump combination consumed by s.CombJump;
// the outer index is selected by which successor is the fallthrough
// block, and Index picks the target successor for each jump.
var gtJumps = [2][2]ssagen.IndexJump{
	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
}
  1591  
// ssaGenBlock emits the control-flow instructions (jumps, conditional
// branches, returns, jump tables) that terminate block b. next is the
// block that will be laid out immediately after b, so a branch to next
// can be omitted and the code falls through instead.
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain, ssa.BlockDefer:
		// Unconditional flow: jump only if the successor isn't the
		// fallthrough block. Targets are resolved later via s.Branches.
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}

	case ssa.BlockExit, ssa.BlockRetJmp:
		// Nothing to emit here.

	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
		ssa.BlockARM64LT, ssa.BlockARM64GE,
		ssa.BlockARM64LE, ssa.BlockARM64GT,
		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
		ssa.BlockARM64Z, ssa.BlockARM64NZ,
		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			// Fall through to Succs[0]; branch to Succs[1] on the
			// inverted condition.
			p = s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			// Fall through to Succs[1]; branch to Succs[0] on the
			// condition itself.
			p = s.Br(jmp.asm, b.Succs[0].Block())
		default:
			// Neither successor follows: conditional branch to the
			// likely side, then an unconditional jump to the other.
			if b.Likely != ssa.BranchUnlikely {
				p = s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				p = s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
		// CBZ/CBNZ-style kinds test a register rather than the flags;
		// attach the control value's register to the branch.
		if !b.Controls[0].Type.IsFlags() {
			p.From.Type = obj.TYPE_REG
			p.From.Reg = b.Controls[0].Reg()
		}
	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
		// Test-bit branches: same successor/fallthrough selection as
		// above, but the operands are the bit number (From, constant,
		// taken from b.AuxInt) and the tested register (Reg).
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
		case b.Succs[0].Block():
			p = s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			p = s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				p = s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				p = s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
		p.From.Offset = b.AuxInt
		p.From.Type = obj.TYPE_CONST
		p.Reg = b.Controls[0].Reg()

	case ssa.BlockARM64LEnoov:
		// No single branch expresses LEnoov; emit the two-jump combo.
		s.CombJump(b, next, &leJumps)
	case ssa.BlockARM64GTnoov:
		s.CombJump(b, next, &gtJumps)

	case ssa.BlockARM64JUMPTABLE:
		// MOVD	(TABLE)(IDX<<3), Rtmp
		// JMP	(Rtmp)
		// Controls[0] is the index, Controls[1] the table base.
		p := s.Prog(arm64.AMOVD)
		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = arm64.REGTMP
		p = s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arm64.REGTMP
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}
  1679  
  1680  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1681  	p := s.Prog(loadByType(t))
  1682  	p.From.Type = obj.TYPE_MEM
  1683  	p.From.Name = obj.NAME_AUTO
  1684  	p.From.Sym = n.Linksym()
  1685  	p.From.Offset = n.FrameOffset() + off
  1686  	p.To.Type = obj.TYPE_REG
  1687  	p.To.Reg = reg
  1688  	return p
  1689  }
  1690  
  1691  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1692  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1693  	p.To.Name = obj.NAME_PARAM
  1694  	p.To.Sym = n.Linksym()
  1695  	p.Pos = p.Pos.WithNotStmt()
  1696  	return p
  1697  }
  1698  
  1699  // zero16 zeroes 16 bytes at reg+off.
  1700  // If postInc is true, increment reg by 16.
  1701  func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
  1702  	//   STP     (ZR, ZR), off(reg)
  1703  	p := s.Prog(arm64.ASTP)
  1704  	p.From.Type = obj.TYPE_REGREG
  1705  	p.From.Reg = arm64.REGZERO
  1706  	p.From.Offset = int64(arm64.REGZERO)
  1707  	p.To.Type = obj.TYPE_MEM
  1708  	p.To.Reg = reg
  1709  	p.To.Offset = off
  1710  	if postInc {
  1711  		if off != 0 {
  1712  			panic("can't postinc with non-zero offset")
  1713  		}
  1714  		//   STP.P  (ZR, ZR), 16(reg)
  1715  		p.Scond = arm64.C_XPOST
  1716  		p.To.Offset = 16
  1717  	}
  1718  }
  1719  
  1720  // zero8 zeroes 8 bytes at reg+off.
  1721  func zero8(s *ssagen.State, reg int16, off int64) {
  1722  	//   MOVD     ZR, off(reg)
  1723  	p := s.Prog(arm64.AMOVD)
  1724  	p.From.Type = obj.TYPE_REG
  1725  	p.From.Reg = arm64.REGZERO
  1726  	p.To.Type = obj.TYPE_MEM
  1727  	p.To.Reg = reg
  1728  	p.To.Offset = off
  1729  }
  1730  
  1731  // move32 copies 32 bytes at src+off to dst+off.
  1732  // Uses registers tmp1 and tmp2.
  1733  // If postInc is true, increment src and dst by 32.
  1734  func move32(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
  1735  	// FLDPQ   off(src), (tmp1, tmp2)
  1736  	ld := s.Prog(arm64.AFLDPQ)
  1737  	ld.From.Type = obj.TYPE_MEM
  1738  	ld.From.Reg = src
  1739  	ld.From.Offset = off
  1740  	ld.To.Type = obj.TYPE_REGREG
  1741  	ld.To.Reg = tmp1
  1742  	ld.To.Offset = int64(tmp2)
  1743  	// FSTPQ   (tmp1, tmp2), off(dst)
  1744  	st := s.Prog(arm64.AFSTPQ)
  1745  	st.From.Type = obj.TYPE_REGREG
  1746  	st.From.Reg = tmp1
  1747  	st.From.Offset = int64(tmp2)
  1748  	st.To.Type = obj.TYPE_MEM
  1749  	st.To.Reg = dst
  1750  	st.To.Offset = off
  1751  	if postInc {
  1752  		if off != 0 {
  1753  			panic("can't postinc with non-zero offset")
  1754  		}
  1755  		ld.Scond = arm64.C_XPOST
  1756  		st.Scond = arm64.C_XPOST
  1757  		ld.From.Offset = 32
  1758  		st.To.Offset = 32
  1759  	}
  1760  }
  1761  
  1762  // move16 copies 16 bytes at src+off to dst+off.
  1763  // Uses register tmp1
  1764  // If postInc is true, increment src and dst by 16.
  1765  func move16(s *ssagen.State, src, dst, tmp1 int16, off int64, postInc bool) {
  1766  	// FMOVQ     off(src), tmp1
  1767  	ld := s.Prog(arm64.AFMOVQ)
  1768  	ld.From.Type = obj.TYPE_MEM
  1769  	ld.From.Reg = src
  1770  	ld.From.Offset = off
  1771  	ld.To.Type = obj.TYPE_REG
  1772  	ld.To.Reg = tmp1
  1773  	// FMOVQ     tmp1, off(dst)
  1774  	st := s.Prog(arm64.AFMOVQ)
  1775  	st.From.Type = obj.TYPE_REG
  1776  	st.From.Reg = tmp1
  1777  	st.To.Type = obj.TYPE_MEM
  1778  	st.To.Reg = dst
  1779  	st.To.Offset = off
  1780  	if postInc {
  1781  		if off != 0 {
  1782  			panic("can't postinc with non-zero offset")
  1783  		}
  1784  		ld.Scond = arm64.C_XPOST
  1785  		st.Scond = arm64.C_XPOST
  1786  		ld.From.Offset = 16
  1787  		st.To.Offset = 16
  1788  	}
  1789  }
  1790  
  1791  // move8 copies 8 bytes at src+off to dst+off.
  1792  // Uses register tmp.
  1793  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1794  	// MOVD    off(src), tmp
  1795  	ld := s.Prog(arm64.AMOVD)
  1796  	ld.From.Type = obj.TYPE_MEM
  1797  	ld.From.Reg = src
  1798  	ld.From.Offset = off
  1799  	ld.To.Type = obj.TYPE_REG
  1800  	ld.To.Reg = tmp
  1801  	// MOVD    tmp, off(dst)
  1802  	st := s.Prog(arm64.AMOVD)
  1803  	st.From.Type = obj.TYPE_REG
  1804  	st.From.Reg = tmp
  1805  	st.To.Type = obj.TYPE_MEM
  1806  	st.To.Reg = dst
  1807  	st.To.Offset = off
  1808  }
  1809  

View as plain text