Text file src/runtime/asm_386.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_386 is common startup code for most 386 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
        //
        // It copies argc and the address of argv into the two words of its own
        // 8-byte frame and then jumps (does not call) to runtime·rt0_go, which
        // reads them back from 0(SP) and 4(SP).
    14  TEXT _rt0_386(SB),NOSPLIT,$8
    15  	MOVL	8(SP), AX	// argc
    16  	LEAL	12(SP), BX	// argv
    17  	MOVL	AX, 0(SP)	// slot 0 = argc, as rt0_go expects
    18  	MOVL	BX, 4(SP)	// slot 1 = argv, as rt0_go expects
    19  	JMP	runtime·rt0_go(SB)	// tail jump: SP is left pointing at the two slots
    20  
    21  // _rt0_386_lib is common startup code for most 386 systems when
    22  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    23  // arrange to invoke this function as a global constructor (for
    24  // c-archive) or when the shared library is loaded (for c-shared).
    25  // We expect argc and argv to be passed on the stack following the
    26  // usual C ABI.
        //
        // BP, BX, SI and DI are pushed on entry and popped before returning, so
        // the C caller sees them unchanged. argc and argv are stored in the
        // file-local variables _rt0_386_lib_argc<> and _rt0_386_lib_argv<>
        // (read back by runtime·rt0_lib_go), then runtime·libInit is called.
    27  TEXT _rt0_386_lib(SB),NOSPLIT,$0
    28  	PUSHL	BP
    29  	MOVL	SP, BP	// BP now addresses: 0=saved BP, 4=return PC, 8=argc, 12=argv
    30  	PUSHL	BX
    31  	PUSHL	SI
    32  	PUSHL	DI
    33  
    34  	MOVL	8(BP), AX	// argc
    35  	MOVL	AX, _rt0_386_lib_argc<>(SB)
    36  	MOVL	12(BP), AX	// argv
    37  	MOVL	AX, _rt0_386_lib_argv<>(SB)
    38  
    39  	CALL	runtime·libInit(SB)
    40  
    41  	POPL	DI	// restore in reverse push order
    42  	POPL	SI
    43  	POPL	BX
    44  	POPL	BP
    45  	RET
    46  
    47  // rt0_lib_go initializes the Go runtime.
    48  // This is started in a separate thread by _rt0_386_lib.
        //
        // It reloads the argc/argv values that _rt0_386_lib saved in
        // _rt0_386_lib_argc<> / _rt0_386_lib_argv<>, places them at 0(SP) and
        // 4(SP) of its 8-byte frame, and jumps to runtime·rt0_go.
    49  TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$8
    50  	MOVL	_rt0_386_lib_argc<>(SB), AX
    51  	MOVL	AX, 0(SP)	// slot 0 = argc
    52  	MOVL	_rt0_386_lib_argv<>(SB), AX
    53  	MOVL	AX, 4(SP)	// slot 1 = argv
    54  	JMP	runtime·rt0_go(SB)	// jump, not call: rt0_go reads 0(SP)/4(SP)
    55  
    56  DATA _rt0_386_lib_argc<>(SB)/4, $0	// file-local 4-byte slot, initially 0; written by _rt0_386_lib
    57  GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
    58  DATA _rt0_386_lib_argv<>(SB)/4, $0	// file-local 4-byte slot, initially 0; written by _rt0_386_lib
    59  GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4	// both are read back by runtime·rt0_lib_go
    60  
    61  TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
        	// Common bootstrap reached by JMP from _rt0_386 and rt0_lib_go.
        	// On entry 0(SP) = argc and 4(SP) = argv. The code below, in order:
        	//   1. carves out an aligned scratch area and saves argc/argv in it,
        	//   2. gives runtime·g0 provisional stack bounds,
        	//   3. probes for CPUID and requires MMX when CPUID reports features,
        	//   4. calls _cgo_init if present, otherwise sets up TLS itself,
        	//   5. links g0 and m0, runs check/args/osinit/schedinit,
        	//   6. queues runtime·main via newproc and enters mstart.
    62  	// Copy arguments forward on an even stack.
    63  	// Users of this function jump to it, they don't call it.
    64  	MOVL	0(SP), AX
    65  	MOVL	4(SP), BX
    66  	SUBL	$128, SP		// plenty of scratch
    67  	ANDL	$~15, SP		// round SP down to a multiple of 16
    68  	MOVL	AX, 120(SP)		// save argc, argv away
    69  	MOVL	BX, 124(SP)
    70  
    71  	// set default stack bounds.
    72  	// _cgo_init may update stackguard.
    73  	MOVL	$runtime·g0(SB), BP
    74  	LEAL	(-64*1024+104)(SP), BX	// provisional low bound: SP - 64 KiB + 104
    75  	MOVL	BX, g_stackguard0(BP)
    76  	MOVL	BX, g_stackguard1(BP)
    77  	MOVL	BX, (g_stack+stack_lo)(BP)
    78  	MOVL	SP, (g_stack+stack_hi)(BP)
    79  
    80  	// find out information about the processor we're on
    81  	// first see if CPUID instruction is supported.
    82  	PUSHFL	// copy of the original EFLAGS; stays on the stack until the final POPFL
    83  	PUSHFL	// second copy, modified in place below
    84  	XORL	$(1<<21), 0(SP) // flip ID bit
    85  	POPFL	// try to load EFLAGS with the flipped bit
    86  	PUSHFL	// read EFLAGS back
    87  	POPL	AX	// AX = EFLAGS after the attempted flip
    88  	XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
    89  	POPFL	// restore EFLAGS
    90  	TESTL	$(1<<21), AX
    91  	JNE 	has_cpuid	// bit 21 changed, so CPUID can be used
    92  
    93  bad_proc: // show that the program requires MMX.
        	// Reached by fallthrough when the ID bit could not be flipped, and by
        	// the JZ below when CPUID leaf 1 does not report MMX.
    94  	MOVL	$2, 0(SP)	// first argument to write: 2 (presumably the stderr descriptor — confirm)
    95  	MOVL	$bad_proc_msg<>(SB), 4(SP)
    96  	MOVL	$0x3d, 8(SP)	// 0x3d = 61, the declared size of bad_proc_msg
    97  	CALL	runtime·write(SB)
    98  	MOVL	$1, 0(SP)	// argument to exit: 1
    99  	CALL	runtime·exit(SB)
   100  	CALL	runtime·abort(SB)
   101  
   102  has_cpuid:
   103  	MOVL	$0, AX	// CPUID leaf 0
   104  	CPUID
   105  	MOVL	AX, SI	// keep leaf 0's AX result in SI
   106  	CMPL	AX, $0
   107  	JE	nocpuinfo	// leaf 0 returned AX == 0: skip the vendor and feature checks
   108  
        	// Vendor string check: BX, DX, CX must spell "GenuineIntel".
   109  	CMPL	BX, $0x756E6547  // "Genu"
   110  	JNE	notintel
   111  	CMPL	DX, $0x49656E69  // "ineI"
   112  	JNE	notintel
   113  	CMPL	CX, $0x6C65746E  // "ntel"
   114  	JNE	notintel
   115  	MOVB	$1, runtime·isIntel(SB)
   116  notintel:
   117  
   118  	// Load EAX=1 cpuid flags
   119  	MOVL	$1, AX
   120  	CPUID
   121  	MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
   122  	MOVL	AX, runtime·processorVersionInfo(SB)
   123  
   124  	// Check for MMX support
   125  	TESTL	$(1<<23), DX // MMX
   126  	JZ	bad_proc
   127  
   128  nocpuinfo:
   129  	// if there is an _cgo_init, call it to let it
   130  	// initialize and to set up GS.  if not,
   131  	// we set up GS ourselves.
   132  	MOVL	_cgo_init(SB), AX
   133  	TESTL	AX, AX
   134  	JZ	needtls
        	// Arguments for _cgo_init are written to 0(SP)..12(SP) of the scratch
        	// area; BP still holds &runtime·g0 from above.
   135  #ifdef GOOS_android
   136  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   137  	// Compensate for tls_g (+8).
   138  	MOVL	-8(TLS), BX
   139  	MOVL	BX, 12(SP)
   140  	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
   141  #else
   142  	MOVL	$0, BX
   143  	MOVL	BX, 12(SP)	// arg 4: not used when using platform's TLS
   144  #ifdef GOOS_windows
   145  	MOVL	$runtime·tls_g(SB), 8(SP)	// arg 3: &tls_g
   146  #else
   147  	MOVL	BX, 8(SP)	// arg 3: not used when using platform's TLS
   148  #endif
   149  #endif
   150  	MOVL	$setg_gcc<>(SB), BX
   151  	MOVL	BX, 4(SP)	// arg 2: setg_gcc
   152  	MOVL	BP, 0(SP)	// arg 1: g0
   153  	CALL	AX
   154  
   155  	// update stackguard after _cgo_init
   156  	MOVL	$runtime·g0(SB), CX
   157  	MOVL	(g_stack+stack_lo)(CX), AX
   158  	ADDL	$const_stackGuard, AX	// guard = stack.lo + stackGuard
   159  	MOVL	AX, g_stackguard0(CX)
   160  	MOVL	AX, g_stackguard1(CX)
   161  
   162  #ifndef GOOS_windows
   163  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   164  	JMP ok
   165  #endif
   166  needtls:
   167  #ifdef GOOS_openbsd
   168  	// skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
   169  	JMP	ok
   170  #endif
   171  #ifdef GOOS_plan9
   172  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   173  	JMP	ok
   174  #endif
   175  
   176  	// set up %gs
   177  	CALL	ldt0setup<>(SB)
   178  
   179  	// store through it, to make sure it works
   180  	get_tls(BX)
   181  	MOVL	$0x123, g(BX)	// write a marker through the TLS g slot
   182  	MOVL	runtime·m0+m_tls(SB), AX	// read m0.tls directly; it must show the same marker
   183  	CMPL	AX, $0x123
   184  	JEQ	ok
   185  	MOVL	AX, 0	// abort
   186  ok:
   187  	// set up m and g "registers"
   188  	get_tls(BX)
   189  	LEAL	runtime·g0(SB), DX
   190  	MOVL	DX, g(BX)	// current g = &g0
   191  	LEAL	runtime·m0(SB), AX
   192  
   193  	// save m->g0 = g0
   194  	MOVL	DX, m_g0(AX)
   195  	// save g0->m = m0
   196  	MOVL	AX, g_m(DX)
   197  
   198  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   199  
   200  	// convention is D is always cleared
   201  	CLD
   202  
   203  	CALL	runtime·check(SB)
   204  
   205  	// saved argc, argv
   206  	MOVL	120(SP), AX
   207  	MOVL	AX, 0(SP)	// argument 1 for runtime·args: argc
   208  	MOVL	124(SP), AX
   209  	MOVL	AX, 4(SP)	// argument 2 for runtime·args: argv
   210  	CALL	runtime·args(SB)
   211  	CALL	runtime·osinit(SB)
   212  	CALL	runtime·schedinit(SB)
   213  
   214  	// create a new goroutine to start program
   215  	PUSHL	$runtime·mainPC(SB)	// entry
   216  	CALL	runtime·newproc(SB)
   217  	POPL	AX	// discard the pushed argument
   218  
   219  	// start this M
   220  	CALL	runtime·mstart(SB)
   221  
   222  	CALL	runtime·abort(SB)	// reached only if mstart returns
   223  	RET
   224  
   225  DATA	bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"	// 61 bytes; rt0_go passes 0x3d as the length
   226  GLOBL	bad_proc_msg<>(SB), RODATA, $61	// file-local, read-only
   227  
   228  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// one word holding the address of runtime·main
   229  GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes &mainPC as the entry argument to newproc
   230  
   231  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes INT $3 and then returns to the caller. No arguments, no results.
   232  	INT $3
   233  	RET
   234  
   235  TEXT runtime·asminit(SB),NOSPLIT,$0-0
        	// Loads the x87 FPU control word from runtime·controlWord64.
        	// No arguments, no results.
   236  	// Linux and MinGW start the FPU in extended double precision.
   237  	// Other operating systems use double precision.
   238  	// Change to double precision to match them,
   239  	// and to match other hardware that only has double.
   240  	FLDCW	runtime·controlWord64(SB)
   241  	RET
   242  
   243  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
        	// Thin assembly entry marked TOPFRAME that forwards to runtime·mstart0.
   244  	CALL	runtime·mstart0(SB)
   245  	RET // not reached
   246  
   247  /*
   248   *  go-routine
   249   */
   250  
   251  // void gogo(Gobuf*)
   252  // restore state from Gobuf; longjmp
        //
        // Loads the Gobuf pointer into BX and its g into DX, touches *g so that
        // a nil g faults here, then jumps to gogo<> which performs the switch
        // using those two registers.
   253  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   254  	MOVL	buf+0(FP), BX		// gobuf
   255  	MOVL	gobuf_g(BX), DX		// DX = buf.g
   256  	MOVL	0(DX), CX		// make sure g != nil
   257  	JMP	gogo<>(SB)
   258  
   259  TEXT gogo<>(SB), NOSPLIT, $0
        	// Second half of runtime·gogo. Register inputs (set by runtime·gogo):
        	//   BX = *Gobuf to resume, DX = the g stored in that Gobuf.
        	// Does not return: control continues at the PC saved in the Gobuf.
   260  	get_tls(CX)
   261  	MOVL	DX, g(CX)		// make it the current g
   262  	MOVL	gobuf_sp(BX), SP	// restore SP
   263  	MOVL	gobuf_ctxt(BX), DX	// DX = saved context value
   264  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   265  	MOVL	$0, gobuf_ctxt(BX)
   266  	MOVL	gobuf_pc(BX), BX	// BX = saved PC (the Gobuf pointer is no longer needed)
   267  	JMP	BX
   268  
   269  // func mcall(fn func(*g))
   270  // Switch to m->g0's stack, call fn(g).
   271  // Fn must never return. It should gogo(&g->sched)
   272  // to keep running g.
        //
        // The caller's PC and SP are recorded in g->sched before the switch.
        // If the current g is already m->g0, control goes to runtime·badmcall;
        // if fn returns, control goes to runtime·badmcall2.
   273  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   274  	MOVL	fn+0(FP), DI
   275  
   276  	get_tls(DX)
   277  	MOVL	g(DX), AX	// save state in g->sched
   278  	MOVL	0(SP), BX	// caller's PC
   279  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   280  	LEAL	fn+0(FP), BX	// caller's SP
   281  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   282  
   283  	// switch to m->g0 & its stack, call fn
   284  	MOVL	g(DX), BX
   285  	MOVL	g_m(BX), BX
   286  	MOVL	m_g0(BX), SI
   287  	CMPL	SI, AX	// if g == m->g0 call badmcall
   288  	JNE	3(PC)	// g != g0: skip the next two instructions
   289  	MOVL	$runtime·badmcall(SB), AX
   290  	JMP	AX
   291  	MOVL	SI, g(DX)	// g = m->g0
   292  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   293  	PUSHL	AX	// argument for fn: the g we just left
   294  	MOVL	DI, DX	// DX = the func value itself, passed as context
   295  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   296  	CALL	DI
   297  	POPL	AX	// only reached if fn returned, which is an error
   298  	MOVL	$runtime·badmcall2(SB), AX
   299  	JMP	AX
   300  	RET
   301  
   302  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   303  // of the G stack. We need to distinguish the routine that
   304  // lives at the bottom of the G stack from the one that lives
   305  // at the top of the system stack because the one at the top of
   306  // the system stack terminates the stack walk (see topofstack()).
        //
        // Its address is what gosave_systemstack_switch<> stores as the saved PC;
        // the body itself is a bare RET.
   307  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   308  	RET
   309  
   310  // func systemstack(fn func())
        //
        // Runs fn on the g0 stack. Three cases, decided by comparing the current
        // g with fields of its m:
        //   - g is m->gsignal or m->g0: tail-call fn on the current stack.
        //   - g is m->curg: save state, switch to g0's stack, call fn, switch back.
        //   - anything else: call runtime·badsystemstack, then INT $3.
   311  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   312  	MOVL	fn+0(FP), DI	// DI = fn
   313  	get_tls(CX)
   314  	MOVL	g(CX), AX	// AX = g
   315  	MOVL	g_m(AX), BX	// BX = m
   316  
   317  	CMPL	AX, m_gsignal(BX)
   318  	JEQ	noswitch
   319  
   320  	MOVL	m_g0(BX), DX	// DX = g0
   321  	CMPL	AX, DX
   322  	JEQ	noswitch
   323  
   324  	CMPL	AX, m_curg(BX)
   325  	JNE	bad
   326  
   327  	// switch stacks
   328  	// save our state in g->sched. Pretend to
   329  	// be systemstack_switch if the G stack is scanned.
   330  	CALL	gosave_systemstack_switch<>(SB)
   331  
   332  	// switch to g0
   333  	get_tls(CX)
   334  	MOVL	DX, g(CX)
   335  	MOVL	(g_sched+gobuf_sp)(DX), BX
   336  	MOVL	BX, SP
   337  
   338  	// call target function
   339  	MOVL	DI, DX	// DX = the func value itself, passed as context
   340  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   341  	CALL	DI
   342  
   343  	// switch back to g
   344  	get_tls(CX)
   345  	MOVL	g(CX), AX
   346  	MOVL	g_m(AX), BX
   347  	MOVL	m_curg(BX), AX	// AX = the goroutine we came from
   348  	MOVL	AX, g(CX)
   349  	MOVL	(g_sched+gobuf_sp)(AX), SP	// back on its stack, at the SP saved above
   350  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   351  	RET
   352  
   353  noswitch:
   354  	// already on system stack; tail call the function
   355  	// Using a tail call here cleans up tracebacks since we won't stop
   356  	// at an intermediate systemstack.
   357  	MOVL	DI, DX
   358  	MOVL	0(DI), DI
   359  	JMP	DI
   360  
   361  bad:
   362  	// Bad: g is not gsignal, not g0, not curg. What is it?
   363  	// Hide call from linker nosplit analysis.
   364  	MOVL	$runtime·badsystemstack(SB), AX
   365  	CALL	AX
   366  	INT	$3
   367  
   368  // func switchToCrashStack0(fn func())
        //
        // Makes runtime·gcrash the current g and the current m's g0, moves SP to
        // 32 bytes below gcrash's stack.hi, and calls fn there. fn is expected
        // not to return; if it does, runtime·abort is called.
   369  TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-4
   370  	MOVL 	fn+0(FP), AX
   371  
   372  	get_tls(CX)
   373  	MOVL	g(CX), BX	// BX = g
   374  	MOVL	g_m(BX), DX	// DX = curm
   375  
   376  	// set g to gcrash
   377  	LEAL	runtime·gcrash(SB), BX // g = &gcrash
   378  	MOVL	DX, g_m(BX)            // g.m = curm
   379  	MOVL	BX, m_g0(DX)           // curm.g0 = g
   380  	get_tls(CX)
   381  	MOVL	BX, g(CX)
   382  
   383  	// switch to crashstack
   384  	MOVL	(g_stack+stack_hi)(BX), DX
   385  	SUBL	$(4*8), DX	// leave 32 bytes between SP and stack.hi
   386  	MOVL	DX, SP
   387  
   388  	// call target function
   389  	MOVL	AX, DX	// DX = the func value itself, passed as context
   390  	MOVL	0(AX), AX	// AX = code pointer stored in the first word of the func value
   391  	CALL	AX
   392  
   393  	// should never return
   394  	CALL	runtime·abort(SB)
   395  	UNDEF
   396  
   397  /*
   398   * support for morestack
   399   */
   400  
   401  // Called during function prolog when more stack is needed.
   402  //
   403  // The traceback routines see morestack on a g0 as being
   404  // the top of a stack (for example, morestack calling newstack
   405  // calling the scheduler calling newm calling gc), so we must
   406  // record an argument size. For that purpose, it has no arguments.
        //
        // Inputs are implicit: 0(SP) is the return PC into f (the function whose
        // prolog called us), 4(SP) is f's own return PC, and DX is stored as
        // g->sched.ctxt. The routine records f's context in g->sched and f's
        // caller in m->morebuf, then switches to m->g0 and calls runtime·newstack.
   407  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   408  	// Cannot grow scheduler stack (m->g0).
   409  	get_tls(CX)
   410  	MOVL	g(CX), DI
   411  	MOVL	g_m(DI), BX
   412  
   413  	// Set g->sched to context in f.
   414  	MOVL	0(SP), AX	// f's PC
   415  	MOVL	AX, (g_sched+gobuf_pc)(DI)
   416  	LEAL	4(SP), AX	// f's SP
   417  	MOVL	AX, (g_sched+gobuf_sp)(DI)
   418  	MOVL	DX, (g_sched+gobuf_ctxt)(DI)
   419  
   420  	MOVL	m_g0(BX), SI
   421  	CMPL	g(CX), SI
   422  	JNE	3(PC)	// not on g0: skip the two error calls
   423  	CALL	runtime·badmorestackg0(SB)
   424  	CALL	runtime·abort(SB)
   425  
   426  	// Cannot grow signal stack.
   427  	MOVL	m_gsignal(BX), SI
   428  	CMPL	g(CX), SI
   429  	JNE	3(PC)	// not on gsignal: skip the two error calls
   430  	CALL	runtime·badmorestackgsignal(SB)
   431  	CALL	runtime·abort(SB)
   432  
   433  	// Called from f.
   434  	// Set m->morebuf to f's caller.
   435  	NOP	SP	// tell vet SP changed - stop checking offsets
   436  	MOVL	4(SP), DI	// f's caller's PC
   437  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   438  	LEAL	8(SP), CX	// f's caller's SP
   439  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   440  	get_tls(CX)	// CX was reused as scratch above; reload the TLS base
   441  	MOVL	g(CX), SI
   442  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   443  
   444  	// Call newstack on m->g0's stack.
   445  	MOVL	m_g0(BX), BP
   446  	MOVL	BP, g(CX)
   447  	MOVL	(g_sched+gobuf_sp)(BP), AX
   448  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   449  	MOVL	AX, SP
   450  	CALL	runtime·newstack(SB)
   451  	CALL	runtime·abort(SB)	// crash if newstack returns
   452  	RET
   453  
   454  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
        	// Variant of morestack with no context value: zero DX (which
        	// morestack stores into g->sched.ctxt) and jump to runtime·morestack.
   455  	MOVL	$0, DX
   456  	JMP runtime·morestack(SB)
   457  
   458  // reflectcall: call a function with the given argument list
   459  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   460  // we don't have variable-sized frames, so we use a small number
   461  // of constant-sized-frame functions to encode a few bits of size in the pc.
   462  // Caution: ugly multiline assembly macros in your future!
   463  
   464  #define DISPATCH(NAME,MAXSIZE)		\
   465  	CMPL	CX, $MAXSIZE;		\
   466  	JA	3(PC);			\
   467  	MOVL	$NAME(SB), AX;		\
   468  	JMP	AX
   469  // Note: can't just "JMP NAME(SB)" - bad inlining results.
        // DISPATCH(NAME, MAXSIZE) expects the requested size in CX. If CX is at
        // most MAXSIZE (unsigned compare) it jumps to NAME through AX; otherwise
        // JA 3(PC) skips the MOVL/JMP pair and falls into whatever follows the
        // macro invocation. AX is clobbered on the taken path.
   470  
   471  TEXT ·reflectcall(SB), NOSPLIT, $0-28
        	// Size dispatcher: loads frameSize into CX and jumps to the first
        	// runtime·callN in the ascending list below whose N is >= frameSize.
        	// The jump leaves this function's arguments in place for callN.
        	// If frameSize exceeds the largest entry (1073741824), control goes
        	// to runtime·badreflectcall.
   472  	MOVL	frameSize+20(FP), CX
   473  	DISPATCH(runtime·call16, 16)
   474  	DISPATCH(runtime·call32, 32)
   475  	DISPATCH(runtime·call64, 64)
   476  	DISPATCH(runtime·call128, 128)
   477  	DISPATCH(runtime·call256, 256)
   478  	DISPATCH(runtime·call512, 512)
   479  	DISPATCH(runtime·call1024, 1024)
   480  	DISPATCH(runtime·call2048, 2048)
   481  	DISPATCH(runtime·call4096, 4096)
   482  	DISPATCH(runtime·call8192, 8192)
   483  	DISPATCH(runtime·call16384, 16384)
   484  	DISPATCH(runtime·call32768, 32768)
   485  	DISPATCH(runtime·call65536, 65536)
   486  	DISPATCH(runtime·call131072, 131072)
   487  	DISPATCH(runtime·call262144, 262144)
   488  	DISPATCH(runtime·call524288, 524288)
   489  	DISPATCH(runtime·call1048576, 1048576)
   490  	DISPATCH(runtime·call2097152, 2097152)
   491  	DISPATCH(runtime·call4194304, 4194304)
   492  	DISPATCH(runtime·call8388608, 8388608)
   493  	DISPATCH(runtime·call16777216, 16777216)
   494  	DISPATCH(runtime·call33554432, 33554432)
   495  	DISPATCH(runtime·call67108864, 67108864)
   496  	DISPATCH(runtime·call134217728, 134217728)
   497  	DISPATCH(runtime·call268435456, 268435456)
   498  	DISPATCH(runtime·call536870912, 536870912)
   499  	DISPATCH(runtime·call1073741824, 1073741824)
   500  	MOVL	$runtime·badreflectcall(SB), AX	// no size class was large enough
   501  	JMP	AX
   502  
   503  #define CALLFN(NAME,MAXSIZE)			\
        	/* Defines one fixed-frame trampoline NAME with a MAXSIZE-byte frame. */	\
        	/* It copies stackArgsSize bytes from stackArgs to the bottom of its  */	\
        	/* frame, calls the code pointer stored in *f with DX = f, then hands */	\
        	/* the result region to callRet<> to be copied back into stackArgs.   */	\
   504  TEXT NAME(SB), WRAPPER, $MAXSIZE-28;		\
   505  	NO_LOCAL_POINTERS;			\
   506  	/* copy arguments to stack */		\
   507  	MOVL	stackArgs+8(FP), SI;		\
   508  	MOVL	stackArgsSize+12(FP), CX;		\
   509  	MOVL	SP, DI;				\
   510  	REP;MOVSB;				\
   511  	/* call function */			\
   512  	MOVL	f+4(FP), DX;			\
   513  	MOVL	(DX), AX; 			\
   514  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   515  	CALL	AX;				\
   516  	/* copy return values back */		\
        	/* register inputs for callRet<>: DX = stackArgsType,            */	\
        	/* DI = stackArgs + stackRetOffset, SI = SP + stackRetOffset,    */	\
        	/* CX = stackArgsSize - stackRetOffset.                          */	\
   517  	MOVL	stackArgsType+0(FP), DX;		\
   518  	MOVL	stackArgs+8(FP), DI;		\
   519  	MOVL	stackArgsSize+12(FP), CX;		\
   520  	MOVL	stackRetOffset+16(FP), BX;		\
   521  	MOVL	SP, SI;				\
   522  	ADDL	BX, DI;				\
   523  	ADDL	BX, SI;				\
   524  	SUBL	BX, CX;				\
   525  	CALL	callRet<>(SB);			\
   526  	RET
   527  
   528  // callRet copies return values back at the end of call*. This is a
   529  // separate function so it can allocate stack space for the arguments
   530  // to reflectcallmove. It does not follow the Go ABI; it expects its
   531  // arguments in registers.
        //
        // Register inputs, as set up by the CALLFN macro body:
        //   DX = stackArgsType, DI = destination, SI = source, CX = byte count.
        // They are stored as the first four argument words for
        // runtime·reflectcallmove; the fifth word is set to 0.
   532  TEXT callRet<>(SB), NOSPLIT, $20-0
   533  	MOVL	DX, 0(SP)
   534  	MOVL	DI, 4(SP)
   535  	MOVL	SI, 8(SP)
   536  	MOVL	CX, 12(SP)
   537  	MOVL	$0, 16(SP)
   538  	CALL	runtime·reflectcallmove(SB)
   539  	RET
   540  
   541  CALLFN(·call16, 16)	// one trampoline per frame size, doubling from 16 bytes up to 1073741824
   542  CALLFN(·call32, 32)	// the sizes mirror the DISPATCH list in ·reflectcall
   543  CALLFN(·call64, 64)
   544  CALLFN(·call128, 128)
   545  CALLFN(·call256, 256)
   546  CALLFN(·call512, 512)
   547  CALLFN(·call1024, 1024)
   548  CALLFN(·call2048, 2048)
   549  CALLFN(·call4096, 4096)
   550  CALLFN(·call8192, 8192)
   551  CALLFN(·call16384, 16384)
   552  CALLFN(·call32768, 32768)
   553  CALLFN(·call65536, 65536)
   554  CALLFN(·call131072, 131072)
   555  CALLFN(·call262144, 262144)
   556  CALLFN(·call524288, 524288)
   557  CALLFN(·call1048576, 1048576)
   558  CALLFN(·call2097152, 2097152)
   559  CALLFN(·call4194304, 4194304)
   560  CALLFN(·call8388608, 8388608)
   561  CALLFN(·call16777216, 16777216)
   562  CALLFN(·call33554432, 33554432)
   563  CALLFN(·call67108864, 67108864)
   564  CALLFN(·call134217728, 134217728)
   565  CALLFN(·call268435456, 268435456)
   566  CALLFN(·call536870912, 536870912)
   567  CALLFN(·call1073741824, 1073741824)
   568  
   569  TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
        	// Executes PAUSE `cycles` times; returns immediately when cycles == 0.
        	// NOTE(review): the frame is declared $0-0 (no argument bytes) although
        	// the body reads cycles+0(FP) — confirm against the Go declaration.
   570  	MOVL	cycles+0(FP), AX
   571  	TESTL	AX, AX
   572  	JZ	done	// zero iterations requested
   573  again:
   574  	PAUSE
   575  	SUBL	$1, AX
   576  	JNZ	again	// loop until the counter reaches zero
   577  done:
   578  	RET
   579  
   580  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
        	// Intentionally empty: the body is a bare RET, for the reason below.
   581  	// Stores are already ordered on x86, so this is just a
   582  	// compile barrier.
   583  	RET
   584  
   585  // Save state of caller into g->sched,
   586  // but using fake PC from systemstack_switch.
   587  // Must only be called from functions with no locals ($0)
   588  // or else unwinding from systemstack_switch is incorrect.
        //
        // AX and BX are pushed on entry and popped before returning, so callers
        // see them unchanged. Aborts if g->sched.ctxt is non-zero.
   589  TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
   590  	PUSHL	AX
   591  	PUSHL	BX
   592  	get_tls(BX)
   593  	MOVL	g(BX), BX	// BX = current g
   594  	LEAL	arg+0(FP), AX	// address just above our return PC
   595  	MOVL	AX, (g_sched+gobuf_sp)(BX)
   596  	MOVL	$runtime·systemstack_switch(SB), AX	// saved PC = address of systemstack_switch
   597  	MOVL	AX, (g_sched+gobuf_pc)(BX)
   598  	// Assert ctxt is zero. See func save.
   599  	MOVL	(g_sched+gobuf_ctxt)(BX), AX
   600  	TESTL	AX, AX
   601  	JZ	2(PC)	// ctxt == 0: skip the abort
   602  	CALL	runtime·abort(SB)
   603  	POPL	BX
   604  	POPL	AX
   605  	RET
   606  
   607  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
   608  // Call fn(arg) aligned appropriately for the gcc ABI.
   609  // Called on a system stack, and there may be no g yet (during needm).
        //
        // Stays on the current stack: reserves 32 bytes, aligns SP down to 16,
        // keeps the original SP in the new frame at 8(SP) and restores it after
        // the call. The value fn leaves in AX is not stored anywhere.
   610  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
   611  	MOVL	fn+0(FP), AX
   612  	MOVL	arg+4(FP), BX
   613  	MOVL	SP, DX	// DX = SP on entry
   614  	SUBL	$32, SP
   615  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   616  	MOVL	DX, 8(SP)	// save old SP
   617  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   618  	CALL	AX
   619  	MOVL	8(SP), DX	// reload the saved SP from the aligned frame
   620  	MOVL	DX, SP
   621  	RET
   622  
   623  // func asmcgocall(fn, arg unsafe.Pointer) int32
   624  // Call fn(arg) on the scheduler stack,
   625  // aligned appropriately for the gcc ABI.
   626  // See cgocall.go for more details.
        //
        // Three paths:
        //   - no g in TLS (nosave): call fn on the current stack, saving only SP.
        //   - g is m->gsignal or m->g0 (noswitch): call fn on the current stack.
        //   - otherwise: save state, switch to m->g0's stack, then as noswitch.
        // The value fn leaves in AX is stored into ret.
   627  TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   628  	MOVL	fn+0(FP), AX
   629  	MOVL	arg+4(FP), BX
   630  
   631  	MOVL	SP, DX	// DX = SP on entry, used below to compute the stack depth
   632  
   633  	// Figure out if we need to switch to m->g0 stack.
   634  	// We get called to create new OS threads too, and those
   635  	// come in on the m->g0 stack already. Or we might already
   636  	// be on the m->gsignal stack.
   637  #ifdef GOOS_windows
   638  	// On Windows, get_tls might return garbage if the thread
   639  	// has never called into Go, so check tls_g directly.
   640  	MOVL	runtime·tls_g(SB), CX
   641  	CMPL	CX, $0
   642  	JEQ	nosave
   643  #endif
   644  	get_tls(CX)
   645  	MOVL	g(CX), DI
   646  	CMPL	DI, $0
   647  	JEQ	nosave	// Don't even have a G yet.
   648  	MOVL	g_m(DI), BP
   649  	CMPL	DI, m_gsignal(BP)
   650  	JEQ	noswitch
   651  	MOVL	m_g0(BP), SI
   652  	CMPL	DI, SI
   653  	JEQ	noswitch
   654  	CALL	gosave_systemstack_switch<>(SB)	// record caller state in g->sched before leaving this stack
   655  	get_tls(CX)
   656  	MOVL	SI, g(CX)	// current g = m->g0
   657  	MOVL	(g_sched+gobuf_sp)(SI), SP	// SP = g0's saved stack pointer
   658  
   659  noswitch:
   660  	// Now on a scheduling stack (a pthread-created stack).
   661  	SUBL	$32, SP
   662  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   663  	MOVL	DI, 8(SP)	// save g
   664  	MOVL	(g_stack+stack_hi)(DI), DI
   665  	SUBL	DX, DI	// DI = g.stack.hi - entry SP
   666  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   667  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   668  	CALL	AX
   669  
   670  	// Restore registers, g, stack pointer.
   671  	get_tls(CX)
   672  	MOVL	8(SP), DI	// DI = saved g
   673  	MOVL	(g_stack+stack_hi)(DI), SI
   674  	SUBL	4(SP), SI	// SI = (current) stack.hi - saved depth = original SP
   675  	MOVL	DI, g(CX)
   676  	MOVL	SI, SP
   677  
   678  	MOVL	AX, ret+8(FP)
   679  	RET
   680  nosave:
   681  	// Now on a scheduling stack (a pthread-created stack).
   682  	SUBL	$32, SP
   683  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   684  	MOVL	DX, 4(SP)	// save original stack pointer
   685  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   686  	CALL	AX
   687  
   688  	MOVL	4(SP), CX	// restore original stack pointer
   689  	MOVL	CX, SP
   690  	MOVL	AX, ret+8(FP)
   691  	RET
   692  
   693  // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   694  // See cgocall.go for more details.
        //
        // Outline of the body:
        //   - fn == nil: treat frame as a saved g, install it, and go to dropm.
        //   - otherwise obtain an m (calling runtime·needAndBindM if there is no
        //     g), switch from m->g0 to m->curg's stack, call
        //     runtime·cgocallbackg(fn, frame, ctxt), switch back, and call
        //     runtime·dropm when the m was borrowed and must be released.
        // The local savedm-4(SP) holds the m seen on entry, or 0 if one was borrowed.
   695  TEXT ·cgocallback(SB),NOSPLIT,$12-12  // Frame size must match commented places below
   696  	NO_LOCAL_POINTERS
   697  
   698  	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
   699  	// It is used to dropm while thread is exiting.
   700  	MOVL	fn+0(FP), AX
   701  	CMPL	AX, $0
   702  	JNE	loadg
   703  	// Restore the g from frame.
   704  	get_tls(CX)
   705  	MOVL	frame+4(FP), BX
   706  	MOVL	BX, g(CX)
   707  	JMP	dropm
   708  
   709  loadg:
   710  	// If g is nil, Go did not create the current thread,
   711  	// or if this thread never called into Go on pthread platforms.
   712  	// Call needm to obtain one for temporary use.
   713  	// In this case, we're running on the thread stack, so there's
   714  	// lots of space, but the linker doesn't know. Hide the call from
   715  	// the linker analysis by using an indirect call through AX.
   716  	get_tls(CX)
   717  #ifdef GOOS_windows
   718  	MOVL	$0, BP
   719  	CMPL	CX, $0
   720  	JEQ	needm
   721  #endif
   722  	MOVL	g(CX), BP
   723  	CMPL	BP, $0
   724  	JEQ	needm
   725  	MOVL	g_m(BP), BP
   726  	MOVL	BP, savedm-4(SP) // saved copy of oldm
   727  	JMP	havem
   728  needm:
   729  	MOVL	$runtime·needAndBindM(SB), AX
   730  	CALL	AX
   731  	MOVL	$0, savedm-4(SP)	// 0 marks "m was borrowed"; tested after the callback
   732  	get_tls(CX)
   733  	MOVL	g(CX), BP
   734  	MOVL	g_m(BP), BP	// BP = the m just obtained
   735  
   736  	// Set m->sched.sp = SP, so that if a panic happens
   737  	// during the function we are about to execute, it will
   738  	// have a valid SP to run on the g0 stack.
   739  	// The next few lines (after the havem label)
   740  	// will save this SP onto the stack and then write
   741  	// the same SP back to m->sched.sp. That seems redundant,
   742  	// but if an unrecovered panic happens, unwindm will
   743  	// restore the g->sched.sp from the stack location
   744  	// and then systemstack will try to use it. If we don't set it here,
   745  	// that restored SP will be uninitialized (typically 0) and
   746  	// will not be usable.
   747  	MOVL	m_g0(BP), SI
   748  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   749  
   750  havem:
   751  	// Now there's a valid m, and we're running on its m->g0.
   752  	// Save current m->g0->sched.sp on stack and then set it to SP.
   753  	// Save current sp in m->g0->sched.sp in preparation for
   754  	// switch back to m->curg stack.
   755  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   756  	MOVL	m_g0(BP), SI
   757  	MOVL	(g_sched+gobuf_sp)(SI), AX
   758  	MOVL	AX, 0(SP)
   759  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   760  
   761  	// Switch to m->curg stack and call runtime.cgocallbackg.
   762  	// Because we are taking over the execution of m->curg
   763  	// but *not* resuming what had been running, we need to
   764  	// save that information (m->curg->sched) so we can restore it.
   765  	// We can restore m->curg->sched.sp easily, because calling
   766  	// runtime.cgocallbackg leaves SP unchanged upon return.
   767  	// To save m->curg->sched.pc, we push it onto the curg stack and
   768  	// open a frame the same size as cgocallback's g0 frame.
   769  	// Once we switch to the curg stack, the pushed PC will appear
   770  	// to be the return PC of cgocallback, so that the traceback
   771  	// will seamlessly trace back into the earlier calls.
   772  	MOVL	m_curg(BP), SI
   773  	MOVL	SI, g(CX)
   774  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   775  	MOVL	(g_sched+gobuf_pc)(SI), BP
   776  	MOVL	BP, -4(DI)  // "push" return PC on the g stack
   777  	// Gather our arguments into registers.
   778  	MOVL	fn+0(FP), AX
   779  	MOVL	frame+4(FP), BX
   780  	MOVL	ctxt+8(FP), CX
   781  	LEAL	-(4+12)(DI), SP  // Must match declared frame size
   782  	MOVL	AX, 0(SP)	// cgocallbackg argument 1: fn
   783  	MOVL	BX, 4(SP)	// cgocallbackg argument 2: frame
   784  	MOVL	CX, 8(SP)	// cgocallbackg argument 3: ctxt
   785  	CALL	runtime·cgocallbackg(SB)
   786  
   787  	// Restore g->sched (== m->curg->sched) from saved values.
   788  	get_tls(CX)
   789  	MOVL	g(CX), SI
   790  	MOVL	12(SP), BP  // Must match declared frame size
   791  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   792  	LEAL	(12+4)(SP), DI  // Must match declared frame size
   793  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   794  
   795  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   796  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   797  	// so we do not have to restore it.)
   798  	MOVL	g(CX), BP
   799  	MOVL	g_m(BP), BP
   800  	MOVL	m_g0(BP), SI
   801  	MOVL	SI, g(CX)
   802  	MOVL	(g_sched+gobuf_sp)(SI), SP
   803  	MOVL	0(SP), AX	// the g0 sched.sp value saved at havem
   804  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   805  
   806  	// If the m on entry was nil, we called needm above to borrow an m,
   807  	// 1. for the duration of the call on non-pthread platforms,
   808  	// 2. or the duration of the C thread alive on pthread platforms.
   809  	// If the m on entry wasn't nil,
   810  	// 1. the thread might be a Go thread,
   811  	// 2. or it wasn't the first call from a C thread on pthread platforms,
   812  	//    since then we skip dropm to reuse the m in the first call.
   813  	MOVL	savedm-4(SP), DX
   814  	CMPL	DX, $0
   815  	JNE	droppedm	// m existed on entry: nothing to release
   816  
   817  	// Skip dropm to reuse it in the next call, when a pthread key has been created.
   818  	MOVL	_cgo_pthread_key_created(SB), DX
   819  	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
   820  	CMPL	DX, $0
   821  	JEQ	dropm
   822  	CMPL	(DX), $0
   823  	JNE	droppedm	// key was created: keep the m for the next callback
   824  
   825  dropm:
   826  	MOVL	$runtime·dropm(SB), AX
   827  	CALL	AX
   828  droppedm:
   829  
   830  	// Done!
   831  	RET
   832  
   833  // void setg(G*); set g. for use by needm.
        //
        // Stores gg into the TLS g slot. On Windows the thread's tls_g slot
        // (addressed through FS) is handled first: cleared when gg is nil (and
        // the function returns early), otherwise pointed at gg.m.tls.
   834  TEXT runtime·setg(SB), NOSPLIT, $0-4
   835  	MOVL	gg+0(FP), BX
   836  #ifdef GOOS_windows
   837  	MOVL	runtime·tls_g(SB), CX
   838  	CMPL	BX, $0
   839  	JNE	settls
   840  	MOVL	$0, 0(CX)(FS)	// gg == nil: clear the slot and return without touching g
   841  	RET
   842  settls:
   843  	MOVL	g_m(BX), AX
   844  	LEAL	m_tls(AX), AX	// AX = &gg.m.tls
   845  	MOVL	AX, 0(CX)(FS)
   846  #endif
   847  	get_tls(CX)
   848  	MOVL	BX, g(CX)
   849  	RET
   850  
   851  // void setg_gcc(G*); set g. for use by gcc
        //
        // Reads its single stack argument and stores it in the TLS g slot.
        // Uses only AX and DX. rt0_go passes this function's address to
        // _cgo_init as its second argument.
   852  TEXT setg_gcc<>(SB), NOSPLIT, $0
   853  	get_tls(AX)
   854  	MOVL	gg+0(FP), DX
   855  	MOVL	DX, g(AX)
   856  	RET
   857  
   858  TEXT runtime·abort(SB),NOSPLIT,$0-0
        	// Executes INT $3 and, if execution continues past it, spins in an
        	// infinite loop. There is no RET, so this never returns.
   859  	INT	$3
   860  loop:
   861  	JMP	loop
   862  
   863  // check that SP lies strictly between g->stack.lo and g->stack.hi
        //
        // Calls runtime·abort unless stack.lo < SP < stack.hi (unsigned). Both
        // comparisons use JHI, so SP == stack.lo and SP == stack.hi both abort.
   864  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   865  	get_tls(CX)
   866  	MOVL	g(CX), AX
   867  	CMPL	(g_stack+stack_hi)(AX), SP
   868  	JHI	2(PC)	// stack.hi > SP: ok, skip the abort
   869  	CALL	runtime·abort(SB)
   870  	CMPL	SP, (g_stack+stack_lo)(AX)
   871  	JHI	2(PC)	// SP > stack.lo: ok, skip the abort
   872  	CALL	runtime·abort(SB)
   873  	RET
   874  
   875  // func cputicks() int64
        //
        // Returns the time-stamp counter as a 64-bit value (AX = low word,
        // DX = high word). Read with RDTSCP when internal/cpu reports it, else
        // with MFENCE;LFENCE;RDTSC. GO386_softfloat builds use a bare RDTSC.
   876  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   877  	// LFENCE/MFENCE instruction support is dependent on SSE2.
   878  	// When no SSE2 support is present do not enforce any serialization
   879  	// since using CPUID to serialize the instruction stream is
   880  	// very costly.
   881  #ifdef GO386_softfloat
   882  	JMP	rdtsc  // no fence instructions available
   883  #endif
   884  	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
   885  	JNE	fences
   886  	// Instruction stream serializing RDTSCP is supported.
   887  	// RDTSCP is supported by Intel Nehalem (2008) and
   888  	// AMD K8 Rev. F (2006) and newer.
   889  	RDTSCP
   890  done:
   891  	MOVL	AX, ret_lo+0(FP)	// low 32 bits of the result
   892  	MOVL	DX, ret_hi+4(FP)	// high 32 bits of the result
   893  	RET
   894  fences:
   895  	// MFENCE is instruction stream serializing and flushes the
   896  	// store buffers on AMD. The serialization semantics of LFENCE on AMD
   897  	// are dependent on MSR C001_1029 and CPU generation.
   898  	// LFENCE on Intel does wait for all previous instructions to have executed.
   899  	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
   900  	// previous instructions executed and all previous loads and stores to globally visible.
   901  	// Using MFENCE;LFENCE here aligns the serializing properties without
   902  	// runtime detection of CPU manufacturer.
   903  	MFENCE
   904  	LFENCE
   905  rdtsc:
   906  	RDTSC
   907  	JMP done
   908  
   909  TEXT ldt0setup<>(SB),NOSPLIT,$16-0
        	// Called from rt0_go to set up TLS when _cgo_init did not.
        	// Calls runtime·setldt(7, &runtime·m0.tls, 32); on Windows it first
        	// calls runtime·wintls.
   910  #ifdef GOOS_windows
   911  	CALL	runtime·wintls(SB)
   912  #endif
   913  	// set up ldt 7 to point at m0.tls
   914  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   915  	// the entry number is just a hint.  setldt will set up GS with what it used.
   916  	MOVL	$7, 0(SP)	// argument 1: requested entry number
   917  	LEAL	runtime·m0+m_tls(SB), AX
   918  	MOVL	AX, 4(SP)	// argument 2: address of m0.tls
   919  	MOVL	$32, 8(SP)	// sizeof(tls array)
   920  	CALL	runtime·setldt(SB)
   921  	RET
   922  
   923  TEXT runtime·emptyfunc(SB),0,$0-0
        	// Body is a bare RET. The flags field is 0, i.e. NOSPLIT is not set;
        	// rt0_go calls this to "fault if stack check is wrong" (presumably via
        	// the assembler-inserted stack-split prologue — confirm).
   924  	RET
   925  
   926  // hash function using AES hardware instructions
        //
        // If runtime·useAeshash is 0, tail-jumps to runtime·memhashFallback with
        // the arguments untouched. Otherwise loads the register inputs for
        // runtime·aeshashbody<> (AX = data pointer, BX = size, DX = address of
        // the result slot) and tail-jumps to it.
   927  TEXT runtime·memhash(SB),NOSPLIT,$0-16
   928  	CMPB	runtime·useAeshash(SB), $0
   929  	JEQ	noaes
   930  	MOVL	p+0(FP), AX	// ptr to data
   931  	MOVL	s+8(FP), BX	// size
   932  	LEAL	ret+12(FP), DX	// where aeshashbody should store the hash
   933  	JMP	runtime·aeshashbody<>(SB)
   934  noaes:
   935  	JMP	runtime·memhashFallback(SB)
   936  
   937  TEXT runtime·strhash(SB),NOSPLIT,$0-12
        	// strhash hashes the string whose header p points to: the data pointer
        	// is read from (p) and the length from 4(p). It uses the AES path when
        	// runtime·useAeshash is nonzero and tail-jumps to
        	// runtime·strhashFallback otherwise. The result slot is ret+8(FP).
   938  	CMPB	runtime·useAeshash(SB), $0
   939  	JEQ	noaes
   940  	MOVL	p+0(FP), AX	// ptr to string object
   941  	MOVL	4(AX), BX	// length of string
   942  	MOVL	(AX), AX	// string data
   943  	LEAL	ret+8(FP), DX	// address of the result slot
   944  	JMP	runtime·aeshashbody<>(SB)
   945  noaes:
   946  	JMP	runtime·strhashFallback(SB)
   947  
   948  // aeshashbody is the shared AES hashing routine that memhash and strhash
        // jump to. It dispatches on the length in BX (0, 1-15, 16, 17-32, 33-64,
        // 65 or more) and stores the low 32 bits of the final state at (DX).
        // The seed is read from h+4(FP), i.e. from the argument frame of the
        // function that jumped here. AX and BX are modified and X0-X7 are used
        // as scratch.
        //
        // Register inputs:
        // AX: data
   949  // BX: length
   950  // DX: address to put return value
   951  TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
   952  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   953  	PINSRW	$4, BX, X0	            // 16 bits of length
   954  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   955  	MOVO	X0, X1                      // save unscrambled seed
   956  	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   957  	AESENC	X0, X0                      // scramble seed
   958  
        	// Dispatch on length. The comparisons are unsigned (JB/JBE).
   959  	CMPL	BX, $16
   960  	JB	aes0to15
   961  	JE	aes16
   962  	CMPL	BX, $32
   963  	JBE	aes17to32
   964  	CMPL	BX, $64
   965  	JBE	aes33to64
   966  	JMP	aes65plus
   967  
   968  aes0to15:
   969  	TESTL	BX, BX
   970  	JE	aes0
   971  
        	// Bits 4-11 of AX+16 are all zero only when the original pointer lies
        	// in the last 16 bytes of a 4096-byte-aligned region; only then might
        	// a 16-byte load starting at the pointer run past that boundary.
   972  	ADDL	$16, AX
   973  	TESTW	$0xff0, AX
   974  	JE	endofpage
   975  
   976  	// 16 bytes loaded at this address won't cross
   977  	// a page boundary, so we can load it directly.
   978  	MOVOU	-16(AX), X1	// AX was advanced by 16 above, so this is the original pointer
   979  	ADDL	BX, BX	// BX = 2*length; with the *8 scale this indexes 16-byte entries
   980  	PAND	masks<>(SB)(BX*8), X1	// keep only the low `length` bytes
   981  
   982  final1:
   983  	PXOR	X0, X1	// xor data with seed
   984  	AESENC	X1, X1  // scramble combo 3 times
   985  	AESENC	X1, X1
   986  	AESENC	X1, X1
   987  	MOVL	X1, (DX)	// result = low 32 bits of X1
   988  	RET
   989  
   990  endofpage:
   991  	// address ends in 1111xxxx. Might be up against
   992  	// a page boundary, so load ending at last byte.
   993  	// Then shift bytes down using pshufb.
   994  	MOVOU	-32(AX)(BX*1), X1	// AX is pointer+16, so this loads the 16 bytes ending at the last data byte
   995  	ADDL	BX, BX	// BX = 2*length, same 16-byte-entry indexing as for masks<>
   996  	PSHUFB	shifts<>(SB)(BX*8), X1	// move the top `length` bytes down, zero the rest
   997  	JMP	final1
   998  
   999  aes0:
  1000  	// Return scrambled input seed
  1001  	AESENC	X0, X0
  1002  	MOVL	X0, (DX)
  1003  	RET
  1004  
  1005  aes16:
        	// exactly one full 16-byte block: load it unmasked
  1006  	MOVOU	(AX), X1
  1007  	JMP	final1
  1008  
  1009  aes17to32:
  1010  	// make second starting seed
  1011  	PXOR	runtime·aeskeysched+16(SB), X1
  1012  	AESENC	X1, X1
  1013  
  1014  	// load data to be hashed
        	// (first 16 bytes and last 16 bytes; the two loads may overlap)
  1015  	MOVOU	(AX), X2
  1016  	MOVOU	-16(AX)(BX*1), X3
  1017  
  1018  	// xor with seed
  1019  	PXOR	X0, X2
  1020  	PXOR	X1, X3
  1021  
  1022  	// scramble 3 times
  1023  	AESENC	X2, X2
  1024  	AESENC	X3, X3
  1025  	AESENC	X2, X2
  1026  	AESENC	X3, X3
  1027  	AESENC	X2, X2
  1028  	AESENC	X3, X3
  1029  
  1030  	// combine results
  1031  	PXOR	X3, X2
  1032  	MOVL	X2, (DX)
  1033  	RET
  1034  
  1035  aes33to64:
  1036  	// make 3 more starting seeds
  1037  	MOVO	X1, X2
  1038  	MOVO	X1, X3
  1039  	PXOR	runtime·aeskeysched+16(SB), X1
  1040  	PXOR	runtime·aeskeysched+32(SB), X2
  1041  	PXOR	runtime·aeskeysched+48(SB), X3
  1042  	AESENC	X1, X1
  1043  	AESENC	X2, X2
  1044  	AESENC	X3, X3
  1045  
        	// load the first 32 and the last 32 bytes (the two halves may overlap)
  1046  	MOVOU	(AX), X4
  1047  	MOVOU	16(AX), X5
  1048  	MOVOU	-32(AX)(BX*1), X6
  1049  	MOVOU	-16(AX)(BX*1), X7
  1050  
        	// xor each data lane with its own seed
  1051  	PXOR	X0, X4
  1052  	PXOR	X1, X5
  1053  	PXOR	X2, X6
  1054  	PXOR	X3, X7
  1055  
        	// scramble each lane 3 times
  1056  	AESENC	X4, X4
  1057  	AESENC	X5, X5
  1058  	AESENC	X6, X6
  1059  	AESENC	X7, X7
  1060  
  1061  	AESENC	X4, X4
  1062  	AESENC	X5, X5
  1063  	AESENC	X6, X6
  1064  	AESENC	X7, X7
  1065  
  1066  	AESENC	X4, X4
  1067  	AESENC	X5, X5
  1068  	AESENC	X6, X6
  1069  	AESENC	X7, X7
  1070  
        	// fold the four lanes into X4
  1071  	PXOR	X6, X4
  1072  	PXOR	X7, X5
  1073  	PXOR	X5, X4
  1074  	MOVL	X4, (DX)
  1075  	RET
  1076  
  1077  aes65plus:
  1078  	// make 3 more starting seeds
  1079  	MOVO	X1, X2
  1080  	MOVO	X1, X3
  1081  	PXOR	runtime·aeskeysched+16(SB), X1
  1082  	PXOR	runtime·aeskeysched+32(SB), X2
  1083  	PXOR	runtime·aeskeysched+48(SB), X3
  1084  	AESENC	X1, X1
  1085  	AESENC	X2, X2
  1086  	AESENC	X3, X3
  1087  
  1088  	// start with last (possibly overlapping) block
  1089  	MOVOU	-64(AX)(BX*1), X4
  1090  	MOVOU	-48(AX)(BX*1), X5
  1091  	MOVOU	-32(AX)(BX*1), X6
  1092  	MOVOU	-16(AX)(BX*1), X7
  1093  
  1094  	// scramble state once
  1095  	AESENC	X0, X4
  1096  	AESENC	X1, X5
  1097  	AESENC	X2, X6
  1098  	AESENC	X3, X7
  1099  
  1100  	// compute number of remaining 64-byte blocks
        	// BX = (length-1)/64, which is at least 1 here because length >= 65
  1101  	DECL	BX
  1102  	SHRL	$6, BX
  1103  
  1104  aesloop:
  1105  	// scramble state, xor in a block
  1106  	MOVOU	(AX), X0
  1107  	MOVOU	16(AX), X1
  1108  	MOVOU	32(AX), X2
  1109  	MOVOU	48(AX), X3
  1110  	AESENC	X0, X4
  1111  	AESENC	X1, X5
  1112  	AESENC	X2, X6
  1113  	AESENC	X3, X7
  1114  
  1115  	// scramble state
  1116  	AESENC	X4, X4
  1117  	AESENC	X5, X5
  1118  	AESENC	X6, X6
  1119  	AESENC	X7, X7
  1120  
  1121  	ADDL	$64, AX	// advance to the next 64-byte block
  1122  	DECL	BX
  1123  	JNE	aesloop
  1124  
  1125  	// 3 more scrambles to finish
  1126  	AESENC	X4, X4
  1127  	AESENC	X5, X5
  1128  	AESENC	X6, X6
  1129  	AESENC	X7, X7
  1130  
  1131  	AESENC	X4, X4
  1132  	AESENC	X5, X5
  1133  	AESENC	X6, X6
  1134  	AESENC	X7, X7
  1135  
  1136  	AESENC	X4, X4
  1137  	AESENC	X5, X5
  1138  	AESENC	X6, X6
  1139  	AESENC	X7, X7
  1140  
        	// fold the four lanes into X4
  1141  	PXOR	X6, X4
  1142  	PXOR	X7, X5
  1143  	PXOR	X5, X4
  1144  	MOVL	X4, (DX)
  1145  	RET
  1146  
  1147  TEXT runtime·memhash32(SB),NOSPLIT,$0-12
        	// memhash32 hashes the 4 bytes at p with seed h and stores a 32-bit
        	// result in ret+8(FP). When runtime·useAeshash is zero it tail-jumps to
        	// runtime·memhash32Fallback.
  1148  	CMPB	runtime·useAeshash(SB), $0
  1149  	JEQ	noaes
  1150  	MOVL	p+0(FP), AX	// ptr to data
  1151  	MOVL	h+4(FP), X0	// seed
  1152  	PINSRD	$1, (AX), X0	// data
        	// three AES rounds keyed with successive 16-byte pieces of aeskeysched
  1153  	AESENC	runtime·aeskeysched+0(SB), X0
  1154  	AESENC	runtime·aeskeysched+16(SB), X0
  1155  	AESENC	runtime·aeskeysched+32(SB), X0
  1156  	MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1157  	RET
  1158  noaes:
  1159  	JMP	runtime·memhash32Fallback(SB)
  1160  
  1161  TEXT runtime·memhash64(SB),NOSPLIT,$0-12
        	// memhash64 hashes the 8 bytes at p with seed h and stores a 32-bit
        	// result in ret+8(FP). When runtime·useAeshash is zero it tail-jumps to
        	// runtime·memhash64Fallback.
  1162  	CMPB	runtime·useAeshash(SB), $0
  1163  	JEQ	noaes
  1164  	MOVL	p+0(FP), AX	// ptr to data
  1165  	MOVQ	(AX), X0	// data
  1166  	PINSRD	$2, h+4(FP), X0	// seed
        	// three AES rounds keyed with successive 16-byte pieces of aeskeysched
  1167  	AESENC	runtime·aeskeysched+0(SB), X0
  1168  	AESENC	runtime·aeskeysched+16(SB), X0
  1169  	AESENC	runtime·aeskeysched+32(SB), X0
  1170  	MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1171  	RET
  1172  noaes:
  1173  	JMP	runtime·memhash64Fallback(SB)
  1174  
  1175  // simple mask to get rid of data in the high part of the register.
        // masks<> is a read-only table of 16 entries of 16 bytes each. Entry i
        // (at byte offset 16*i) has its low i bytes set to 0xff and the remaining
        // bytes zero. aeshashbody ANDs a 16-byte load with entry `length` to keep
        // only the first `length` bytes of data.
  1176  DATA masks<>+0x00(SB)/4, $0x00000000
  1177  DATA masks<>+0x04(SB)/4, $0x00000000
  1178  DATA masks<>+0x08(SB)/4, $0x00000000
  1179  DATA masks<>+0x0c(SB)/4, $0x00000000
  1180  
  1181  DATA masks<>+0x10(SB)/4, $0x000000ff
  1182  DATA masks<>+0x14(SB)/4, $0x00000000
  1183  DATA masks<>+0x18(SB)/4, $0x00000000
  1184  DATA masks<>+0x1c(SB)/4, $0x00000000
  1185  
  1186  DATA masks<>+0x20(SB)/4, $0x0000ffff
  1187  DATA masks<>+0x24(SB)/4, $0x00000000
  1188  DATA masks<>+0x28(SB)/4, $0x00000000
  1189  DATA masks<>+0x2c(SB)/4, $0x00000000
  1190  
  1191  DATA masks<>+0x30(SB)/4, $0x00ffffff
  1192  DATA masks<>+0x34(SB)/4, $0x00000000
  1193  DATA masks<>+0x38(SB)/4, $0x00000000
  1194  DATA masks<>+0x3c(SB)/4, $0x00000000
  1195  
  1196  DATA masks<>+0x40(SB)/4, $0xffffffff
  1197  DATA masks<>+0x44(SB)/4, $0x00000000
  1198  DATA masks<>+0x48(SB)/4, $0x00000000
  1199  DATA masks<>+0x4c(SB)/4, $0x00000000
  1200  
  1201  DATA masks<>+0x50(SB)/4, $0xffffffff
  1202  DATA masks<>+0x54(SB)/4, $0x000000ff
  1203  DATA masks<>+0x58(SB)/4, $0x00000000
  1204  DATA masks<>+0x5c(SB)/4, $0x00000000
  1205  
  1206  DATA masks<>+0x60(SB)/4, $0xffffffff
  1207  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1208  DATA masks<>+0x68(SB)/4, $0x00000000
  1209  DATA masks<>+0x6c(SB)/4, $0x00000000
  1210  
  1211  DATA masks<>+0x70(SB)/4, $0xffffffff
  1212  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1213  DATA masks<>+0x78(SB)/4, $0x00000000
  1214  DATA masks<>+0x7c(SB)/4, $0x00000000
  1215  
  1216  DATA masks<>+0x80(SB)/4, $0xffffffff
  1217  DATA masks<>+0x84(SB)/4, $0xffffffff
  1218  DATA masks<>+0x88(SB)/4, $0x00000000
  1219  DATA masks<>+0x8c(SB)/4, $0x00000000
  1220  
  1221  DATA masks<>+0x90(SB)/4, $0xffffffff
  1222  DATA masks<>+0x94(SB)/4, $0xffffffff
  1223  DATA masks<>+0x98(SB)/4, $0x000000ff
  1224  DATA masks<>+0x9c(SB)/4, $0x00000000
  1225  
  1226  DATA masks<>+0xa0(SB)/4, $0xffffffff
  1227  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1228  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1229  DATA masks<>+0xac(SB)/4, $0x00000000
  1230  
  1231  DATA masks<>+0xb0(SB)/4, $0xffffffff
  1232  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1233  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1234  DATA masks<>+0xbc(SB)/4, $0x00000000
  1235  
  1236  DATA masks<>+0xc0(SB)/4, $0xffffffff
  1237  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1238  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1239  DATA masks<>+0xcc(SB)/4, $0x00000000
  1240  
  1241  DATA masks<>+0xd0(SB)/4, $0xffffffff
  1242  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1243  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1244  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1245  
  1246  DATA masks<>+0xe0(SB)/4, $0xffffffff
  1247  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1248  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1249  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1250  
  1251  DATA masks<>+0xf0(SB)/4, $0xffffffff
  1252  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1253  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1254  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1255  
        // 16 entries * 16 bytes = 256 bytes, placed in read-only data.
  1256  GLOBL masks<>(SB),RODATA,$256
  1257  
  1258  // these are arguments to pshufb. They move data down from
  1259  // the high bytes of the register to the low bytes of the register.
  1260  // index is how many bytes to move.
        // shifts<> is a read-only table of 16 entries of 16 bytes each. In entry i
        // (at byte offset 16*i) the low i control bytes select source bytes
        // 16-i .. 15, and the remaining control bytes are 0xff, which makes
        // PSHUFB write zero to those positions. Entry 0 is all zeros; aeshashbody
        // never indexes it because length 0 is handled before the table lookup.
  1261  DATA shifts<>+0x00(SB)/4, $0x00000000
  1262  DATA shifts<>+0x04(SB)/4, $0x00000000
  1263  DATA shifts<>+0x08(SB)/4, $0x00000000
  1264  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1265  
  1266  DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1267  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1268  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1269  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1270  
  1271  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1272  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1273  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1274  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1275  
  1276  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1277  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1278  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1279  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1280  
  1281  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1282  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1283  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1284  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1285  
  1286  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1287  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1288  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1289  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1290  
  1291  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1292  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1293  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1294  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1295  
  1296  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1297  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1298  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1299  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1300  
  1301  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1302  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1303  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1304  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1305  
  1306  DATA shifts<>+0x90(SB)/4, $0x0a090807
  1307  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1308  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1309  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1310  
  1311  DATA shifts<>+0xa0(SB)/4, $0x09080706
  1312  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1313  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1314  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1315  
  1316  DATA shifts<>+0xb0(SB)/4, $0x08070605
  1317  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1318  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1319  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1320  
  1321  DATA shifts<>+0xc0(SB)/4, $0x07060504
  1322  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1323  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1324  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1325  
  1326  DATA shifts<>+0xd0(SB)/4, $0x06050403
  1327  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1328  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1329  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1330  
  1331  DATA shifts<>+0xe0(SB)/4, $0x05040302
  1332  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1333  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1334  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1335  
  1336  DATA shifts<>+0xf0(SB)/4, $0x04030201
  1337  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1338  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1339  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1340  
        // 16 entries * 16 bytes = 256 bytes, placed in read-only data.
  1341  GLOBL shifts<>(SB),RODATA,$256
  1342  
  1343  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// checkASM returns true (one byte at ret+0(FP)) when both tables are
        	// 16-byte aligned. NOTE(review): presumably needed because aeshashbody
        	// uses them as PAND/PSHUFB memory operands - confirm.
  1344  	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
  1345  	MOVL	$masks<>(SB), AX
  1346  	MOVL	$shifts<>(SB), BX
  1347  	ORL	BX, AX	// a low bit set in either address survives the OR
  1348  	TESTL	$15, AX	// low 4 bits all zero <=> both addresses are multiples of 16
  1349  	SETEQ	ret+0(FP)
  1350  	RET
  1351  
  1352  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1353  // Must obey the gcc calling convention.
        // The result is left in AX; CX is used as scratch for the TLS base.
  1354  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1355  	get_tls(CX)
  1356  	MOVL	g(CX), AX	// AX = g
  1357  	MOVL	g_m(AX), AX	// AX = g->m
  1358  	MOVL	m_curg(AX), AX	// AX = g->m->curg
  1359  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg->stack.hi
  1360  	RET
  1361  
  1362  // The top-most function running on a goroutine
  1363  // returns to goexit+PCQuantum.
        // The leading one-byte NOP is what makes goexit+PCQuantum land on the CALL
        // below (NOTE(review): assumes PCQuantum is 1 on 386 - confirm).
  1364  TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
  1365  	BYTE	$0x90	// NOP
  1366  	CALL	runtime·goexit1(SB)	// does not return
  1367  	// traceback from goexit1 must hit code range of goexit
  1368  	BYTE	$0x90	// NOP
  1369  
  1370  // Add a module's moduledata to the linked list of moduledata objects. This
  1371  // is called from .init_array by a function generated in the linker and so
  1372  // follows the platform ABI wrt register preservation -- it only touches AX,
  1373  // CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1374  // instead the pointer to the moduledata is passed in AX.
  1375  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1376  	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1377  	MOVL	AX, moduledata_next(DX)	// tail.next = new moduledata
  1378  	MOVL	AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1379  	RET
  1380  
  1381  TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
        	// Converts the uint32 argument a to a float64 result using the x87 FPU.
        	// The value is widened to a 64-bit integer on the stack (high word
        	// zero) so that the 64-bit integer load sees it as non-negative.
  1382  	MOVL	a+0(FP), AX
  1383  	MOVL	AX, 0(SP)	// low 32 bits = a
  1384  	MOVL	$0, 4(SP)	// high 32 bits = 0
  1385  	FMOVV	0(SP), F0	// load the 8-byte integer at 0(SP) onto the FPU stack
  1386  	FMOVDP	F0, ret+4(FP)	// store as float64 and pop
  1387  	RET
  1388  
  1389  TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
        	// Converts the float64 argument a to a uint32 result using the x87 FPU.
        	// Frame layout: 0(SP) saved FPU control word, 4(SP)..11(SP) 64-bit
        	// integer scratch; only its low 32 bits are returned.
  1390  	FMOVD	a+0(FP), F0	// load a onto the FPU stack
  1391  	FSTCW	0(SP)	// save the caller's control word
  1392  	FLDCW	runtime·controlWord64trunc(SB)	// NOTE(review): by its name, selects truncation - confirm
  1393  	FMOVVP	F0, 4(SP)	// store as 64-bit integer and pop
  1394  	FLDCW	0(SP)	// restore the saved control word
  1395  	MOVL	4(SP), AX	// low 32 bits of the integer result
  1396  	MOVL	AX, ret+8(FP)
  1397  	RET
  1398  
  1399  // gcWriteBarrier informs the GC about heap pointer writes.
  1400  //
  1401  // gcWriteBarrier returns space in a write barrier buffer which
  1402  // should be filled in by the caller.
  1403  // gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1404  // number of bytes of buffer needed in DI, and returns a pointer
  1405  // to the buffer space in DI.
  1406  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1407  // but may clobber others (e.g., SSE registers).
  1408  // Typical use would be, when doing *(CX+88) = AX
  1409  //     CMPL    $0, runtime.writeBarrier(SB)
  1410  //     JEQ     dowrite
  1411  //     CALL    runtime.gcWriteBarrier2(SB)
  1412  //     MOVL    AX, (DI)
  1413  //     MOVL    88(CX), DX
  1414  //     MOVL    DX, 4(DI)
  1415  // dowrite:
  1416  //     MOVL    AX, 88(CX)
        //
        // Frame slots: 0(SP) DI, 4(SP) AX, 8(SP) DX, 12(SP) BP, 16(SP) SI,
        // 20(SP) CX, 24(SP) BX. Only CX and BX are saved on the fast path; the
        // others are saved only around the call to wbBufFlush.
  1417  TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
  1418  	// Save the registers clobbered by the fast path. This is slightly
  1419  	// faster than having the caller spill these.
  1420  	MOVL	CX, 20(SP)
  1421  	MOVL	BX, 24(SP)
  1422  retry:
  1423  	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1424  	// across a sequence of write barriers.
  1425  	get_tls(BX)
  1426  	MOVL	g(BX), BX
  1427  	MOVL	g_m(BX), BX
  1428  	MOVL	m_p(BX), BX	// BX = g.m.p
  1429  	// Get current buffer write position.
  1430  	MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// original next position
  1431  	ADDL	DI, CX				// new next position
  1432  	// Is the buffer full?
  1433  	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
  1434  	JA	flush	// new next position is past the end: flush and retry
  1435  	// Commit to the larger buffer.
  1436  	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
  1437  	// Make return value (the original next position)
  1438  	SUBL	DI, CX
  1439  	MOVL	CX, DI
  1440  	// Restore registers.
  1441  	MOVL	20(SP), CX
  1442  	MOVL	24(SP), BX
  1443  	RET
  1444  
  1445  flush:
  1446  	// Save all general purpose registers since these could be
  1447  	// clobbered by wbBufFlush and were not saved by the caller.
  1448  	MOVL	DI, 0(SP)
  1449  	MOVL	AX, 4(SP)
  1450  	// BX already saved
  1451  	// CX already saved
  1452  	MOVL	DX, 8(SP)
  1453  	MOVL	BP, 12(SP)
  1454  	MOVL	SI, 16(SP)
  1455  	// DI already saved
  1456  
  1457  	CALL	runtime·wbBufFlush(SB)
  1458  
  1459  	MOVL	0(SP), DI
  1460  	MOVL	4(SP), AX
  1461  	MOVL	8(SP), DX
  1462  	MOVL	12(SP), BP
  1463  	MOVL	16(SP), SI
  1464  	JMP	retry	// DI again holds the requested byte count; redo the reservation
  1465  
  1466  TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
        	// gcWriteBarrier1 through gcWriteBarrier8 are entry points that load
        	// the requested buffer size into DI (4 bytes per slot, so 4*N for
        	// gcWriteBarrierN) and tail-jump to gcWriteBarrier<>, which returns
        	// directly to the original caller.
  1467  	MOVL	$4, DI	// 1 slot
  1468  	JMP	gcWriteBarrier<>(SB)
  1469  TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
  1470  	MOVL	$8, DI	// 2 slots
  1471  	JMP	gcWriteBarrier<>(SB)
  1472  TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
  1473  	MOVL	$12, DI	// 3 slots
  1474  	JMP	gcWriteBarrier<>(SB)
  1475  TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
  1476  	MOVL	$16, DI	// 4 slots
  1477  	JMP	gcWriteBarrier<>(SB)
  1478  TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
  1479  	MOVL	$20, DI	// 5 slots
  1480  	JMP	gcWriteBarrier<>(SB)
  1481  TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
  1482  	MOVL	$24, DI	// 6 slots
  1483  	JMP	gcWriteBarrier<>(SB)
  1484  TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
  1485  	MOVL	$28, DI	// 7 slots
  1486  	JMP	gcWriteBarrier<>(SB)
  1487  TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
  1488  	MOVL	$32, DI	// 8 slots
  1489  	JMP	gcWriteBarrier<>(SB)
  1490  
  1491  TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$40-0
        	// Spills the integer registers into a save area at 8(SP)..39(SP), then
        	// calls runtime·panicBounds32 with two stack arguments: the caller's
        	// return PC at 0(SP) and a pointer to the save area at 4(SP).
        	// Save-area order: AX, CX, DX, BX, (slot for SP, not written), BP, SI, DI.
  1492  	NO_LOCAL_POINTERS
  1493  	// Save all int registers that could have an index in them.
  1494  	// They may be pointers, but if they are they are dead.
  1495  	MOVL	AX, 8(SP)
  1496  	MOVL	CX, 12(SP)
  1497  	MOVL	DX, 16(SP)
  1498  	MOVL	BX, 20(SP)
  1499  	// skip SP @ 24(SP)
  1500  	MOVL	BP, 28(SP)
  1501  	MOVL	SI, 32(SP)
  1502  	MOVL	DI, 36(SP)
  1503  
  1504  	MOVL	SP, AX		// hide SP read from vet
  1505  	MOVL	40(AX), AX	// PC immediately after call to panicBounds
  1506  	MOVL	AX, 0(SP)	// arg 0: that PC
  1507  	LEAL	8(SP), AX
  1508  	MOVL	AX, 4(SP)	// arg 1: pointer to the saved registers
  1509  	CALL	runtime·panicBounds32<ABIInternal>(SB)
  1510  	RET
  1511  
  1512  TEXT runtime·panicExtend<ABIInternal>(SB),NOSPLIT,$40-0
        	// Spills the integer registers into a save area at 8(SP)..39(SP), then
        	// calls runtime·panicBounds32X with two stack arguments: the caller's
        	// return PC at 0(SP) and a pointer to the save area at 4(SP).
        	// Save-area order: AX, CX, DX, BX, (slot for SP, not written), BP, SI, DI.
  1513  	NO_LOCAL_POINTERS
  1514  	// Save all int registers that could have an index in them.
  1515  	// They may be pointers, but if they are they are dead.
  1516  	MOVL	AX, 8(SP)
  1517  	MOVL	CX, 12(SP)
  1518  	MOVL	DX, 16(SP)
  1519  	MOVL	BX, 20(SP)
  1520  	// skip SP @ 24(SP)
  1521  	MOVL	BP, 28(SP)
  1522  	MOVL	SI, 32(SP)
  1523  	MOVL	DI, 36(SP)
  1524  
  1525  	MOVL	SP, AX		// hide SP read from vet
  1526  	MOVL	40(AX), AX	// PC immediately after call to panicExtend
  1527  	MOVL	AX, 0(SP)	// arg 0: that PC
  1528  	LEAL	8(SP), AX
  1529  	MOVL	AX, 4(SP)	// arg 1: pointer to the saved registers
  1530  	CALL	runtime·panicBounds32X<ABIInternal>(SB)
  1531  	RET
  1532  
  1533  #ifdef GOOS_android
  1534  // Use the free TLS_SLOT_APP slot #2 on Android Q.
  1535  // Earlier androids are set up in gcc_android.c.
        // runtime·tls_g is a 4-byte variable initialized to 8
        // (NOTE(review): presumably slot #2 times 4 bytes per slot - confirm).
  1536  DATA runtime·tls_g+0(SB)/4, $8
  1537  GLOBL runtime·tls_g+0(SB), NOPTR, $4
  1538  #endif
  1539  #ifdef GOOS_windows
        // On Windows runtime·tls_g is declared with no DATA initializer here.
  1540  GLOBL runtime·tls_g+0(SB), NOPTR, $4
  1541  #endif
  1542  

View as plain text