Text file
src/runtime/asm_386.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_386 is common startup code for most 386 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv.
14 TEXT _rt0_386(SB),NOSPLIT,$8
15 MOVL 8(SP), AX // argc
16 LEAL 12(SP), BX // argv
17 MOVL AX, 0(SP) // pass argc as first arg to rt0_go
18 MOVL BX, 4(SP) // pass argv as second arg to rt0_go
19 JMP runtime·rt0_go(SB) // tail-jump; rt0_go never returns here
20
21 // _rt0_386_lib is common startup code for most 386 systems when
22 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
23 // arrange to invoke this function as a global constructor (for
24 // c-archive) or when the shared library is loaded (for c-shared).
25 // We expect argc and argv to be passed on the stack following the
26 // usual C ABI.
27 TEXT _rt0_386_lib(SB),NOSPLIT,$0
28 PUSHL BP // standard C prologue: set up frame pointer
29 MOVL SP, BP
30 PUSHL BX // save C callee-saved registers per the 386 C ABI
31 PUSHL SI
32 PUSHL DI
33
34 MOVL 8(BP), AX // argc (first C argument)
35 MOVL AX, _rt0_386_lib_argc<>(SB) // stash for later use by rt0_lib_go
36 MOVL 12(BP), AX // argv (second C argument)
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 CALL runtime·libInit(SB) // start runtime initialization on a new thread
40
41 POPL DI // restore callee-saved registers and return to C
42 POPL SI
43 POPL BX
44 POPL BP
45 RET
46
47 // rt0_lib_go initializes the Go runtime.
48 // This is started in a separate thread by _rt0_386_lib.
49 TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$8
50 MOVL _rt0_386_lib_argc<>(SB), AX // reload argc saved by _rt0_386_lib
51 MOVL AX, 0(SP)
52 MOVL _rt0_386_lib_argv<>(SB), AX // reload argv saved by _rt0_386_lib
53 MOVL AX, 4(SP)
54 JMP runtime·rt0_go(SB) // enter common startup; does not return
55
// Storage for argc/argv captured in _rt0_386_lib and consumed by
// rt0_lib_go once the runtime starts on its own thread.
56 DATA _rt0_386_lib_argc<>(SB)/4, $0
57 GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
58 DATA _rt0_386_lib_argv<>(SB)/4, $0
59 GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
60
61 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
62 // Copy arguments forward on an even stack.
63 // Users of this function jump to it, they don't call it.
64 MOVL 0(SP), AX
65 MOVL 4(SP), BX
66 SUBL $128, SP // plenty of scratch
67 ANDL $~15, SP // 16-byte align the stack
68 MOVL AX, 120(SP) // save argc, argv away
69 MOVL BX, 124(SP)
70
71 // set default stack bounds.
72 // _cgo_init may update stackguard.
73 MOVL $runtime·g0(SB), BP
74 LEAL (-64*1024+104)(SP), BX // assume a 64kB system stack; 104 is the guard slop
75 MOVL BX, g_stackguard0(BP)
76 MOVL BX, g_stackguard1(BP)
77 MOVL BX, (g_stack+stack_lo)(BP)
78 MOVL SP, (g_stack+stack_hi)(BP)
79
80 // find out information about the processor we're on
81 // first see if CPUID instruction is supported.
82 PUSHFL
83 PUSHFL
84 XORL $(1<<21), 0(SP) // flip ID bit
85 POPFL
86 PUSHFL
87 POPL AX
88 XORL 0(SP), AX // AX = EFLAGS bits that actually changed
89 POPFL // restore EFLAGS
90 TESTL $(1<<21), AX // if the ID bit stuck, CPUID is supported
91 JNE has_cpuid
92
93 bad_proc: // show that the program requires MMX.
94 MOVL $2, 0(SP) // fd 2 (stderr)
95 MOVL $bad_proc_msg<>(SB), 4(SP)
96 MOVL $0x3d, 8(SP) // message length (61 bytes)
97 CALL runtime·write(SB)
98 MOVL $1, 0(SP) // exit status 1
99 CALL runtime·exit(SB)
100 CALL runtime·abort(SB) // unreachable; exit does not return
101
102 has_cpuid:
103 MOVL $0, AX // CPUID leaf 0: max leaf + vendor string
104 CPUID
105 MOVL AX, SI
106 CMPL AX, $0
107 JE nocpuinfo // no further leaves available
108
109 CMPL BX, $0x756E6547 // "Genu"
110 JNE notintel
111 CMPL DX, $0x49656E69 // "ineI"
112 JNE notintel
113 CMPL CX, $0x6C65746E // "ntel"
114 JNE notintel
115 MOVB $1, runtime·isIntel(SB)
116 notintel:
117
118 // Load EAX=1 cpuid flags
119 MOVL $1, AX
120 CPUID
121 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
122 MOVL AX, runtime·processorVersionInfo(SB)
123
124 // Check for MMX support
125 TESTL $(1<<23), DX // MMX
126 JZ bad_proc
127
128 nocpuinfo:
129 // if there is an _cgo_init, call it to let it
130 // initialize and to set up GS. if not,
131 // we set up GS ourselves.
132 MOVL _cgo_init(SB), AX
133 TESTL AX, AX
134 JZ needtls
135 #ifdef GOOS_android
136 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
137 // Compensate for tls_g (+8).
138 MOVL -8(TLS), BX
139 MOVL BX, 12(SP)
140 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
141 #else
142 MOVL $0, BX
143 MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
144 #ifdef GOOS_windows
145 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
146 #else
147 MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
148 #endif
149 #endif
150 MOVL $setg_gcc<>(SB), BX
151 MOVL BX, 4(SP) // arg 2: setg_gcc
152 MOVL BP, 0(SP) // arg 1: g0
153 CALL AX // call _cgo_init(g0, setg_gcc, ...)
154
155 // update stackguard after _cgo_init
156 MOVL $runtime·g0(SB), CX
157 MOVL (g_stack+stack_lo)(CX), AX
158 ADDL $const_stackGuard, AX
159 MOVL AX, g_stackguard0(CX)
160 MOVL AX, g_stackguard1(CX)
161
162 #ifndef GOOS_windows
163 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
164 JMP ok
165 #endif
166 needtls:
167 #ifdef GOOS_openbsd
168 // skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
169 JMP ok
170 #endif
171 #ifdef GOOS_plan9
172 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
173 JMP ok
174 #endif
175
176 // set up %gs
177 CALL ldt0setup<>(SB)
178
179 // store through it, to make sure it works
180 get_tls(BX)
181 MOVL $0x123, g(BX)
182 MOVL runtime·m0+m_tls(SB), AX
183 CMPL AX, $0x123 // did the TLS store land in m0.tls?
184 JEQ ok
185 MOVL AX, 0 // abort
186 ok:
187 // set up m and g "registers"
188 get_tls(BX)
189 LEAL runtime·g0(SB), DX
190 MOVL DX, g(BX)
191 LEAL runtime·m0(SB), AX
192
193 // save m->g0 = g0
194 MOVL DX, m_g0(AX)
195 // save g0->m = m0
196 MOVL AX, g_m(DX)
197
198 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
199
200 // convention is D is always cleared
201 CLD
202
203 CALL runtime·check(SB)
204
205 // saved argc, argv
206 MOVL 120(SP), AX
207 MOVL AX, 0(SP)
208 MOVL 124(SP), AX
209 MOVL AX, 4(SP)
210 CALL runtime·args(SB)
211 CALL runtime·osinit(SB)
212 CALL runtime·schedinit(SB)
213
214 // create a new goroutine to start program
215 PUSHL $runtime·mainPC(SB) // entry
216 CALL runtime·newproc(SB)
217 POPL AX
218
219 // start this M
220 CALL runtime·mstart(SB)
221
222 CALL runtime·abort(SB) // mstart should never return
223 RET
224
// Fatal startup message written to stderr when the CPU lacks MMX
// (length 61 = $0x3d, matching the write call in rt0_go).
225 DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
226 GLOBL bad_proc_msg<>(SB), RODATA, $61
227
// mainPC is a function value for runtime.main, pushed by rt0_go as the
// entry point of the first goroutine.
228 DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
229 GLOBL runtime·mainPC(SB),RODATA,$4
230
// breakpoint triggers a debugger breakpoint trap.
231 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
232 INT $3 // x86 breakpoint instruction
233 RET
234
235 TEXT runtime·asminit(SB),NOSPLIT,$0-0
236 // Linux and MinGW start the FPU in extended double precision.
237 // Other operating systems use double precision.
238 // Change to double precision to match them,
239 // and to match other hardware that only has double.
240 FLDCW runtime·controlWord64(SB) // load x87 control word
241 RET
242
// mstart is the entry point for new Ms; TOPFRAME stops tracebacks here.
243 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
244 CALL runtime·mstart0(SB)
245 RET // not reached
246
247 /*
248 * go-routine
249 */
250
251 // void gogo(Gobuf*)
252 // restore state from Gobuf; longjmp
253 TEXT runtime·gogo(SB), NOSPLIT, $0-4
254 MOVL buf+0(FP), BX // gobuf
255 MOVL gobuf_g(BX), DX // g to resume
256 MOVL 0(DX), CX // make sure g != nil
257 JMP gogo<>(SB) // tail call; BX/DX carry gobuf and g
258
// gogo<> expects BX = gobuf, DX = target g. It installs g in TLS,
// restores SP/ctxt/PC from the gobuf, and jumps to the saved PC.
259 TEXT gogo<>(SB), NOSPLIT, $0
260 get_tls(CX)
261 MOVL DX, g(CX) // set current g
262 MOVL gobuf_sp(BX), SP // restore SP
263 MOVL gobuf_ctxt(BX), DX
264 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
265 MOVL $0, gobuf_ctxt(BX)
266 MOVL gobuf_pc(BX), BX
267 JMP BX // resume at saved PC
268
269 // func mcall(fn func(*g))
270 // Switch to m->g0's stack, call fn(g).
271 // Fn must never return. It should gogo(&g->sched)
272 // to keep running g.
273 TEXT runtime·mcall(SB), NOSPLIT, $0-4
274 MOVL fn+0(FP), DI
275
276 get_tls(DX)
277 MOVL g(DX), AX // save state in g->sched
278 MOVL 0(SP), BX // caller's PC
279 MOVL BX, (g_sched+gobuf_pc)(AX)
280 LEAL fn+0(FP), BX // caller's SP
281 MOVL BX, (g_sched+gobuf_sp)(AX)
282
283 // switch to m->g0 & its stack, call fn
284 MOVL g(DX), BX
285 MOVL g_m(BX), BX
286 MOVL m_g0(BX), SI
287 CMPL SI, AX // if g == m->g0 call badmcall
288 JNE 3(PC)
289 MOVL $runtime·badmcall(SB), AX
290 JMP AX
291 MOVL SI, g(DX) // g = m->g0
292 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
293 PUSHL AX // push the old g as fn's argument
294 MOVL DI, DX // DX = fn (closure context register)
295 MOVL 0(DI), DI // load fn's code pointer
296 CALL DI // fn(g); must not return
297 POPL AX
298 MOVL $runtime·badmcall2(SB), AX // crash: fn returned
299 JMP AX
300 RET
301
302 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
303 // of the G stack. We need to distinguish the routine that
304 // lives at the bottom of the G stack from the one that lives
305 // at the top of the system stack because the one at the top of
306 // the system stack terminates the stack walk (see topofstack()).
307 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
308 RET // marker frame only; never does real work
309
310 // func systemstack(fn func())
311 TEXT runtime·systemstack(SB), NOSPLIT, $0-4
312 MOVL fn+0(FP), DI // DI = fn
313 get_tls(CX)
314 MOVL g(CX), AX // AX = g
315 MOVL g_m(AX), BX // BX = m
316
317 CMPL AX, m_gsignal(BX) // already on the signal stack?
318 JEQ noswitch
319
320 MOVL m_g0(BX), DX // DX = g0
321 CMPL AX, DX // already on g0?
322 JEQ noswitch
323
324 CMPL AX, m_curg(BX) // must be the user goroutine, else something is wrong
325 JNE bad
326
327 // switch stacks
328 // save our state in g->sched. Pretend to
329 // be systemstack_switch if the G stack is scanned.
330 CALL gosave_systemstack_switch<>(SB)
331
332 // switch to g0
333 get_tls(CX)
334 MOVL DX, g(CX)
335 MOVL (g_sched+gobuf_sp)(DX), BX
336 MOVL BX, SP
337
338 // call target function
339 MOVL DI, DX // DX = fn (closure context register)
340 MOVL 0(DI), DI
341 CALL DI
342
343 // switch back to g
344 get_tls(CX)
345 MOVL g(CX), AX
346 MOVL g_m(AX), BX
347 MOVL m_curg(BX), AX
348 MOVL AX, g(CX)
349 MOVL (g_sched+gobuf_sp)(AX), SP
350 MOVL $0, (g_sched+gobuf_sp)(AX) // clear saved SP to help the GC
351 RET
352
353 noswitch:
354 // already on system stack; tail call the function
355 // Using a tail call here cleans up tracebacks since we won't stop
356 // at an intermediate systemstack.
357 MOVL DI, DX
358 MOVL 0(DI), DI
359 JMP DI
360
361 bad:
362 // Bad: g is not gsignal, not g0, not curg. What is it?
363 // Hide call from linker nosplit analysis.
364 MOVL $runtime·badsystemstack(SB), AX
365 CALL AX
366 INT $3
367
368 // func switchToCrashStack0(fn func())
369 TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-4
370 MOVL fn+0(FP), AX
371
372 get_tls(CX)
373 MOVL g(CX), BX // BX = g
374 MOVL g_m(BX), DX // DX = curm
375
376 // set g to gcrash
377 LEAL runtime·gcrash(SB), BX // g = &gcrash
378 MOVL DX, g_m(BX) // g.m = curm
379 MOVL BX, m_g0(DX) // curm.g0 = g
380 get_tls(CX)
381 MOVL BX, g(CX)
382
383 // switch to crashstack
384 MOVL (g_stack+stack_hi)(BX), DX
385 SUBL $(4*8), DX // leave some room at the top of the crash stack
386 MOVL DX, SP
387
388 // call target function
389 MOVL AX, DX // DX = fn (closure context register)
390 MOVL 0(AX), AX
391 CALL AX
392
393 // should never return
394 CALL runtime·abort(SB)
395 UNDEF
396
397 /*
398 * support for morestack
399 */
400
401 // Called during function prolog when more stack is needed.
402 //
403 // The traceback routines see morestack on a g0 as being
404 // the top of a stack (for example, morestack calling newstack
405 // calling the scheduler calling newm calling gc), so we must
406 // record an argument size. For that purpose, it has no arguments.
407 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
408 // Cannot grow scheduler stack (m->g0).
409 get_tls(CX)
410 MOVL g(CX), DI
411 MOVL g_m(DI), BX
412
413 // Set g->sched to context in f.
414 MOVL 0(SP), AX // f's PC
415 MOVL AX, (g_sched+gobuf_pc)(DI)
416 LEAL 4(SP), AX // f's SP
417 MOVL AX, (g_sched+gobuf_sp)(DI)
418 MOVL DX, (g_sched+gobuf_ctxt)(DI) // DX holds f's closure context
419
420 MOVL m_g0(BX), SI
421 CMPL g(CX), SI // growing g0's stack is a fatal error
422 JNE 3(PC)
423 CALL runtime·badmorestackg0(SB)
424 CALL runtime·abort(SB)
425
426 // Cannot grow signal stack.
427 MOVL m_gsignal(BX), SI
428 CMPL g(CX), SI
429 JNE 3(PC)
430 CALL runtime·badmorestackgsignal(SB)
431 CALL runtime·abort(SB)
432
433 // Called from f.
434 // Set m->morebuf to f's caller.
435 NOP SP // tell vet SP changed - stop checking offsets
436 MOVL 4(SP), DI // f's caller's PC
437 MOVL DI, (m_morebuf+gobuf_pc)(BX)
438 LEAL 8(SP), CX // f's caller's SP
439 MOVL CX, (m_morebuf+gobuf_sp)(BX)
440 get_tls(CX)
441 MOVL g(CX), SI
442 MOVL SI, (m_morebuf+gobuf_g)(BX)
443
444 // Call newstack on m->g0's stack.
445 MOVL m_g0(BX), BP
446 MOVL BP, g(CX)
447 MOVL (g_sched+gobuf_sp)(BP), AX
448 MOVL -4(AX), BX // fault if CALL would, before smashing SP
449 MOVL AX, SP
450 CALL runtime·newstack(SB)
451 CALL runtime·abort(SB) // crash if newstack returns
452 RET
453
// morestack trampoline for functions with no closure context:
// clear DX (ctxt) and fall into morestack.
454 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
455 MOVL $0, DX
456 JMP runtime·morestack(SB)
457
458 // reflectcall: call a function with the given argument list
459 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
460 // we don't have variable-sized frames, so we use a small number
461 // of constant-sized-frame functions to encode a few bits of size in the pc.
462 // Caution: ugly multiline assembly macros in your future!
463
// DISPATCH jumps to NAME if the frame size in CX is <= MAXSIZE,
// otherwise falls through to the next DISPATCH.
464 #define DISPATCH(NAME,MAXSIZE) \
465 CMPL CX, $MAXSIZE; \
466 JA 3(PC); \
467 MOVL $NAME(SB), AX; \
468 JMP AX
469 // Note: can't just "JMP NAME(SB)" - bad inlining results.
470
471 TEXT ·reflectcall(SB), NOSPLIT, $0-28
472 MOVL frameSize+20(FP), CX // dispatch on required frame size
473 DISPATCH(runtime·call16, 16)
474 DISPATCH(runtime·call32, 32)
475 DISPATCH(runtime·call64, 64)
476 DISPATCH(runtime·call128, 128)
477 DISPATCH(runtime·call256, 256)
478 DISPATCH(runtime·call512, 512)
479 DISPATCH(runtime·call1024, 1024)
480 DISPATCH(runtime·call2048, 2048)
481 DISPATCH(runtime·call4096, 4096)
482 DISPATCH(runtime·call8192, 8192)
483 DISPATCH(runtime·call16384, 16384)
484 DISPATCH(runtime·call32768, 32768)
485 DISPATCH(runtime·call65536, 65536)
486 DISPATCH(runtime·call131072, 131072)
487 DISPATCH(runtime·call262144, 262144)
488 DISPATCH(runtime·call524288, 524288)
489 DISPATCH(runtime·call1048576, 1048576)
490 DISPATCH(runtime·call2097152, 2097152)
491 DISPATCH(runtime·call4194304, 4194304)
492 DISPATCH(runtime·call8388608, 8388608)
493 DISPATCH(runtime·call16777216, 16777216)
494 DISPATCH(runtime·call33554432, 33554432)
495 DISPATCH(runtime·call67108864, 67108864)
496 DISPATCH(runtime·call134217728, 134217728)
497 DISPATCH(runtime·call268435456, 268435456)
498 DISPATCH(runtime·call536870912, 536870912)
499 DISPATCH(runtime·call1073741824, 1073741824)
500 MOVL $runtime·badreflectcall(SB), AX // frame too big: crash
501 JMP AX
502
// CALLFN defines one fixed-frame-size reflectcall helper: it copies the
// argument block onto its own frame, calls f, then hands the results to
// callRet for copying back (with write barriers via reflectcallmove).
503 #define CALLFN(NAME,MAXSIZE) \
504 TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
505 NO_LOCAL_POINTERS; \
506 /* copy arguments to stack */ \
507 MOVL stackArgs+8(FP), SI; \
508 MOVL stackArgsSize+12(FP), CX; \
509 MOVL SP, DI; \
510 REP;MOVSB; \
511 /* call function */ \
512 MOVL f+4(FP), DX; \
513 MOVL (DX), AX; \
514 PCDATA $PCDATA_StackMapIndex, $0; \
515 CALL AX; \
516 /* copy return values back */ \
517 MOVL stackArgsType+0(FP), DX; \
518 MOVL stackArgs+8(FP), DI; \
519 MOVL stackArgsSize+12(FP), CX; \
520 MOVL stackRetOffset+16(FP), BX; \
521 MOVL SP, SI; \
522 ADDL BX, DI; \
523 ADDL BX, SI; \
524 SUBL BX, CX; \
525 CALL callRet<>(SB); \
526 RET
527
528 // callRet copies return values back at the end of call*. This is a
529 // separate function so it can allocate stack space for the arguments
530 // to reflectcallmove. It does not follow the Go ABI; it expects its
531 // arguments in registers.
532 TEXT callRet<>(SB), NOSPLIT, $20-0
533 MOVL DX, 0(SP) // arg: result type
534 MOVL DI, 4(SP) // arg: destination
535 MOVL SI, 8(SP) // arg: source (on the call* frame)
536 MOVL CX, 12(SP) // arg: size in bytes
537 MOVL $0, 16(SP)
538 CALL runtime·reflectcallmove(SB)
539 RET
540
// Instantiate one call* helper per power-of-two frame size used by
// reflectcall's DISPATCH table above.
541 CALLFN(·call16, 16)
542 CALLFN(·call32, 32)
543 CALLFN(·call64, 64)
544 CALLFN(·call128, 128)
545 CALLFN(·call256, 256)
546 CALLFN(·call512, 512)
547 CALLFN(·call1024, 1024)
548 CALLFN(·call2048, 2048)
549 CALLFN(·call4096, 4096)
550 CALLFN(·call8192, 8192)
551 CALLFN(·call16384, 16384)
552 CALLFN(·call32768, 32768)
553 CALLFN(·call65536, 65536)
554 CALLFN(·call131072, 131072)
555 CALLFN(·call262144, 262144)
556 CALLFN(·call524288, 524288)
557 CALLFN(·call1048576, 1048576)
558 CALLFN(·call2097152, 2097152)
559 CALLFN(·call4194304, 4194304)
560 CALLFN(·call8388608, 8388608)
561 CALLFN(·call16777216, 16777216)
562 CALLFN(·call33554432, 33554432)
563 CALLFN(·call67108864, 67108864)
564 CALLFN(·call134217728, 134217728)
565 CALLFN(·call268435456, 268435456)
566 CALLFN(·call536870912, 536870912)
567 CALLFN(·call1073741824, 1073741824)
568
// procyieldAsm(cycles uint32): spin for the given count using PAUSE,
// a hint to the CPU that this is a spin-wait loop.
569 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
570 MOVL cycles+0(FP), AX
571 TESTL AX, AX
572 JZ done // nothing to do for cycles == 0
573 again:
574 PAUSE
575 SUBL $1, AX
576 JNZ again
577 done:
578 RET
579
580 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
581 // Stores are already ordered on x86, so this is just a
582 // compile barrier.
583 RET
584
585 // Save state of caller into g->sched,
586 // but using fake PC from systemstack_switch.
587 // Must only be called from functions with no locals ($0)
588 // or else unwinding from systemstack_switch is incorrect.
589 TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
590 PUSHL AX // preserve caller's AX/BX across this helper
591 PUSHL BX
592 get_tls(BX)
593 MOVL g(BX), BX
594 LEAL arg+0(FP), AX // caller's SP (above our pushes and return PC)
595 MOVL AX, (g_sched+gobuf_sp)(BX)
596 MOVL $runtime·systemstack_switch(SB), AX
597 MOVL AX, (g_sched+gobuf_pc)(BX) // fake PC so tracebacks stop cleanly
598 // Assert ctxt is zero. See func save.
599 MOVL (g_sched+gobuf_ctxt)(BX), AX
600 TESTL AX, AX
601 JZ 2(PC)
602 CALL runtime·abort(SB)
603 POPL BX
604 POPL AX
605 RET
606
607 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
608 // Call fn(arg) aligned appropriately for the gcc ABI.
609 // Called on a system stack, and there may be no g yet (during needm).
610 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
611 MOVL fn+0(FP), AX
612 MOVL arg+4(FP), BX
613 MOVL SP, DX // remember original SP
614 SUBL $32, SP
615 ANDL $~15, SP // alignment, perhaps unnecessary
616 MOVL DX, 8(SP) // save old SP
617 MOVL BX, 0(SP) // first argument in x86-32 ABI
618 CALL AX // fn(arg)
619 MOVL 8(SP), DX
620 MOVL DX, SP // restore original SP
621 RET
622
623 // func asmcgocall(fn, arg unsafe.Pointer) int32
624 // Call fn(arg) on the scheduler stack,
625 // aligned appropriately for the gcc ABI.
626 // See cgocall.go for more details.
627 TEXT ·asmcgocall(SB),NOSPLIT,$0-12
628 MOVL fn+0(FP), AX
629 MOVL arg+4(FP), BX
630
631 MOVL SP, DX // remember caller's SP for depth computation below
632
633 // Figure out if we need to switch to m->g0 stack.
634 // We get called to create new OS threads too, and those
635 // come in on the m->g0 stack already. Or we might already
636 // be on the m->gsignal stack.
637 #ifdef GOOS_windows
638 // On Windows, get_tls might return garbage if the thread
639 // has never called into Go, so check tls_g directly.
640 MOVL runtime·tls_g(SB), CX
641 CMPL CX, $0
642 JEQ nosave
643 #endif
644 get_tls(CX)
645 MOVL g(CX), DI
646 CMPL DI, $0
647 JEQ nosave // Don't even have a G yet.
648 MOVL g_m(DI), BP
649 CMPL DI, m_gsignal(BP)
650 JEQ noswitch
651 MOVL m_g0(BP), SI
652 CMPL DI, SI
653 JEQ noswitch
654 CALL gosave_systemstack_switch<>(SB) // save state so g's stack can be scanned/unwound
655 get_tls(CX)
656 MOVL SI, g(CX)
657 MOVL (g_sched+gobuf_sp)(SI), SP
658
659 noswitch:
660 // Now on a scheduling stack (a pthread-created stack).
661 SUBL $32, SP
662 ANDL $~15, SP // alignment, perhaps unnecessary
663 MOVL DI, 8(SP) // save g
664 MOVL (g_stack+stack_hi)(DI), DI
665 SUBL DX, DI
666 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
667 MOVL BX, 0(SP) // first argument in x86-32 ABI
668 CALL AX
669
670 // Restore registers, g, stack pointer.
671 get_tls(CX)
672 MOVL 8(SP), DI // reload g
673 MOVL (g_stack+stack_hi)(DI), SI
674 SUBL 4(SP), SI // recompute SP from saved depth
675 MOVL DI, g(CX)
676 MOVL SI, SP
677
678 MOVL AX, ret+8(FP) // C return value
679 RET
680 nosave:
681 // Now on a scheduling stack (a pthread-created stack).
682 SUBL $32, SP
683 ANDL $~15, SP // alignment, perhaps unnecessary
684 MOVL DX, 4(SP) // save original stack pointer
685 MOVL BX, 0(SP) // first argument in x86-32 ABI
686 CALL AX
687
688 MOVL 4(SP), CX // restore original stack pointer
689 MOVL CX, SP
690 MOVL AX, ret+8(FP)
691 RET
692
693 // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
694 // See cgocall.go for more details.
695 TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
696 NO_LOCAL_POINTERS
697
698 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
699 // It is used to dropm while thread is exiting.
700 MOVL fn+0(FP), AX
701 CMPL AX, $0
702 JNE loadg
703 // Restore the g from frame.
704 get_tls(CX)
705 MOVL frame+4(FP), BX
706 MOVL BX, g(CX)
707 JMP dropm
708
709 loadg:
710 // If g is nil, Go did not create the current thread,
711 // or if this thread never called into Go on pthread platforms.
712 // Call needm to obtain one for temporary use.
713 // In this case, we're running on the thread stack, so there's
714 // lots of space, but the linker doesn't know. Hide the call from
715 // the linker analysis by using an indirect call through AX.
716 get_tls(CX)
717 #ifdef GOOS_windows
718 MOVL $0, BP
719 CMPL CX, $0
720 JEQ needm
721 #endif
722 MOVL g(CX), BP
723 CMPL BP, $0
724 JEQ needm
725 MOVL g_m(BP), BP
726 MOVL BP, savedm-4(SP) // saved copy of oldm
727 JMP havem
728 needm:
729 MOVL $runtime·needAndBindM(SB), AX
730 CALL AX
731 MOVL $0, savedm-4(SP) // dropm after the call (unless pthread key keeps the m)
732 get_tls(CX)
733 MOVL g(CX), BP
734 MOVL g_m(BP), BP
735
736 // Set m->sched.sp = SP, so that if a panic happens
737 // during the function we are about to execute, it will
738 // have a valid SP to run on the g0 stack.
739 // The next few lines (after the havem label)
740 // will save this SP onto the stack and then write
741 // the same SP back to m->sched.sp. That seems redundant,
742 // but if an unrecovered panic happens, unwindm will
743 // restore the g->sched.sp from the stack location
744 // and then systemstack will try to use it. If we don't set it here,
745 // that restored SP will be uninitialized (typically 0) and
746 // will not be usable.
747 MOVL m_g0(BP), SI
748 MOVL SP, (g_sched+gobuf_sp)(SI)
749
750 havem:
751 // Now there's a valid m, and we're running on its m->g0.
752 // Save current m->g0->sched.sp on stack and then set it to SP.
753 // Save current sp in m->g0->sched.sp in preparation for
754 // switch back to m->curg stack.
755 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
756 MOVL m_g0(BP), SI
757 MOVL (g_sched+gobuf_sp)(SI), AX
758 MOVL AX, 0(SP)
759 MOVL SP, (g_sched+gobuf_sp)(SI)
760
761 // Switch to m->curg stack and call runtime.cgocallbackg.
762 // Because we are taking over the execution of m->curg
763 // but *not* resuming what had been running, we need to
764 // save that information (m->curg->sched) so we can restore it.
765 // We can restore m->curg->sched.sp easily, because calling
766 // runtime.cgocallbackg leaves SP unchanged upon return.
767 // To save m->curg->sched.pc, we push it onto the curg stack and
768 // open a frame the same size as cgocallback's g0 frame.
769 // Once we switch to the curg stack, the pushed PC will appear
770 // to be the return PC of cgocallback, so that the traceback
771 // will seamlessly trace back into the earlier calls.
772 MOVL m_curg(BP), SI
773 MOVL SI, g(CX)
774 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
775 MOVL (g_sched+gobuf_pc)(SI), BP
776 MOVL BP, -4(DI) // "push" return PC on the g stack
777 // Gather our arguments into registers.
778 MOVL fn+0(FP), AX
779 MOVL frame+4(FP), BX
780 MOVL ctxt+8(FP), CX
781 LEAL -(4+12)(DI), SP // Must match declared frame size
782 MOVL AX, 0(SP)
783 MOVL BX, 4(SP)
784 MOVL CX, 8(SP)
785 CALL runtime·cgocallbackg(SB)
786
787 // Restore g->sched (== m->curg->sched) from saved values.
788 get_tls(CX)
789 MOVL g(CX), SI
790 MOVL 12(SP), BP // Must match declared frame size
791 MOVL BP, (g_sched+gobuf_pc)(SI)
792 LEAL (12+4)(SP), DI // Must match declared frame size
793 MOVL DI, (g_sched+gobuf_sp)(SI)
794
795 // Switch back to m->g0's stack and restore m->g0->sched.sp.
796 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
797 // so we do not have to restore it.)
798 MOVL g(CX), BP
799 MOVL g_m(BP), BP
800 MOVL m_g0(BP), SI
801 MOVL SI, g(CX)
802 MOVL (g_sched+gobuf_sp)(SI), SP
803 MOVL 0(SP), AX
804 MOVL AX, (g_sched+gobuf_sp)(SI)
805
806 // If the m on entry was nil, we called needm above to borrow an m,
807 // 1. for the duration of the call on non-pthread platforms,
808 // 2. or the duration of the C thread alive on pthread platforms.
809 // If the m on entry wasn't nil,
810 // 1. the thread might be a Go thread,
811 // 2. or it wasn't the first call from a C thread on pthread platforms,
812 // since then we skip dropm to reuse the m in the first call.
813 MOVL savedm-4(SP), DX
814 CMPL DX, $0
815 JNE droppedm
816
817 // Skip dropm to reuse it in the next call, when a pthread key has been created.
818 MOVL _cgo_pthread_key_created(SB), DX
819 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
820 CMPL DX, $0
821 JEQ dropm
822 CMPL (DX), $0
823 JNE droppedm
824
825 dropm:
826 MOVL $runtime·dropm(SB), AX // indirect call hides it from nosplit analysis
827 CALL AX
828 droppedm:
829
830 // Done!
831 RET
832
833 // void setg(G*); set g. for use by needm.
834 TEXT runtime·setg(SB), NOSPLIT, $0-4
835 MOVL gg+0(FP), BX
836 #ifdef GOOS_windows
837 MOVL runtime·tls_g(SB), CX // CX = TLS slot offset for g
838 CMPL BX, $0
839 JNE settls
840 MOVL $0, 0(CX)(FS) // clearing g: zero the TLS slot
841 RET
842 settls:
843 MOVL g_m(BX), AX
844 LEAL m_tls(AX), AX
845 MOVL AX, 0(CX)(FS) // point the TLS slot at the new g's m.tls
846 #endif
847 get_tls(CX)
848 MOVL BX, g(CX)
849 RET
850
851 // void setg_gcc(G*); set g. for use by gcc
852 TEXT setg_gcc<>(SB), NOSPLIT, $0
853 get_tls(AX)
854 MOVL gg+0(FP), DX // g passed on the stack per the C ABI
855 MOVL DX, g(AX)
856 RET
857
// abort crashes the program with a breakpoint trap; the loop guards
// against the trap being handled and execution continuing.
858 TEXT runtime·abort(SB),NOSPLIT,$0-0
859 INT $3
860 loop:
861 JMP loop
862
863 // check that SP is in range [g->stack.lo, g->stack.hi)
864 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
865 get_tls(CX)
866 MOVL g(CX), AX
867 CMPL (g_stack+stack_hi)(AX), SP
868 JHI 2(PC) // ok: SP < stack.hi
869 CALL runtime·abort(SB)
870 CMPL SP, (g_stack+stack_lo)(AX)
871 JHI 2(PC) // ok: SP > stack.lo
872 CALL runtime·abort(SB)
873 RET
874
875 // func cputicks() int64
876 TEXT runtime·cputicks(SB),NOSPLIT,$0-8
877 // LFENCE/MFENCE instruction support is dependent on SSE2.
878 // When no SSE2 support is present do not enforce any serialization
879 // since using CPUID to serialize the instruction stream is
880 // very costly.
881 #ifdef GO386_softfloat
882 JMP rdtsc // no fence instructions available
883 #endif
884 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
885 JNE fences
886 // Instruction stream serializing RDTSCP is supported.
887 // RDTSCP is supported by Intel Nehalem (2008) and
888 // AMD K8 Rev. F (2006) and newer.
889 RDTSCP
890 done:
891 MOVL AX, ret_lo+0(FP) // 64-bit TSC returned in DX:AX
892 MOVL DX, ret_hi+4(FP)
893 RET
894 fences:
895 // MFENCE is instruction stream serializing and flushes the
896 // store buffers on AMD. The serialization semantics of LFENCE on AMD
897 // are dependent on MSR C001_1029 and CPU generation.
898 // LFENCE on Intel does wait for all previous instructions to have executed.
899 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
900 // previous instructions executed and all previous loads and stores to globally visible.
901 // Using MFENCE;LFENCE here aligns the serializing properties without
902 // runtime detection of CPU manufacturer.
903 MFENCE
904 LFENCE
905 rdtsc:
906 RDTSC
907 JMP done
908
909 TEXT ldt0setup<>(SB),NOSPLIT,$16-0
910 #ifdef GOOS_windows
911 CALL runtime·wintls(SB)
912 #endif
913 // set up ldt 7 to point at m0.tls
914 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
915 // the entry number is just a hint. setldt will set up GS with what it used.
916 MOVL $7, 0(SP)
917 LEAL runtime·m0+m_tls(SB), AX
918 MOVL AX, 4(SP)
919 MOVL $32, 8(SP) // sizeof(tls array)
920 CALL runtime·setldt(SB)
921 RET
922
// emptyfunc does nothing; rt0_go calls it to fault early if the stack
// guard setup is wrong.
923 TEXT runtime·emptyfunc(SB),0,$0-0
924 RET
925
926 // hash function using AES hardware instructions
927 TEXT runtime·memhash(SB),NOSPLIT,$0-16
928 CMPB runtime·useAeshash(SB), $0
929 JEQ noaes
930 MOVL p+0(FP), AX // ptr to data
931 MOVL s+8(FP), BX // size
932 LEAL ret+12(FP), DX
933 JMP runtime·aeshashbody<>(SB) // AX=data, BX=len, DX=&ret
934 noaes:
935 JMP runtime·memhashFallback(SB) // portable hash when AES-NI unavailable
936
937 TEXT runtime·strhash(SB),NOSPLIT,$0-12
938 CMPB runtime·useAeshash(SB), $0
939 JEQ noaes
940 MOVL p+0(FP), AX // ptr to string object
941 MOVL 4(AX), BX // length of string
942 MOVL (AX), AX // string data
943 LEAL ret+8(FP), DX
944 JMP runtime·aeshashbody<>(SB) // AX=data, BX=len, DX=&ret
945 noaes:
946 JMP runtime·strhashFallback(SB) // portable hash when AES-NI unavailable
947
948 // AX: data
949 // BX: length
950 // DX: address to put return value
951 TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
952 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
953 PINSRW $4, BX, X0 // 16 bits of length
954 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
955 MOVO X0, X1 // save unscrambled seed
956 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
957 AESENC X0, X0 // scramble seed
958
959 CMPL BX, $16 // dispatch on input length
960 JB aes0to15
961 JE aes16
962 CMPL BX, $32
963 JBE aes17to32
964 CMPL BX, $64
965 JBE aes33to64
966 JMP aes65plus
967
968 aes0to15:
969 TESTL BX, BX
970 JE aes0
971
972 ADDL $16, AX
973 TESTW $0xff0, AX // are the last 16 bytes before AX within one page?
974 JE endofpage
975
976 // 16 bytes loaded at this address won't cross
977 // a page boundary, so we can load it directly.
978 MOVOU -16(AX), X1
979 ADDL BX, BX // BX*8 scales into the 16-byte mask table entries
980 PAND masks<>(SB)(BX*8), X1 // zero bytes beyond the input length
981
982 final1:
983 PXOR X0, X1 // xor data with seed
984 AESENC X1, X1 // scramble combo 3 times
985 AESENC X1, X1
986 AESENC X1, X1
987 MOVL X1, (DX) // store 32-bit result
988 RET
989
990 endofpage:
991 // address ends in 1111xxxx. Might be up against
992 // a page boundary, so load ending at last byte.
993 // Then shift bytes down using pshufb.
994 MOVOU -32(AX)(BX*1), X1
995 ADDL BX, BX
996 PSHUFB shifts<>(SB)(BX*8), X1
997 JMP final1
998
999 aes0:
1000 // Return scrambled input seed
1001 AESENC X0, X0
1002 MOVL X0, (DX)
1003 RET
1004
1005 aes16:
1006 MOVOU (AX), X1
1007 JMP final1
1008
1009 aes17to32:
1010 // make second starting seed
1011 PXOR runtime·aeskeysched+16(SB), X1
1012 AESENC X1, X1
1013
1014 // load data to be hashed
1015 MOVOU (AX), X2
1016 MOVOU -16(AX)(BX*1), X3 // last 16 bytes (overlaps X2 when len < 32)
1017
1018 // xor with seed
1019 PXOR X0, X2
1020 PXOR X1, X3
1021
1022 // scramble 3 times
1023 AESENC X2, X2
1024 AESENC X3, X3
1025 AESENC X2, X2
1026 AESENC X3, X3
1027 AESENC X2, X2
1028 AESENC X3, X3
1029
1030 // combine results
1031 PXOR X3, X2
1032 MOVL X2, (DX)
1033 RET
1034
1035 aes33to64:
1036 // make 3 more starting seeds
1037 MOVO X1, X2
1038 MOVO X1, X3
1039 PXOR runtime·aeskeysched+16(SB), X1
1040 PXOR runtime·aeskeysched+32(SB), X2
1041 PXOR runtime·aeskeysched+48(SB), X3
1042 AESENC X1, X1
1043 AESENC X2, X2
1044 AESENC X3, X3
1045
1046 MOVOU (AX), X4 // first 32 bytes
1047 MOVOU 16(AX), X5
1048 MOVOU -32(AX)(BX*1), X6 // last 32 bytes (may overlap)
1049 MOVOU -16(AX)(BX*1), X7
1050
1051 PXOR X0, X4
1052 PXOR X1, X5
1053 PXOR X2, X6
1054 PXOR X3, X7
1055
1056 AESENC X4, X4
1057 AESENC X5, X5
1058 AESENC X6, X6
1059 AESENC X7, X7
1060
1061 AESENC X4, X4
1062 AESENC X5, X5
1063 AESENC X6, X6
1064 AESENC X7, X7
1065
1066 AESENC X4, X4
1067 AESENC X5, X5
1068 AESENC X6, X6
1069 AESENC X7, X7
1070
1071 PXOR X6, X4 // fold the four lanes into one
1072 PXOR X7, X5
1073 PXOR X5, X4
1074 MOVL X4, (DX)
1075 RET
1076
1077 aes65plus:
1078 // make 3 more starting seeds
1079 MOVO X1, X2
1080 MOVO X1, X3
1081 PXOR runtime·aeskeysched+16(SB), X1
1082 PXOR runtime·aeskeysched+32(SB), X2
1083 PXOR runtime·aeskeysched+48(SB), X3
1084 AESENC X1, X1
1085 AESENC X2, X2
1086 AESENC X3, X3
1087
1088 // start with last (possibly overlapping) block
1089 MOVOU -64(AX)(BX*1), X4
1090 MOVOU -48(AX)(BX*1), X5
1091 MOVOU -32(AX)(BX*1), X6
1092 MOVOU -16(AX)(BX*1), X7
1093
1094 // scramble state once
1095 AESENC X0, X4
1096 AESENC X1, X5
1097 AESENC X2, X6
1098 AESENC X3, X7
1099
1100 // compute number of remaining 64-byte blocks
1101 DECL BX
1102 SHRL $6, BX
1103
1104 aesloop:
1105 // scramble state, xor in a block
1106 MOVOU (AX), X0
1107 MOVOU 16(AX), X1
1108 MOVOU 32(AX), X2
1109 MOVOU 48(AX), X3
1110 AESENC X0, X4
1111 AESENC X1, X5
1112 AESENC X2, X6
1113 AESENC X3, X7
1114
1115 // scramble state
1116 AESENC X4, X4
1117 AESENC X5, X5
1118 AESENC X6, X6
1119 AESENC X7, X7
1120
1121 ADDL $64, AX
1122 DECL BX
1123 JNE aesloop
1124
1125 // 3 more scrambles to finish
1126 AESENC X4, X4
1127 AESENC X5, X5
1128 AESENC X6, X6
1129 AESENC X7, X7
1130
1131 AESENC X4, X4
1132 AESENC X5, X5
1133 AESENC X6, X6
1134 AESENC X7, X7
1135
1136 AESENC X4, X4
1137 AESENC X5, X5
1138 AESENC X6, X6
1139 AESENC X7, X7
1140
1141 PXOR X6, X4 // fold the four lanes into one
1142 PXOR X7, X5
1143 PXOR X5, X4
1144 MOVL X4, (DX)
1145 RET
1146
1147 TEXT runtime·memhash32(SB),NOSPLIT,$0-12
1148 CMPB runtime·useAeshash(SB), $0
1149 JEQ noaes
1150 MOVL p+0(FP), AX // ptr to data
1151 MOVL h+4(FP), X0 // seed
1152 PINSRD $1, (AX), X0 // data
1153 AESENC runtime·aeskeysched+0(SB), X0 // three AES rounds to mix seed and data
1154 AESENC runtime·aeskeysched+16(SB), X0
1155 AESENC runtime·aeskeysched+32(SB), X0
1156 MOVL X0, ret+8(FP)
1157 RET
1158 noaes:
1159 JMP runtime·memhash32Fallback(SB) // portable hash when AES-NI unavailable
1160
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of an 8-byte value with seed h. Jumps to the portable
// fallback when the CPU lacks AES instructions (useAeshash == 0).
TEXT runtime·memhash64(SB),NOSPLIT,$0-12
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes			// no AES-NI: use software hash
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data fills the low 8 bytes of X0
	PINSRD	$2, h+4(FP), X0	// seed goes in the third dword
	// Three AES rounds mix seed, data, and the runtime's key schedule.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)	// low 32 bits of the state are the hash
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)
1174
// simple mask to get rid of data in the high part of the register.
// masks<>+16*i (0 <= i <= 15) is a 16-byte mask whose low i bytes are
// all ones and whose remaining bytes are zero: ANDing a 16-byte load
// with it keeps only the first i bytes.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256
1257
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// shifts<>+16*i is a PSHUFB control vector: control byte value k
// selects source byte k, and a control byte with the high bit set
// ($0xff here) zeroes the destination byte. Entry i therefore moves
// the top i bytes of the register down to its bottom i bytes and
// clears the rest.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256
1342
// func checkASM() bool
// Reports whether assembly-time invariants hold; returns true on success.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX		// OR the addresses: any low bit set => misaligned
	TESTL	$15, AX
	SETEQ	ret+0(FP)	// true iff both tables are 16-byte aligned
	RET
1351
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g.m
	MOVL	m_curg(AX), AX			// AX = g.m.curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
1361
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP makes goexit+PCQuantum land inside this function,
// so tracebacks attribute the return address to goexit.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1369
// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// new moduledata becomes tail
	RET
1380
// func uint32tofloat64(a uint32) float64
// Converts via the x87 FPU: builds a zero-extended 64-bit integer in the
// local frame, then loads it with FMOVV (FILD) so the unsigned value is
// interpreted as a nonnegative signed int64.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)	// low 32 bits = a
	MOVL	$0, 4(SP)	// high 32 bits = 0, so the int64 is nonnegative
	FMOVV	0(SP), F0	// load as signed 64-bit integer
	FMOVDP	F0, ret+4(FP)	// store (and pop) as float64
	RET
1388
// func float64touint32(a float64) uint32
// Converts via the x87 FPU, temporarily switching the FPU control word
// to truncating (round-toward-zero) mode for the store.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)		// save caller's FPU control word
	FLDCW	runtime·controlWord64trunc(SB)	// set truncating rounding mode
	FMOVVP	F0, 4(SP)	// store (and pop) as truncated int64 at 4(SP)
	FLDCW	0(SP)		// restore caller's control word
	MOVL	4(SP), AX	// low 32 bits of the truncated value
	MOVL	AX, ret+8(FP)
	RET
1398
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in DI, and returns a pointer
// to the buffer space in DI.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcBatchBarrier2(SB)
//     MOVL    AX, (DI)
//     MOVL    88(CX), DX
//     MOVL    DX, 4(DI)
// dowrite:
//     MOVL    AX, 88(CX)
TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVL	CX, 20(SP)
	MOVL	BX, 24(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(BX)
	MOVL	g(BX), BX
	MOVL	g_m(BX), BX
	MOVL	m_p(BX), BX		// BX = g.m.p
	// Get current buffer write position.
	MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// original next position
	ADDL	DI, CX			// new next position
	// Is the buffer full?
	CMPL	CX, (p_wbBuf+wbBuf_end)(BX)
	JA	flush			// yes: drain it, then retry
	// Commit to the larger buffer.
	MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
	// Make return value (the original next position)
	SUBL	DI, CX
	MOVL	CX, DI
	// Restore registers.
	MOVL	20(SP), CX
	MOVL	24(SP), BX
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	MOVL	DI, 0(SP)
	MOVL	AX, 4(SP)
	// BX already saved
	// CX already saved
	MOVL	DX, 8(SP)
	MOVL	BP, 12(SP)
	MOVL	SI, 16(SP)
	// DI already saved

	CALL	runtime·wbBufFlush(SB)

	// Restore registers and retry the reservation on the drained buffer.
	MOVL	0(SP), DI
	MOVL	4(SP), AX
	MOVL	8(SP), DX
	MOVL	12(SP), BP
	MOVL	16(SP), SI
	JMP	retry
1465
// gcWriteBarrierN requests buffer space for N pointers (N*4 bytes on 386)
// and tail-calls the common gcWriteBarrier<> implementation above, which
// takes the byte count in DI and returns the buffer pointer in DI.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$4, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$8, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$12, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$16, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$20, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$24, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$28, DI
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVL	$32, DI
	JMP	gcWriteBarrier<>(SB)
1490
// panicBounds is called by compiler-generated code on a bounds-check
// failure. It spills the integer registers (any of which may hold the
// offending index) and calls panicBounds32 with the caller's PC and a
// pointer to the spilled register block.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$40-0
	NO_LOCAL_POINTERS
	// Save all int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	MOVL	AX, 8(SP)
	MOVL	CX, 12(SP)
	MOVL	DX, 16(SP)
	MOVL	BX, 20(SP)
	// skip SP @ 24(SP)
	MOVL	BP, 28(SP)
	MOVL	SI, 32(SP)
	MOVL	DI, 36(SP)

	MOVL	SP, AX	// hide SP read from vet
	MOVL	40(AX), AX	// PC immediately after call to panicBounds
	MOVL	AX, 0(SP)	// arg 1: caller PC
	LEAL	8(SP), AX
	MOVL	AX, 4(SP)	// arg 2: pointer to saved registers
	CALL	runtime·panicBounds32<ABIInternal>(SB)
	RET
1511
// panicExtend is like panicBounds but reports through panicBounds32X
// (presumably the variant for indices wider than 32 bits on this
// 32-bit platform — confirm against the panicBounds32X definition).
TEXT runtime·panicExtend<ABIInternal>(SB),NOSPLIT,$40-0
	NO_LOCAL_POINTERS
	// Save all int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	MOVL	AX, 8(SP)
	MOVL	CX, 12(SP)
	MOVL	DX, 16(SP)
	MOVL	BX, 20(SP)
	// skip SP @ 24(SP)
	MOVL	BP, 28(SP)
	MOVL	SI, 32(SP)
	MOVL	DI, 36(SP)

	MOVL	SP, AX	// hide SP read from vet
	MOVL	40(AX), AX	// PC immediately after call to panicExtend
	MOVL	AX, 0(SP)	// arg 1: caller PC
	LEAL	8(SP), AX
	MOVL	AX, 4(SP)	// arg 2: pointer to saved registers
	CALL	runtime·panicBounds32X<ABIInternal>(SB)
	RET
1532
// runtime·tls_g holds the TLS offset/slot used to locate g.
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif
#ifdef GOOS_windows
// NOTE(review): no initial DATA here — value presumably set during
// startup elsewhere; confirm against the Windows TLS setup code.
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif
1542