// src/runtime/race_riscv64.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build race
     6  
     7  #include "go_asm.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  #include "cgo/abi_riscv64.h"
    11  
    12  // The following thunks allow calling the gcc-compiled race runtime directly
    13  // from Go code without going all the way through cgo.
    14  // First, it's much faster (up to 50% speedup for real Go programs).
    15  // Second, it eliminates race-related special cases from cgocall and scheduler.
    16  // Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.
    17  
    18  // A brief recap of the riscv C calling convention.
    19  // Arguments are passed in X10...X17
    20  // Callee-saved registers are: X8, X9, X18..X27
    21  // Temporary registers are: X5..X7, X28..X31
    22  
// When calling racecalladdr, X23 holds the address of the tsan function to
// call and X11 holds the data address to be range-checked.
    24  
    25  // The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr.
    26  
    27  // func runtime·raceread(addr uintptr)
    28  // Called from instrumented code.
    29  TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
    30  	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
    31  	MOV	$__tsan_read(SB), X23
    32  	MOV	X10, X11
    33  	MOV	X1, X12
    34  	JMP	racecalladdr<>(SB)
    35  
    36  // func runtime·RaceRead(addr uintptr)
    37  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
    38  	// This needs to be a tail call, because raceread reads caller pc.
    39  	JMP	runtime·raceread(SB)
    40  
    41  // func runtime·racereadpc(void *addr, void *callpc, void *pc)
    42  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
    43  	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    44  	MOV	$__tsan_read_pc(SB), X23
    45  	MOV	addr+0(FP), X11
    46  	MOV	callpc+8(FP), X12
    47  	MOV	pc+16(FP), X13
    48  	JMP	racecalladdr<>(SB)
    49  
    50  // func runtime·racewrite(addr uintptr)
    51  // Called from instrumented code.
    52  TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
    53  	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
    54  	MOV	$__tsan_write(SB), X23
    55  	MOV	X10, X11
    56  	MOV	X1, X12
    57  	JMP	racecalladdr<>(SB)
    58  
    59  // func runtime·RaceWrite(addr uintptr)
    60  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
    61  	// This needs to be a tail call, because racewrite reads caller pc.
    62  	JMP	runtime·racewrite(SB)
    63  
    64  // func runtime·racewritepc(void *addr, void *callpc, void *pc)
    65  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
    66  	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    67  	MOV	$__tsan_write_pc(SB), X23
    68  	MOV	addr+0(FP), X11
    69  	MOV	callpc+8(FP), X12
    70  	MOV	pc+16(FP), X13
    71  	JMP	racecalladdr<>(SB)
    72  
    73  // func runtime·racereadrange(addr, size uintptr)
    74  // Called from instrumented code.
    75  TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
    76  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    77  	MOV	$__tsan_read_range(SB), X23
    78  	MOV	X11, X12
    79  	MOV	X10, X11
    80  	MOV	X1, X13
    81  	JMP	racecalladdr<>(SB)
    82  
    83  // func runtime·RaceReadRange(addr, size uintptr)
    84  TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
    85  	// This needs to be a tail call, because racereadrange reads caller pc.
    86  	JMP	runtime·racereadrange(SB)
    87  
    88  // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
    89  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
    90  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    91  	MOV	$__tsan_read_range(SB), X23
    92  	MOV	addr+0(FP), X11
    93  	MOV	size+8(FP), X12
    94  	MOV	pc+16(FP), X13
    95  
    96  	// pc is an interceptor address, but TSan expects it to point to the
    97  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
    98  	ADD	$4, X13
    99  	JMP	racecalladdr<>(SB)
   100  
   101  // func runtime·racewriterange(addr, size uintptr)
   102  // Called from instrumented code.
   103  TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
   104  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   105  	MOV	$__tsan_write_range(SB), X23
   106  	MOV	X11, X12
   107  	MOV	X10, X11
   108  	MOV	X1, X13
   109  	JMP	racecalladdr<>(SB)
   110  
   111  // func runtime·RaceWriteRange(addr, size uintptr)
   112  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
   113  	// This needs to be a tail call, because racewriterange reads caller pc.
   114  	JMP	runtime·racewriterange(SB)
   115  
   116  // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
   117  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
   118  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   119  	MOV	$__tsan_write_range(SB), X23
   120  	MOV	addr+0(FP), X11
   121  	MOV	size+8(FP), X12
   122  	MOV	pc+16(FP), X13
   123  	// pc is an interceptor address, but TSan expects it to point to the
   124  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
   125  	ADD	$4, X13
   126  	JMP	racecalladdr<>(SB)
   127  
   128  // If addr (X11) is out of range, do nothing. Otherwise, setup goroutine context and
   129  // invoke racecall. Other arguments are already set.
   130  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   131  	MOV	runtime·racearenastart(SB), X7
   132  	BLT	X11, X7, data			// Before racearena start?
   133  	MOV	runtime·racearenaend(SB), X7
   134  	BLT	X11, X7, call			// Before racearena end?
   135  data:
   136  	MOV	runtime·racedatastart(SB), X7
   137  	BLT	X11, X7, ret			// Before racedata start?
   138  	MOV	runtime·racedataend(SB), X7
   139  	BGE	X11, X7, ret			// At or after racedata end?
   140  call:
   141  	MOV	g_racectx(g), X10
   142  	JMP	racecall<>(SB)
   143  ret:
   144  	RET
   145  
   146  // func runtime·racefuncenter(pc uintptr)
   147  // Called from instrumented code.
   148  TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
   149  	MOV	$__tsan_func_enter(SB), X23
   150  	MOV	X10, X11
   151  	MOV	g_racectx(g), X10
   152  	JMP	racecall<>(SB)
   153  
   154  // Common code for racefuncenter
   155  // X1 = caller's return address
   156  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
   157  	// void __tsan_func_enter(ThreadState *thr, void *pc);
   158  	MOV	$__tsan_func_enter(SB), X23
   159  	MOV	g_racectx(g), X10
   160  	MOV	X1, X11
   161  	JMP	racecall<>(SB)
   162  
   163  // func runtime·racefuncexit()
   164  // Called from instrumented code.
   165  TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
   166  	// void __tsan_func_exit(ThreadState *thr);
   167  	MOV	$__tsan_func_exit(SB), X23
   168  	MOV	g_racectx(g), X10
   169  	JMP	racecall<>(SB)
   170  
   171  // Atomic operations for sync/atomic package.
   172  
   173  // Load
   174  
   175  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
   176  	GO_ARGS
   177  	MOV	$__tsan_go_atomic32_load(SB), X23
   178  	CALL	racecallatomic<>(SB)
   179  	RET
   180  
   181  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
   182  	GO_ARGS
   183  	MOV	$__tsan_go_atomic64_load(SB), X23
   184  	CALL	racecallatomic<>(SB)
   185  	RET
   186  
   187  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
   188  	GO_ARGS
   189  	JMP	sync∕atomic·LoadInt32(SB)
   190  
   191  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
   192  	GO_ARGS
   193  	JMP	sync∕atomic·LoadInt64(SB)
   194  
   195  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
   196  	GO_ARGS
   197  	JMP	sync∕atomic·LoadInt64(SB)
   198  
   199  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
   200  	GO_ARGS
   201  	JMP	sync∕atomic·LoadInt64(SB)
   202  
   203  // Store
   204  
   205  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
   206  	GO_ARGS
   207  	MOV	$__tsan_go_atomic32_store(SB), X23
   208  	CALL	racecallatomic<>(SB)
   209  	RET
   210  
   211  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
   212  	GO_ARGS
   213  	MOV	$__tsan_go_atomic64_store(SB), X23
   214  	CALL	racecallatomic<>(SB)
   215  	RET
   216  
   217  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
   218  	GO_ARGS
   219  	JMP	sync∕atomic·StoreInt32(SB)
   220  
   221  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
   222  	GO_ARGS
   223  	JMP	sync∕atomic·StoreInt64(SB)
   224  
   225  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
   226  	GO_ARGS
   227  	JMP	sync∕atomic·StoreInt64(SB)
   228  
   229  // Swap
   230  
   231  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
   232  	GO_ARGS
   233  	MOV	$__tsan_go_atomic32_exchange(SB), X23
   234  	CALL	racecallatomic<>(SB)
   235  	RET
   236  
   237  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
   238  	GO_ARGS
   239  	MOV	$__tsan_go_atomic64_exchange(SB), X23
   240  	CALL	racecallatomic<>(SB)
   241  	RET
   242  
   243  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
   244  	GO_ARGS
   245  	JMP	sync∕atomic·SwapInt32(SB)
   246  
   247  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
   248  	GO_ARGS
   249  	JMP	sync∕atomic·SwapInt64(SB)
   250  
   251  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
   252  	GO_ARGS
   253  	JMP	sync∕atomic·SwapInt64(SB)
   254  
   255  // Add
   256  
   257  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
   258  	GO_ARGS
   259  	MOV	$__tsan_go_atomic32_fetch_add(SB), X23
   260  	CALL	racecallatomic<>(SB)
   261  	// TSan performed fetch_add, but Go needs add_fetch.
   262  	MOVW	add+8(FP), X5
   263  	MOVW	ret+16(FP), X6
   264  	ADD	X5, X6, X5
   265  	MOVW	X5, ret+16(FP)
   266  	RET
   267  
   268  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
   269  	GO_ARGS
   270  	MOV	$__tsan_go_atomic64_fetch_add(SB), X23
   271  	CALL	racecallatomic<>(SB)
   272  	// TSan performed fetch_add, but Go needs add_fetch.
   273  	MOV	add+8(FP), X5
   274  	MOV	ret+16(FP), X6
   275  	ADD	X5, X6, X5
   276  	MOV	X5, ret+16(FP)
   277  	RET
   278  
   279  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
   280  	GO_ARGS
   281  	JMP	sync∕atomic·AddInt32(SB)
   282  
   283  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
   284  	GO_ARGS
   285  	JMP	sync∕atomic·AddInt64(SB)
   286  
   287  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
   288  	GO_ARGS
   289  	JMP	sync∕atomic·AddInt64(SB)
   290  
   291  // And
   292  TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
   293  	GO_ARGS
   294  	MOV	$__tsan_go_atomic32_fetch_and(SB), X23
   295  	CALL	racecallatomic<>(SB)
   296  	RET
   297  
   298  TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
   299  	GO_ARGS
   300  	MOV	$__tsan_go_atomic64_fetch_and(SB), X23
   301  	CALL	racecallatomic<>(SB)
   302  	RET
   303  
   304  TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
   305  	GO_ARGS
   306  	JMP	sync∕atomic·AndInt32(SB)
   307  
   308  TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
   309  	GO_ARGS
   310  	JMP	sync∕atomic·AndInt64(SB)
   311  
   312  TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
   313  	GO_ARGS
   314  	JMP	sync∕atomic·AndInt64(SB)
   315  
   316  // Or
   317  TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
   318  	GO_ARGS
   319  	MOV	$__tsan_go_atomic32_fetch_or(SB), X23
   320  	CALL	racecallatomic<>(SB)
   321  	RET
   322  
   323  TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
   324  	GO_ARGS
   325  	MOV	$__tsan_go_atomic64_fetch_or(SB), X23
   326  	CALL	racecallatomic<>(SB)
   327  	RET
   328  
   329  TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
   330  	GO_ARGS
   331  	JMP	sync∕atomic·OrInt32(SB)
   332  
   333  TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
   334  	GO_ARGS
   335  	JMP	sync∕atomic·OrInt64(SB)
   336  
   337  TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
   338  	GO_ARGS
   339  	JMP	sync∕atomic·OrInt64(SB)
   340  
   341  // CompareAndSwap
   342  
   343  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
   344  	GO_ARGS
   345  	MOV	$__tsan_go_atomic32_compare_exchange(SB), X23
   346  	CALL	racecallatomic<>(SB)
   347  	RET
   348  
   349  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
   350  	GO_ARGS
   351  	MOV	$__tsan_go_atomic64_compare_exchange(SB), X23
   352  	CALL	racecallatomic<>(SB)
   353  	RET
   354  
   355  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
   356  	GO_ARGS
   357  	JMP	sync∕atomic·CompareAndSwapInt32(SB)
   358  
   359  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
   360  	GO_ARGS
   361  	JMP	sync∕atomic·CompareAndSwapInt64(SB)
   362  
   363  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
   364  	GO_ARGS
   365  	JMP	sync∕atomic·CompareAndSwapInt64(SB)
   366  
   367  // Generic atomic operation implementation.
   368  // X23 = addr of target function
   369  TEXT	racecallatomic<>(SB), NOSPLIT, $0
   370  	// Set up these registers
   371  	// X10 = *ThreadState
   372  	// X11 = caller pc
   373  	// X12 = pc
   374  	// X13 = addr of incoming arg list
   375  
   376  	// Trigger SIGSEGV early.
   377  	MOV	24(X2), X6	// 1st arg is addr. after two times CALL, get it at 24(X2)
   378  	MOVB	(X6), X0	// segv here if addr is bad
   379  	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
   380  	MOV	runtime·racearenastart(SB), X7
   381  	BLT	X6, X7, racecallatomic_data
   382  	MOV	runtime·racearenaend(SB), X7
   383  	BLT	X6, X7, racecallatomic_ok
   384  racecallatomic_data:
   385  	MOV	runtime·racedatastart(SB), X7
   386  	BLT	X6, X7, racecallatomic_ignore
   387  	MOV	runtime·racedataend(SB), X7
   388  	BGE	X6, X7, racecallatomic_ignore
   389  racecallatomic_ok:
   390  	// Addr is within the good range, call the atomic function.
   391  	MOV	g_racectx(g), X10	// goroutine context
   392  	MOV	8(X2), X11		// caller pc
   393  	MOV	X1, X12			// pc
   394  	ADD	$24, X2, X13
   395  	CALL	racecall<>(SB)
   396  	RET
   397  racecallatomic_ignore:
   398  	// Addr is outside the good range.
   399  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
   400  	// An attempt to synchronize on the address would cause crash.
   401  	MOV	X1, X20			// save PC
   402  	MOV	X23, X21			// save target function
   403  	MOV	$__tsan_go_ignore_sync_begin(SB), X23
   404  	MOV	g_racectx(g), X10	// goroutine context
   405  	CALL	racecall<>(SB)
   406  	MOV	X21, X23			// restore the target function
   407  	// Call the atomic function.
   408  	MOV	g_racectx(g), X10	// goroutine context
   409  	MOV	8(X2), X11		// caller pc
   410  	MOV	X20, X12		// pc
   411  	ADD	$24, X2, X13		// arguments
   412  	CALL	racecall<>(SB)
   413  	// Call __tsan_go_ignore_sync_end.
   414  	MOV	$__tsan_go_ignore_sync_end(SB), X23
   415  	MOV	g_racectx(g), X10	// goroutine context
   416  	CALL	racecall<>(SB)
   417  	RET
   418  
   419  // func runtime·racecall(void(*f)(...), ...)
   420  // Calls C function f from race runtime and passes up to 4 arguments to it.
   421  // The arguments are never heap-object-preserving pointers, so we pretend there
   422  // are no arguments.
   423  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
   424  	MOV	fn+0(FP), X23
   425  	MOV	arg0+8(FP), X10
   426  	MOV	arg1+16(FP), X11
   427  	MOV	arg2+24(FP), X12
   428  	MOV	arg3+32(FP), X13
   429  	JMP	racecall<>(SB)
   430  
   431  // Switches SP to g0 stack and calls X23. Arguments are already set.
   432  TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
   433  	MOV	X1, X18				// Save RA in callee save register
   434  	MOV	X2, X19				// Save SP in callee save register
   435  	CALL	runtime·save_g(SB)	// Save g for callbacks
   436  
   437  	MOV	g_m(g), X6
   438  
   439  	// Switch to g0 stack if we aren't already on g0 or gsignal.
   440  	MOV	m_gsignal(X6), X7
   441  	BEQ	X7, g, call
   442  	MOV	m_g0(X6), X7
   443  	BEQ	X7, g, call
   444  
   445  	MOV	(g_sched+gobuf_sp)(X7), X2	// Switch to g0 stack
   446  call:
   447  	JALR	RA, (X23)			// Call C function
   448  	MOV	X19, X2				// Restore SP
   449  	JMP	(X18)				// Return to Go.
   450  
   451  // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
   452  // Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
   453  // The overall effect of Go->C->Go call chain is similar to that of mcall.
   454  // R0 contains command code. R1 contains command-specific context.
   455  // See racecallback for command codes.
   456  TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
   457  	// Handle command raceGetProcCmd (0) here.
   458  	// First, code below assumes that we are on curg, while raceGetProcCmd
   459  	// can be executed on g0. Second, it is called frequently, so will
   460  	// benefit from this fast path.
   461  	BNEZ	X10, rest
   462  	MOV	X1, X23
   463  	MOV	g, X6
   464  	CALL	runtime·load_g(SB)
   465  	MOV	g_m(g), X7
   466  	MOV	m_p(X7), X7
   467  	MOV	p_raceprocctx(X7), X7
   468  	MOV	X7, (X11)
   469  	MOV	X6, g
   470  	JMP	(X23)
   471  rest:
   472  	// Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27),
   473  	// since Go code will not respect this.
   474  	// 8(X2) and 16(X2) are for args passed to racecallback
   475  	SUB	$(27*8), X2
   476  	MOV	X1, (0*8)(X2)
   477  	SAVE_GPR((3*8))
   478  	SAVE_FPR((15*8))
   479  
   480  	// Set g = g0.
   481  	CALL	runtime·load_g(SB)
   482  	MOV	g_m(g), X5
   483  	MOV	m_g0(X5), X6
   484  	BEQ	X6, g, noswitch	// branch if already on g0
   485  	MOV	X6, g
   486  
   487  	MOV	X10, 8(X2)	// func arg
   488  	MOV	X11, 16(X2)	// func arg
   489  	CALL	runtime·racecallback(SB)
   490  
   491  	// All registers are smashed after Go code, reload.
   492  	MOV	g_m(g), X5
   493  	MOV	m_curg(X5), g	// g = m->curg
   494  ret:
   495  	// Restore callee-save registers.
   496  	MOV	(0*8)(X2), X1
   497  	RESTORE_GPR((3*8))
   498  	RESTORE_FPR((15*8))
   499  
   500  	ADD	$(27*8), X2
   501  	JMP	(X1)
   502  
   503  noswitch:
   504  	// already on g0
   505  	MOV	X10, 8(X2)	// func arg
   506  	MOV	X11, 16(X2)	// func arg
   507  	CALL	runtime·racecallback(SB)
   508  	JMP	ret
   509  

// (end of file)