// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_riscv64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go.

// A brief recap of the riscv64 C calling convention.
// Arguments are passed in X10...X17.
// Callee-saved registers are: X8, X9, X18..X27.
// Temporary registers are: X5..X7, X28..X31.

// When calling racecalladdr, X23 is the call target address.

// The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr.

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOV	$__tsan_read(SB), X23
	MOV	X10, X11	// addr
	MOV	X1, X12		// caller pc
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOV	$__tsan_read_pc(SB), X23
	MOV	addr+0(FP), X11
	MOV	callpc+8(FP), X12
	MOV	pc+16(FP), X13
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOV	$__tsan_write(SB), X23
	MOV	X10, X11	// addr
	MOV	X1, X12		// caller pc
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOV	$__tsan_write_pc(SB), X23
	MOV	addr+0(FP), X11
	MOV	callpc+8(FP), X12
	MOV	pc+16(FP), X13
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_read_range(SB), X23
	MOV	X11, X12	// size (move before addr clobbers X11)
	MOV	X10, X11	// addr
	MOV	X1, X13		// caller pc
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_read_range(SB), X23
	MOV	addr+0(FP), X11
	MOV	size+8(FP), X12
	MOV	pc+16(FP), X13
	// pc is an interceptor address, but TSan expects it to point to the
	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
	ADD	$4, X13
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_write_range(SB), X23
	MOV	X11, X12	// size (move before addr clobbers X11)
	MOV	X10, X11	// addr
	MOV	X1, X13		// caller pc
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOV	$__tsan_write_range(SB), X23
	MOV	addr+0(FP), X11
	MOV	size+8(FP), X12
	MOV	pc+16(FP), X13
	// pc is an interceptor address, but TSan expects it to point to the
	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
	ADD	$4, X13
	JMP	racecalladdr<>(SB)

// If addr (X11) is out of range, do nothing. Otherwise, setup goroutine
// context and invoke racecall. Other arguments are already set.
// Only addresses inside the race shadow-mapped regions (the heap arena and
// the data/bss sections) are reported to TSan; everything else is skipped.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOV	runtime·racearenastart(SB), X7
	BLT	X11, X7, data	// Before racearena start?
	MOV	runtime·racearenaend(SB), X7
	BLT	X11, X7, call	// Before racearena end?
data:
	MOV	runtime·racedatastart(SB), X7
	BLT	X11, X7, ret	// Before racedata start?
	MOV	runtime·racedataend(SB), X7
	BGE	X11, X7, ret	// At or after racedata end?
call:
	MOV	g_racectx(g), X10	// ThreadState *thr
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOV	$__tsan_func_enter(SB), X23
	MOV	X10, X11	// pc
	MOV	g_racectx(g), X10
	JMP	racecall<>(SB)

// Common code for racefuncenter.
// X1 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOV	$__tsan_func_enter(SB), X23
	MOV	g_racectx(g), X10
	MOV	X1, X11		// pc = caller's return address
	JMP	racecall<>(SB)

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	// void __tsan_func_exit(ThreadState *thr);
	MOV	$__tsan_func_exit(SB), X23
	MOV	g_racectx(g), X10
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load

TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOV	$__tsan_go_atomic32_load(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOV	$__tsan_go_atomic64_load(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr/pointer variants are bit-identical to the signed
// ones, so they tail-call the Int32/Int64 implementations.
TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store

TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOV	$__tsan_go_atomic32_store(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOV	$__tsan_go_atomic64_store(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap

TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_exchange(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_exchange(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add

TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_add(SB), X23
	CALL	racecallatomic<>(SB)
	// TSan performed fetch_add, but Go needs add_fetch:
	// rewrite the out-arg to old+delta.
	MOVW	add+8(FP), X5
	MOVW	ret+16(FP), X6
	ADD	X5, X6, X5
	MOVW	X5, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_add(SB), X23
	CALL	racecallatomic<>(SB)
	// TSan performed fetch_add, but Go needs add_fetch:
	// rewrite the out-arg to old+delta.
	MOV	add+8(FP), X5
	MOV	ret+16(FP), X6
	ADD	X5, X6, X5
	MOV	X5, ret+16(FP)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And

TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_and(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_and(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or

TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOV	$__tsan_go_atomic32_fetch_or(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOV	$__tsan_go_atomic64_fetch_or(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap

TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOV	$__tsan_go_atomic32_compare_exchange(SB), X23
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOV	$__tsan_go_atomic64_compare_exchange(SB), X23
	CALL	racecallatomic<>(SB)
	RET

// The unsigned/uintptr variants are bit-identical to the signed ones.
TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// X23 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// X10 = *ThreadState
	// X11 = caller pc
	// X12 = pc
	// X13 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOV	24(X2), X6	// 1st arg is addr. after two times CALL, get it at 24(X2)
	MOVB	(X6), X0	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOV	runtime·racearenastart(SB), X7
	BLT	X6, X7, racecallatomic_data
	MOV	runtime·racearenaend(SB), X7
	BLT	X6, X7, racecallatomic_ok
racecallatomic_data:
	MOV	runtime·racedatastart(SB), X7
	BLT	X6, X7, racecallatomic_ignore
	MOV	runtime·racedataend(SB), X7
	BGE	X6, X7, racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOV	g_racectx(g), X10	// goroutine context
	MOV	8(X2), X11		// caller pc
	MOV	X1, X12			// pc
	ADD	$24, X2, X13		// arguments
	CALL	racecall<>(SB)
	RET
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause crash.
	MOV	X1, X20		// save PC (X20/X21 are callee-saved across racecall)
	MOV	X23, X21	// save target function
	MOV	$__tsan_go_ignore_sync_begin(SB), X23
	MOV	g_racectx(g), X10	// goroutine context
	CALL	racecall<>(SB)
	MOV	X21, X23	// restore the target function
	// Call the atomic function.
	MOV	g_racectx(g), X10	// goroutine context
	MOV	8(X2), X11	// caller pc
	MOV	X20, X12	// pc
	ADD	$24, X2, X13	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOV	$__tsan_go_ignore_sync_end(SB), X23
	MOV	g_racectx(g), X10	// goroutine context
	CALL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend
// there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOV	fn+0(FP), X23
	MOV	arg0+8(FP), X10
	MOV	arg1+16(FP), X11
	MOV	arg2+24(FP), X12
	MOV	arg3+32(FP), X13
	JMP	racecall<>(SB)

// Switches SP to g0 stack and calls X23. Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOV	X1, X18			// Save RA in callee-saved register
	MOV	X2, X19			// Save SP in callee-saved register
	CALL	runtime·save_g(SB)	// Save g for callbacks

	MOV	g_m(g), X6

	// Switch to g0 stack if we aren't already on g0 or gsignal.
	MOV	m_gsignal(X6), X7
	BEQ	X7, g, call
	MOV	m_g0(X6), X7
	BEQ	X7, g, call

	MOV	(g_sched+gobuf_sp)(X7), X2	// Switch to g0 stack
call:
	JALR	RA, (X23)	// Call C function
	MOV	X19, X2		// Restore SP
	JMP	(X18)		// Return to Go.

// C->Go callback thunk that allows to call runtime·racecallback from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
// X10 contains command code. X11 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	BNEZ	X10, rest
	MOV	X1, X23		// save return address; load_g below needs X1
	MOV	g, X6		// save g; load_g clobbers it
	CALL	runtime·load_g(SB)
	MOV	g_m(g), X7
	MOV	m_p(X7), X7
	MOV	p_raceprocctx(X7), X7
	MOV	X7, (X11)	// *context = p->raceprocctx
	MOV	X6, g		// restore g
	JMP	(X23)
rest:
	// Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27),
	// since Go code will not respect this.
	// 8(X2) and 16(X2) are for args passed to racecallback.
	SUB	$(27*8), X2
	MOV	X1, (0*8)(X2)
	SAVE_GPR((3*8))
	SAVE_FPR((15*8))

	// Set g = g0.
	CALL	runtime·load_g(SB)
	MOV	g_m(g), X5
	MOV	m_g0(X5), X6
	BEQ	X6, g, noswitch	// branch if already on g0
	MOV	X6, g

	MOV	X10, 8(X2)	// func arg
	MOV	X11, 16(X2)	// func arg
	CALL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOV	g_m(g), X5
	MOV	m_curg(X5), g	// g = m->curg
ret:
	// Restore callee-save registers.
	MOV	(0*8)(X2), X1
	RESTORE_GPR((3*8))
	RESTORE_FPR((15*8))

	ADD	$(27*8), X2
	JMP	(X1)

noswitch:
	// already on g0
	MOV	X10, 8(X2)	// func arg
	MOV	X11, 16(X2)	// func arg
	CALL	runtime·racecallback(SB)
	JMP	ret
