// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/syscall/linux"
12 "internal/strconv"
13 "unsafe"
14 )
15
16
17
18
19 const sigPerThreadSyscall = _SIGRTMIN + 1
20
21 type mOS struct {
22
23
24
25
26
27
28
29 profileTimer int32
30 profileTimerValid atomic.Bool
31
32
33
34 needPerThreadSyscall atomic.Uint8
35
36
37
38 vgetrandomState uintptr
39
40 waitsema uint32
41 }
42
43
44
45
46
47
48
49
50
51
52 const (
53 _FUTEX_PRIVATE_FLAG = 128
54 _FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
55 _FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
56 )
57
58
59
60
61
62
63
64
65
66 func futexsleep(addr *uint32, val uint32, ns int64) {
67
68
69
70
71
72 if ns < 0 {
73 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
74 return
75 }
76
77 var ts timespec
78 ts.setNsec(ns)
79 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, &ts, nil, 0)
80 }
81
82
83
84
85 func futexwakeup(addr *uint32, cnt uint32) {
86 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
87 if ret >= 0 {
88 return
89 }
90
91
92
93
94 systemstack(func() {
95 print("futexwakeup addr=", addr, " returned ", ret, "\n")
96 })
97
98 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
99 }
100
101 func getCPUCount() int32 {
102
103
104
105
106
107
108
109 const maxCPUs = 64 * 1024
110 var buf [maxCPUs / 8]byte
111 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
112 if r < 0 {
113 return 1
114 }
115 n := int32(0)
116 for _, v := range buf[:r] {
117 for v != 0 {
118 n += int32(v & 1)
119 v >>= 1
120 }
121 }
122 if n == 0 {
123 n = 1
124 }
125 return n
126 }
127
128
129 const (
130 _CLONE_VM = 0x100
131 _CLONE_FS = 0x200
132 _CLONE_FILES = 0x400
133 _CLONE_SIGHAND = 0x800
134 _CLONE_PTRACE = 0x2000
135 _CLONE_VFORK = 0x4000
136 _CLONE_PARENT = 0x8000
137 _CLONE_THREAD = 0x10000
138 _CLONE_NEWNS = 0x20000
139 _CLONE_SYSVSEM = 0x40000
140 _CLONE_SETTLS = 0x80000
141 _CLONE_PARENT_SETTID = 0x100000
142 _CLONE_CHILD_CLEARTID = 0x200000
143 _CLONE_UNTRACED = 0x800000
144 _CLONE_CHILD_SETTID = 0x1000000
145 _CLONE_STOPPED = 0x2000000
146 _CLONE_NEWUTS = 0x4000000
147 _CLONE_NEWIPC = 0x8000000
148
149
150
151
152
153
154
155
156 cloneFlags = _CLONE_VM |
157 _CLONE_FS |
158 _CLONE_FILES |
159 _CLONE_SIGHAND |
160 _CLONE_SYSVSEM |
161 _CLONE_THREAD
162 )
163
164
165 func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
166
167
168
169
170 func newosproc(mp *m) {
171 stk := unsafe.Pointer(mp.g0.stack.hi)
172
175 if false {
176 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
177 }
178
179
180
181 var oset sigset
182 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
183 ret := retryOnEAGAIN(func() int32 {
184 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
185
186
187 if r >= 0 {
188 return 0
189 }
190 return -r
191 })
192 sigprocmask(_SIG_SETMASK, &oset, nil)
193
194 if ret != 0 {
195 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
196 if ret == _EAGAIN {
197 println("runtime: may need to increase max user processes (ulimit -u)")
198 }
199 throw("newosproc")
200 }
201 }
202
203
204
205
206 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
207 stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
208 if stack == nil {
209 writeErrStr(failallocatestack)
210 exit(1)
211 }
212 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
213 if ret < 0 {
214 writeErrStr(failthreadcreate)
215 exit(1)
216 }
217 }
218
219 const (
220 _AT_NULL = 0
221 _AT_PAGESZ = 6
222 _AT_PLATFORM = 15
223 _AT_HWCAP = 16
224 _AT_SECURE = 23
225 _AT_RANDOM = 25
226 _AT_HWCAP2 = 26
227 )
228
229 var procAuxv = []byte("/proc/self/auxv\x00")
230
231 var addrspace_vec [1]byte
232
233 func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
234
235 var auxvreadbuf [128]uintptr
236
237 func sysargs(argc int32, argv **byte) {
238 n := argc + 1
239
240
241 for argv_index(argv, n) != nil {
242 n++
243 }
244
245
246 n++
247
248
249 auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
250
251 if pairs := sysauxv(auxvp[:]); pairs != 0 {
252 auxv = auxvp[: pairs*2 : pairs*2]
253 return
254 }
255
256
257
258 fd := open(&procAuxv[0], 0 , 0)
259 if fd < 0 {
260
261
262
263 const size = 256 << 10
264 p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
265 if err != 0 {
266 return
267 }
268 var n uintptr
269 for n = 4 << 10; n < size; n <<= 1 {
270 err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
271 if err == 0 {
272 physPageSize = n
273 break
274 }
275 }
276 if physPageSize == 0 {
277 physPageSize = size
278 }
279 munmap(p, size)
280 return
281 }
282
283 n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
284 closefd(fd)
285 if n < 0 {
286 return
287 }
288
289
290 auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
291 pairs := sysauxv(auxvreadbuf[:])
292 auxv = auxvreadbuf[: pairs*2 : pairs*2]
293 }
294
295
296 var secureMode bool
297
298 func sysauxv(auxv []uintptr) (pairs int) {
299
300
301 var i int
302 for ; auxv[i] != _AT_NULL; i += 2 {
303 tag, val := auxv[i], auxv[i+1]
304 switch tag {
305 case _AT_RANDOM:
306
307
308
309
310
311
312 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
313
314 case _AT_PAGESZ:
315 physPageSize = val
316
317 case _AT_SECURE:
318 secureMode = val == 1
319 }
320
321 archauxv(tag, val)
322 vdsoauxv(tag, val)
323 }
324 return i / 2
325 }
326
327 var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
328
329 func getHugePageSize() uintptr {
330 var numbuf [20]byte
331 fd := open(&sysTHPSizePath[0], 0 , 0)
332 if fd < 0 {
333 return 0
334 }
335 ptr := noescape(unsafe.Pointer(&numbuf[0]))
336 n := read(fd, ptr, int32(len(numbuf)))
337 closefd(fd)
338 if n <= 0 {
339 return 0
340 }
341 n--
342 v, err := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
343 if err != nil || v < 0 {
344 v = 0
345 }
346 if v&(v-1) != 0 {
347
348 return 0
349 }
350 return uintptr(v)
351 }
352
353 func osinit() {
354 numCPUStartup = getCPUCount()
355 physHugePageSize = getHugePageSize()
356 vgetrandomInit()
357 configure64bitsTimeOn32BitsArchitectures()
358 }
359
360 var urandom_dev = []byte("/dev/urandom\x00")
361
362 func readRandom(r []byte) int {
363
364
365 fd := open(&urandom_dev[0], 0 , 0)
366 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
367 closefd(fd)
368 return int(n)
369 }
370
371 func goenvs() {
372 goenvs_unix()
373 }
374
375
376
377
378
379
380
381 func libpreinit() {
382 initsig(true)
383 }
384
385
386
387 func mpreinit(mp *m) {
388 mp.gsignal = malg(32 * 1024)
389 mp.gsignal.m = mp
390 }
391
392 func gettid() uint32
393
394
395
396 func minit() {
397 minitSignals()
398
399
400
401
402 getg().m.procid = uint64(gettid())
403 }
404
405
406
407
408 func unminit() {
409 unminitSignals()
410 getg().m.procid = 0
411 }
412
413
414
415
416
417
418
419 func mdestroy(mp *m) {
420 }
421
422
423
424
425
426 func sigreturn__sigaction()
427 func sigtramp()
428 func cgoSigtramp()
429
430
431 func sigaltstack(new, old *stackt)
432
433
434 func setitimer(mode int32, new, old *itimerval)
435
436
437 func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32
438
439
440 func timer_delete(timerid int32) int32
441
442
443 func rtsigprocmask(how int32, new, old *sigset, size int32)
444
445
446
447 func sigprocmask(how int32, new, old *sigset) {
448 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
449 }
450
451 func raise(sig uint32)
452 func raiseproc(sig uint32)
453
454
455 func sched_getaffinity(pid, len uintptr, buf *byte) int32
456 func osyield()
457
458
459 func osyield_no_g() {
460 osyield()
461 }
462
463 func pipe2(flags int32) (r, w int32, errno int32)
464
465
466 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
467 r, _, err := linux.Syscall6(linux.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
468 return int32(r), int32(err)
469 }
470
471 const (
472 _si_max_size = 128
473 _sigev_max_size = 64
474 )
475
476
477
478 func setsig(i uint32, fn uintptr) {
479 var sa sigactiont
480 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
481 sigfillset(&sa.sa_mask)
482
483
484
485
486 if GOARCH == "386" || GOARCH == "amd64" {
487 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
488 }
489 if fn == abi.FuncPCABIInternal(sighandler) {
490 if iscgo {
491 fn = abi.FuncPCABI0(cgoSigtramp)
492 } else {
493 fn = abi.FuncPCABI0(sigtramp)
494 }
495 }
496 sa.sa_handler = fn
497 sigaction(i, &sa, nil)
498 }
499
500
501
502 func setsigstack(i uint32) {
503 var sa sigactiont
504 sigaction(i, nil, &sa)
505 if sa.sa_flags&_SA_ONSTACK != 0 {
506 return
507 }
508 sa.sa_flags |= _SA_ONSTACK
509 sigaction(i, &sa, nil)
510 }
511
512
513
514 func getsig(i uint32) uintptr {
515 var sa sigactiont
516 sigaction(i, nil, &sa)
517 return sa.sa_handler
518 }
519
520
521
522
523 func setSignalstackSP(s *stackt, sp uintptr) {
524 *(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
525 }
526
527
528 func (c *sigctxt) fixsigcode(sig uint32) {
529 }
530
531
532
533
534 func sysSigaction(sig uint32, new, old *sigactiont) {
535 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
536
537
538
539
540
541
542
543
544
545
546
547 if sig != 32 && sig != 33 && sig != 64 {
548
549 systemstack(func() {
550 throw("sigaction failed")
551 })
552 }
553 }
554 }
555
556
557
558
559 func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
560
561
562
563
564
565
566
567
568
569 func fixSigactionForCgo(new *sigactiont) {
570 if GOARCH == "386" && new != nil {
571 new.sa_flags &^= _SA_RESTORER
572 new.sa_restorer = 0
573 }
574 }
575
576 func getpid() int
577 func tgkill(tgid, tid, sig int)
578
579
580 func signalM(mp *m, sig int) {
581 tgkill(getpid(), int(mp.procid), sig)
582 }
583
584
585
586
587
588
589
590
591 func validSIGPROF(mp *m, c *sigctxt) bool {
592 code := int32(c.sigcode())
593 setitimer := code == _SI_KERNEL
594 timer_create := code == _SI_TIMER
595
596 if !(setitimer || timer_create) {
597
598
599
600 return true
601 }
602
603 if mp == nil {
604
605
606
607
608
609
610
611
612
613
614
615
616 return setitimer
617 }
618
619
620
621 if mp.profileTimerValid.Load() {
622
623
624
625
626
627 return timer_create
628 }
629
630
631 return setitimer
632 }
633
634 func setProcessCPUProfiler(hz int32) {
635 setProcessCPUProfilerTimer(hz)
636 }
637
638 func setThreadCPUProfiler(hz int32) {
639 mp := getg().m
640 mp.profilehz = hz
641
642
643 if mp.profileTimerValid.Load() {
644 timerid := mp.profileTimer
645 mp.profileTimerValid.Store(false)
646 mp.profileTimer = 0
647
648 ret := timer_delete(timerid)
649 if ret != 0 {
650 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
651 throw("timer_delete")
652 }
653 }
654
655 if hz == 0 {
656
657 return
658 }
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679 spec := new(itimerspec)
680 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
681 spec.it_interval.setNsec(1e9 / int64(hz))
682
683 var timerid int32
684 var sevp sigevent
685 sevp.notify = _SIGEV_THREAD_ID
686 sevp.signo = _SIGPROF
687 sevp.sigev_notify_thread_id = int32(mp.procid)
688 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
689 if ret != 0 {
690
691
692 return
693 }
694
695 ret = timer_settime(timerid, 0, spec, nil)
696 if ret != 0 {
697 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
698 ", 0, {interval: {",
699 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
700 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
701 throw("timer_settime")
702 }
703
704 mp.profileTimer = timerid
705 mp.profileTimerValid.Store(true)
706 }
707
708
709
710 type perThreadSyscallArgs struct {
711 trap uintptr
712 a1 uintptr
713 a2 uintptr
714 a3 uintptr
715 a4 uintptr
716 a5 uintptr
717 a6 uintptr
718 r1 uintptr
719 r2 uintptr
720 }
721
722
723
724
725
726
727 var perThreadSyscall perThreadSyscallArgs
728
729
730
731
732
733
734
735
736
737 func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
738 if iscgo {
739
740 panic("doAllThreadsSyscall not supported with cgo enabled")
741 }
742
743
744
745
746
747
748
749
750 stw := stopTheWorld(stwAllThreadsSyscall)
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772 allocmLock.lock()
773
774
775
776
777
778
779 acquirem()
780
781
782
783
784
785
786 r1, r2, errno := linux.Syscall6(trap, a1, a2, a3, a4, a5, a6)
787 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
788
789 r2 = 0
790 }
791 if errno != 0 {
792 releasem(getg().m)
793 allocmLock.unlock()
794 startTheWorld(stw)
795 return r1, r2, errno
796 }
797
798 perThreadSyscall = perThreadSyscallArgs{
799 trap: trap,
800 a1: a1,
801 a2: a2,
802 a3: a3,
803 a4: a4,
804 a5: a5,
805 a6: a6,
806 r1: r1,
807 r2: r2,
808 }
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845 for mp := allm; mp != nil; mp = mp.alllink {
846 for atomic.Load64(&mp.procid) == 0 {
847
848 osyield()
849 }
850 }
851
852
853
854 gp := getg()
855 tid := gp.m.procid
856 for mp := allm; mp != nil; mp = mp.alllink {
857 if atomic.Load64(&mp.procid) == tid {
858
859 continue
860 }
861 mp.needPerThreadSyscall.Store(1)
862 signalM(mp, sigPerThreadSyscall)
863 }
864
865
866 for mp := allm; mp != nil; mp = mp.alllink {
867 if mp.procid == tid {
868 continue
869 }
870 for mp.needPerThreadSyscall.Load() != 0 {
871 osyield()
872 }
873 }
874
875 perThreadSyscall = perThreadSyscallArgs{}
876
877 releasem(getg().m)
878 allocmLock.unlock()
879 startTheWorld(stw)
880
881 return r1, r2, errno
882 }
883
884
885
886
887
888
889
890 func runPerThreadSyscall() {
891 gp := getg()
892 if gp.m.needPerThreadSyscall.Load() == 0 {
893 return
894 }
895
896 args := perThreadSyscall
897 r1, r2, errno := linux.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
898 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
899
900 r2 = 0
901 }
902 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
903 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
904 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
905 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
906 }
907
908 gp.m.needPerThreadSyscall.Store(0)
909 }
910
911 const (
912 _SI_USER = 0
913 _SI_TKILL = -6
914 _SYS_SECCOMP = 1
915 )
916
917
918
919
920
921 func (c *sigctxt) sigFromUser() bool {
922 code := int32(c.sigcode())
923 return code == _SI_USER || code == _SI_TKILL
924 }
925
926
927
928
929 func (c *sigctxt) sigFromSeccomp() bool {
930 code := int32(c.sigcode())
931 return code == _SYS_SECCOMP
932 }
933
934
935 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
936 r, _, err := linux.Syscall6(linux.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
937 return int32(r), int32(err)
938 }
939
940 type kernelVersion struct {
941 major int
942 minor int
943 }
944
945
946
947 func getKernelVersion() kernelVersion {
948 var buf linux.Utsname
949 if e := linux.Uname(&buf); e != 0 {
950 throw("uname failed")
951 }
952
953 rel := gostringnocopy(&buf.Release[0])
954 major, minor, _, ok := parseRelease(rel)
955 if !ok {
956 throw("failed to parse kernel version from uname")
957 }
958 return kernelVersion{major: major, minor: minor}
959 }
960
961
962
963
964 func parseRelease(rel string) (major, minor, patch int, ok bool) {
965
966 for i := 0; i < len(rel); i++ {
967 if rel[i] == '-' || rel[i] == '+' {
968 rel = rel[:i]
969 break
970 }
971 }
972
973 next := func() (int, bool) {
974 for i := 0; i < len(rel); i++ {
975 if rel[i] == '.' {
976 ver, err := strconv.Atoi(rel[:i])
977 rel = rel[i+1:]
978 return ver, err == nil
979 }
980 }
981 ver, err := strconv.Atoi(rel)
982 rel = ""
983 return ver, err == nil
984 }
985 if major, ok = next(); !ok || rel == "" {
986 return
987 }
988 if minor, ok = next(); !ok || rel == "" {
989 return
990 }
991 patch, ok = next()
992 return
993 }
994
995
996
997 func (kv kernelVersion) GE(x, y int) bool {
998 return kv.major > x || (kv.major == x && kv.minor >= y)
999 }
1000