Source file src/runtime/string.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/abi"
     9  	"internal/bytealg"
    10  	"internal/goarch"
    11  	"internal/goos"
    12  	"internal/runtime/math"
    13  	"internal/runtime/sys"
    14  	"internal/strconv"
    15  	"unsafe"
    16  )
    17  
// tmpStringBufSize is the length, in bytes, of the temporary buffer type
// tmpBuf declared in this file.
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32
    21  
// tmpBuf is a fixed-size byte array of tmpStringBufSize bytes. The
// conversion and concatenation routines in this file accept a *tmpBuf and,
// when it is non-nil and the result is small enough, store the result's
// bytes in it.
type tmpBuf [tmpStringBufSize]byte
    23  
    24  // concatstrings implements a Go string concatenation x+y+z+...
    25  // The operands are passed in the slice a.
    26  // If buf != nil, the compiler has determined that the result does not
    27  // escape the calling function, so the string data can be stored in buf
    28  // if small enough.
    29  func concatstrings(buf *tmpBuf, a []string) string {
    30  	idx := 0
    31  	l := 0
    32  	count := 0
    33  	for i, x := range a {
    34  		n := len(x)
    35  		if n == 0 {
    36  			continue
    37  		}
    38  		if l+n < l {
    39  			throw("string concatenation too long")
    40  		}
    41  		l += n
    42  		count++
    43  		idx = i
    44  	}
    45  	if count == 0 {
    46  		return ""
    47  	}
    48  
    49  	// If there is just one string and either it is not on the stack
    50  	// or our result does not escape the calling frame (buf != nil),
    51  	// then we can return that string directly.
    52  	if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
    53  		return a[idx]
    54  	}
    55  	s, b := rawstringtmp(buf, l)
    56  	for _, x := range a {
    57  		n := copy(b, x)
    58  		b = b[n:]
    59  	}
    60  	return s
    61  }
    62  
    63  // concatstring2 helps make the callsite smaller (compared to concatstrings),
    64  // and we think this is currently more valuable than omitting one call in the
    65  // chain, the same goes for concatstring{3,4,5}.
    66  func concatstring2(buf *tmpBuf, a0, a1 string) string {
    67  	return concatstrings(buf, []string{a0, a1})
    68  }
    69  
    70  func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
    71  	return concatstrings(buf, []string{a0, a1, a2})
    72  }
    73  
    74  func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
    75  	return concatstrings(buf, []string{a0, a1, a2, a3})
    76  }
    77  
    78  func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
    79  	return concatstrings(buf, []string{a0, a1, a2, a3, a4})
    80  }
    81  
    82  // concatbytes implements a Go string concatenation x+y+z+... returning a slice
    83  // of bytes.
    84  // The operands are passed in the slice a.
    85  func concatbytes(buf *tmpBuf, a []string) []byte {
    86  	l := 0
    87  	for _, x := range a {
    88  		n := len(x)
    89  		if l+n < l {
    90  			throw("string concatenation too long")
    91  		}
    92  		l += n
    93  	}
    94  	if l == 0 {
    95  		// This is to match the return type of the non-optimized concatenation.
    96  		return []byte{}
    97  	}
    98  
    99  	var b []byte
   100  	if buf != nil && l <= len(buf) {
   101  		*buf = tmpBuf{}
   102  		b = buf[:l]
   103  	} else {
   104  		b = rawbyteslice(l)
   105  	}
   106  	offset := 0
   107  	for _, x := range a {
   108  		copy(b[offset:], x)
   109  		offset += len(x)
   110  	}
   111  
   112  	return b
   113  }
   114  
   115  // concatbyte2 helps make the callsite smaller (compared to concatbytes),
   116  // and we think this is currently more valuable than omitting one call in
   117  // the chain, the same goes for concatbyte{3,4,5}.
   118  func concatbyte2(buf *tmpBuf, a0, a1 string) []byte {
   119  	return concatbytes(buf, []string{a0, a1})
   120  }
   121  
   122  func concatbyte3(buf *tmpBuf, a0, a1, a2 string) []byte {
   123  	return concatbytes(buf, []string{a0, a1, a2})
   124  }
   125  
   126  func concatbyte4(buf *tmpBuf, a0, a1, a2, a3 string) []byte {
   127  	return concatbytes(buf, []string{a0, a1, a2, a3})
   128  }
   129  
   130  func concatbyte5(buf *tmpBuf, a0, a1, a2, a3, a4 string) []byte {
   131  	return concatbytes(buf, []string{a0, a1, a2, a3, a4})
   132  }
   133  
   134  // slicebytetostring converts a byte slice to a string.
   135  // It is inserted by the compiler into generated code.
   136  // ptr is a pointer to the first element of the slice;
   137  // n is the length of the slice.
   138  // Buf is a fixed-size buffer for the result,
   139  // it is not nil if the result does not escape.
   140  func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
   141  	if n == 0 {
   142  		// Turns out to be a relatively common case.
   143  		// Consider that you want to parse out data between parens in "foo()bar",
   144  		// you find the indices and convert the subslice to string.
   145  		return ""
   146  	}
   147  	if raceenabled {
   148  		racereadrangepc(unsafe.Pointer(ptr),
   149  			uintptr(n),
   150  			sys.GetCallerPC(),
   151  			abi.FuncPCABIInternal(slicebytetostring))
   152  	}
   153  	if msanenabled {
   154  		msanread(unsafe.Pointer(ptr), uintptr(n))
   155  	}
   156  	if asanenabled {
   157  		asanread(unsafe.Pointer(ptr), uintptr(n))
   158  	}
   159  	if n == 1 {
   160  		p := unsafe.Pointer(&staticuint64s[*ptr])
   161  		if goarch.BigEndian {
   162  			p = add(p, 7)
   163  		}
   164  		return unsafe.String((*byte)(p), 1)
   165  	}
   166  
   167  	var p unsafe.Pointer
   168  	if buf != nil && n <= len(buf) {
   169  		p = unsafe.Pointer(buf)
   170  	} else {
   171  		p = mallocgc(uintptr(n), nil, false)
   172  	}
   173  	memmove(p, unsafe.Pointer(ptr), uintptr(n))
   174  	return unsafe.String((*byte)(p), n)
   175  }
   176  
   177  // stringDataOnStack reports whether the string's data is
   178  // stored on the current goroutine's stack.
   179  func stringDataOnStack(s string) bool {
   180  	ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
   181  	stk := getg().stack
   182  	return stk.lo <= ptr && ptr < stk.hi
   183  }
   184  
   185  func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
   186  	if buf != nil && l <= len(buf) {
   187  		b = buf[:l]
   188  		s = slicebytetostringtmp(&b[0], len(b))
   189  	} else {
   190  		s, b = rawstring(l)
   191  	}
   192  	return
   193  }
   194  
   195  // slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
   196  //
   197  // Callers need to ensure that the returned string will not be used after
   198  // the calling goroutine modifies the original slice or synchronizes with
   199  // another goroutine.
   200  //
   201  // The function is only called when instrumenting
   202  // and otherwise intrinsified by the compiler.
   203  //
   204  // Some internal compiler optimizations use this function.
   205  //   - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
   206  //     where k is []byte, T1 to Tn is a nesting of struct and array literals.
   207  //   - Used for "<"+string(b)+">" concatenation where b is []byte.
   208  //   - Used for string(b)=="foo" comparison where b is []byte.
   209  func slicebytetostringtmp(ptr *byte, n int) string {
   210  	if raceenabled && n > 0 {
   211  		racereadrangepc(unsafe.Pointer(ptr),
   212  			uintptr(n),
   213  			sys.GetCallerPC(),
   214  			abi.FuncPCABIInternal(slicebytetostringtmp))
   215  	}
   216  	if msanenabled && n > 0 {
   217  		msanread(unsafe.Pointer(ptr), uintptr(n))
   218  	}
   219  	if asanenabled && n > 0 {
   220  		asanread(unsafe.Pointer(ptr), uintptr(n))
   221  	}
   222  	return unsafe.String(ptr, n)
   223  }
   224  
   225  func stringtoslicebyte(buf *tmpBuf, s string) []byte {
   226  	var b []byte
   227  	if buf != nil && len(s) <= len(buf) {
   228  		*buf = tmpBuf{}
   229  		b = buf[:len(s)]
   230  	} else {
   231  		b = rawbyteslice(len(s))
   232  	}
   233  	copy(b, s)
   234  	return b
   235  }
   236  
   237  func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
   238  	// two passes.
   239  	// unlike slicerunetostring, no race because strings are immutable.
   240  	n := 0
   241  	for range s {
   242  		n++
   243  	}
   244  
   245  	var a []rune
   246  	if buf != nil && n <= len(buf) {
   247  		*buf = [tmpStringBufSize]rune{}
   248  		a = buf[:n]
   249  	} else {
   250  		a = rawruneslice(n)
   251  	}
   252  
   253  	n = 0
   254  	for _, r := range s {
   255  		a[n] = r
   256  		n++
   257  	}
   258  	return a
   259  }
   260  
   261  func slicerunetostring(buf *tmpBuf, a []rune) string {
   262  	if raceenabled && len(a) > 0 {
   263  		racereadrangepc(unsafe.Pointer(&a[0]),
   264  			uintptr(len(a))*unsafe.Sizeof(a[0]),
   265  			sys.GetCallerPC(),
   266  			abi.FuncPCABIInternal(slicerunetostring))
   267  	}
   268  	if msanenabled && len(a) > 0 {
   269  		msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   270  	}
   271  	if asanenabled && len(a) > 0 {
   272  		asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   273  	}
   274  	var dum [4]byte
   275  	size1 := 0
   276  	for _, r := range a {
   277  		size1 += encoderune(dum[:], r)
   278  	}
   279  	s, b := rawstringtmp(buf, size1+3)
   280  	size2 := 0
   281  	for _, r := range a {
   282  		// check for race
   283  		if size2 >= size1 {
   284  			break
   285  		}
   286  		size2 += encoderune(b[size2:], r)
   287  	}
   288  	return s[:size2]
   289  }
   290  
// stringStruct is a pointer-and-length pair that this file converts to and
// from string values through unsafe pointer casts (see stringStructOf).
type stringStruct struct {
	// str points at the first byte of the string data.
	str unsafe.Pointer
	// len is the length of the string data.
	len int
}
   295  
// stringStructDWARF is a variant of stringStruct whose str field has the
// pointer type *byte, for DWARF debugging.
type stringStructDWARF struct {
	str *byte
	len int
}
   301  
   302  func stringStructOf(sp *string) *stringStruct {
   303  	return (*stringStruct)(unsafe.Pointer(sp))
   304  }
   305  
   306  func intstring(buf *[4]byte, v int64) (s string) {
   307  	var b []byte
   308  	if buf != nil {
   309  		b = buf[:]
   310  		s = slicebytetostringtmp(&b[0], len(b))
   311  	} else {
   312  		s, b = rawstring(4)
   313  	}
   314  	if int64(rune(v)) != v {
   315  		v = runeError
   316  	}
   317  	n := encoderune(b, rune(v))
   318  	return s[:n]
   319  }
   320  
   321  // rawstring allocates storage for a new string. The returned
   322  // string and byte slice both refer to the same storage.
   323  // The storage is not zeroed. Callers should use
   324  // b to set the string contents and then drop b.
   325  func rawstring(size int) (s string, b []byte) {
   326  	p := mallocgc(uintptr(size), nil, false)
   327  	return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
   328  }
   329  
   330  // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
   331  func rawbyteslice(size int) (b []byte) {
   332  	cap := roundupsize(uintptr(size), true)
   333  	p := mallocgc(cap, nil, false)
   334  	if cap != uintptr(size) {
   335  		memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
   336  	}
   337  
   338  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
   339  	return
   340  }
   341  
   342  // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
   343  func rawruneslice(size int) (b []rune) {
   344  	if uintptr(size) > maxAlloc/4 {
   345  		throw("out of memory")
   346  	}
   347  	mem := roundupsize(uintptr(size)*4, true)
   348  	p := mallocgc(mem, nil, false)
   349  	if mem != uintptr(size)*4 {
   350  		memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
   351  	}
   352  
   353  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
   354  	return
   355  }
   356  
   357  // used by cmd/cgo
   358  func gobytes(p *byte, n int) (b []byte) {
   359  	if n == 0 {
   360  		return make([]byte, 0)
   361  	}
   362  
   363  	if n < 0 || uintptr(n) > maxAlloc {
   364  		panic(errorString("gobytes: length out of range"))
   365  	}
   366  
   367  	bp := mallocgc(uintptr(n), nil, false)
   368  	memmove(bp, unsafe.Pointer(p), uintptr(n))
   369  
   370  	*(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
   371  	return
   372  }
   373  
   374  // This is exported via linkname to assembly in syscall (for Plan9) and cgo.
   375  //
   376  //go:linkname gostring
   377  func gostring(p *byte) string {
   378  	l := findnull(p)
   379  	if l == 0 {
   380  		return ""
   381  	}
   382  	s, b := rawstring(l)
   383  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   384  	return s
   385  }
   386  
   387  // internal_syscall_gostring is a version of gostring for internal/syscall/unix.
   388  //
   389  //go:linkname internal_syscall_gostring internal/syscall/unix.gostring
   390  func internal_syscall_gostring(p *byte) string {
   391  	return gostring(p)
   392  }
   393  
   394  func gostringn(p *byte, l int) string {
   395  	if l == 0 {
   396  		return ""
   397  	}
   398  	s, b := rawstring(l)
   399  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   400  	return s
   401  }
   402  
   403  // parseByteCount parses a string that represents a count of bytes.
   404  //
   405  // s must match the following regular expression:
   406  //
   407  //	^[0-9]+(([KMGT]i)?B)?$
   408  //
   409  // In other words, an integer byte count with an optional unit
   410  // suffix. Acceptable suffixes include one of
   411  // - KiB, MiB, GiB, TiB which represent binary IEC/ISO 80000 units, or
   412  // - B, which just represents bytes.
   413  //
   414  // Returns an int64 because that's what its callers want and receive,
   415  // but the result is always non-negative.
   416  func parseByteCount(s string) (int64, bool) {
   417  	// The empty string is not valid.
   418  	if s == "" {
   419  		return 0, false
   420  	}
   421  	// Handle the easy non-suffix case.
   422  	last := s[len(s)-1]
   423  	if last >= '0' && last <= '9' {
   424  		n, err := strconv.ParseInt(s, 10, 64)
   425  		if err != nil || n < 0 {
   426  			return 0, false
   427  		}
   428  		return n, true
   429  	}
   430  	// Failing a trailing digit, this must always end in 'B'.
   431  	// Also at this point there must be at least one digit before
   432  	// that B.
   433  	if last != 'B' || len(s) < 2 {
   434  		return 0, false
   435  	}
   436  	// The one before that must always be a digit or 'i'.
   437  	if c := s[len(s)-2]; c >= '0' && c <= '9' {
   438  		// Trivial 'B' suffix.
   439  		n, err := strconv.ParseInt(s[:len(s)-1], 10, 64)
   440  		if err != nil || n < 0 {
   441  			return 0, false
   442  		}
   443  		return n, true
   444  	} else if c != 'i' {
   445  		return 0, false
   446  	}
   447  	// Finally, we need at least 4 characters now, for the unit
   448  	// prefix and at least one digit.
   449  	if len(s) < 4 {
   450  		return 0, false
   451  	}
   452  	power := 0
   453  	switch s[len(s)-3] {
   454  	case 'K':
   455  		power = 1
   456  	case 'M':
   457  		power = 2
   458  	case 'G':
   459  		power = 3
   460  	case 'T':
   461  		power = 4
   462  	default:
   463  		// Invalid suffix.
   464  		return 0, false
   465  	}
   466  	m := uint64(1)
   467  	for i := 0; i < power; i++ {
   468  		m *= 1024
   469  	}
   470  	n, err := strconv.ParseInt(s[:len(s)-3], 10, 64)
   471  	if err != nil || n < 0 {
   472  		return 0, false
   473  	}
   474  	un := uint64(n)
   475  	if un > math.MaxUint64/m {
   476  		// Overflow.
   477  		return 0, false
   478  	}
   479  	un *= m
   480  	if un > uint64(math.MaxInt64) {
   481  		// Overflow.
   482  		return 0, false
   483  	}
   484  	return int64(un), true
   485  }
   486  
   487  //go:nosplit
   488  func findnull(s *byte) int {
   489  	if s == nil {
   490  		return 0
   491  	}
   492  
   493  	// Avoid IndexByteString on Plan 9 because it uses SSE instructions
   494  	// on x86 machines, and those are classified as floating point instructions,
   495  	// which are illegal in a note handler.
   496  	if GOOS == "plan9" {
   497  		p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
   498  		l := 0
   499  		for p[l] != 0 {
   500  			l++
   501  		}
   502  		return l
   503  	}
   504  
   505  	// pageSize is the unit we scan at a time looking for NULL.
   506  	// It must be the minimum page size for any architecture Go
   507  	// runs on. It's okay (just a minor performance loss) if the
   508  	// actual system page size is larger than this value.
   509  	// For Android, we set the page size to the MTE size, as MTE
   510  	// might be enforced. See issue 59090.
   511  	const pageSize = 4096*(1-goos.IsAndroid) + 16*goos.IsAndroid
   512  
   513  	offset := 0
   514  	ptr := unsafe.Pointer(s)
   515  	// IndexByteString uses wide reads, so we need to be careful
   516  	// with page boundaries. Call IndexByteString on
   517  	// [ptr, endOfPage) interval.
   518  	safeLen := int(pageSize - uintptr(ptr)%pageSize)
   519  
   520  	for {
   521  		t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
   522  		// Check one page at a time.
   523  		if i := bytealg.IndexByteString(t, 0); i != -1 {
   524  			return offset + i
   525  		}
   526  		// Move to next page
   527  		ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
   528  		offset += safeLen
   529  		safeLen = pageSize
   530  	}
   531  }
   532  
   533  func findnullw(s *uint16) int {
   534  	if s == nil {
   535  		return 0
   536  	}
   537  	p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
   538  	l := 0
   539  	for p[l] != 0 {
   540  		l++
   541  	}
   542  	return l
   543  }
   544  
   545  //go:nosplit
   546  func gostringnocopy(str *byte) string {
   547  	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
   548  	s := *(*string)(unsafe.Pointer(&ss))
   549  	return s
   550  }
   551  
   552  func gostringw(strw *uint16) string {
   553  	var buf [8]byte
   554  	str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
   555  	n1 := 0
   556  	for i := 0; str[i] != 0; i++ {
   557  		n1 += encoderune(buf[:], rune(str[i]))
   558  	}
   559  	s, b := rawstring(n1 + 4)
   560  	n2 := 0
   561  	for i := 0; str[i] != 0; i++ {
   562  		// check for race
   563  		if n2 >= n1 {
   564  			break
   565  		}
   566  		n2 += encoderune(b[n2:], rune(str[i]))
   567  	}
   568  	b[n2] = 0 // for luck
   569  	return s[:n2]
   570  }
   571  

View as plain text