Source file src/hash/maphash/maphash.go
1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences and comparable values. 6 // It also defines [Hasher], the interface between a hash function and a hash table. 7 // 8 // These hash functions are intended to be used to implement hash 9 // tables, Bloom filters, and other data structures that need to map 10 // arbitrary strings or byte sequences to a uniform distribution on 11 // unsigned 64-bit integers. 12 // 13 // Each different instance of a hash table or data structure should use its own [Seed]. 14 // 15 // The hash functions are not cryptographically secure. 16 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 17 package maphash 18 19 import ( 20 "hash" 21 "internal/abi" 22 "internal/runtime/maps" 23 "unsafe" 24 ) 25 26 // A Seed is a random value that selects the specific hash function 27 // computed by a [Hash]. If two Hashes use the same Seeds, they 28 // will compute the same hash values for any given input. 29 // If two Hashes use different Seeds, they are very likely to compute 30 // distinct hash values for any given input. 31 // 32 // A Seed must be initialized by calling [MakeSeed]. 33 // The zero seed is uninitialized and not valid for use with [Hash]'s SetSeed method. 34 // 35 // Each Seed value is local to a single process and cannot be serialized 36 // or otherwise recreated in a different process. 37 type Seed struct { 38 s uint64 39 } 40 41 // Bytes returns the hash of b with the given seed. 42 // 43 // Bytes is equivalent to, but more convenient and efficient than: 44 // 45 // var h Hash 46 // h.SetSeed(seed) 47 // h.Write(b) 48 // return h.Sum64() 49 func Bytes(seed Seed, b []byte) uint64 { 50 state := seed.s 51 if state == 0 { 52 panic("maphash: use of uninitialized Seed") 53 } 54 55 if len(b) > bufSize { 56 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing 57 for len(b) > bufSize { 58 state = rthash(b[:bufSize], state) 59 b = b[bufSize:] 60 } 61 } 62 return rthash(b, state) 63 } 64 65 // String returns the hash of s with the given seed. 66 // 67 // String is equivalent to, but more convenient and efficient than: 68 // 69 // var h Hash 70 // h.SetSeed(seed) 71 // h.WriteString(s) 72 // return h.Sum64() 73 func String(seed Seed, s string) uint64 { 74 state := seed.s 75 if state == 0 { 76 panic("maphash: use of uninitialized Seed") 77 } 78 for len(s) > bufSize { 79 state = rthashString(s[:bufSize], state) 80 s = s[bufSize:] 81 } 82 return rthashString(s, state) 83 } 84 85 // A Hash computes a seeded hash of a byte sequence. 86 // 87 // The zero Hash is a valid Hash ready to use. 88 // A zero Hash chooses a random seed for itself during 89 // the first call to a Reset, Write, Seed, Clone, or Sum64 method. 90 // For control over the seed, use SetSeed. 91 // 92 // The computed hash values depend only on the initial seed and 93 // the sequence of bytes provided to the Hash object, not on the way 94 // in which the bytes are provided. For example, the three sequences 95 // 96 // h.Write([]byte{'f','o','o'}) 97 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 98 // h.WriteString("foo") 99 // 100 // all have the same effect. 101 // 102 // Hashes are intended to be collision-resistant, even for situations 103 // where an adversary controls the byte sequences being hashed. 104 // 105 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 106 // If multiple goroutines must compute the same seeded hash, 107 // each can declare its own Hash and call SetSeed with a common Seed. 108 type Hash struct { 109 _ [0]func() // not comparable 110 seed Seed // initial seed used for this hash 111 state Seed // current hash of all flushed bytes 112 buf [bufSize]byte // unflushed byte buffer 113 n int // number of unflushed bytes 114 } 115 116 // bufSize is the size of the Hash write buffer. 117 // The buffer ensures that writes depend only on the sequence of bytes, 118 // not the sequence of WriteByte/Write/WriteString calls, 119 // by always calling rthash with a full buffer (except for the tail). 120 const bufSize = 128 121 122 // initSeed seeds the hash if necessary. 123 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 124 // Note that this does not include Write/WriteByte/WriteString in the case 125 // where they only add to h.buf. (If they write too much, they call h.flush, 126 // which does call h.initSeed.) 127 func (h *Hash) initSeed() { 128 if h.seed.s == 0 { 129 seed := MakeSeed() 130 h.seed = seed 131 h.state = seed 132 } 133 } 134 135 // WriteByte adds b to the sequence of bytes hashed by h. 136 // It never fails; the error result is for implementing [io.ByteWriter]. 137 func (h *Hash) WriteByte(b byte) error { 138 if h.n == len(h.buf) { 139 h.flush() 140 } 141 h.buf[h.n] = b 142 h.n++ 143 return nil 144 } 145 146 // Write adds b to the sequence of bytes hashed by h. 147 // It always writes all of b and never fails; the count and error result are for implementing [io.Writer]. 148 func (h *Hash) Write(b []byte) (int, error) { 149 size := len(b) 150 // Deal with bytes left over in h.buf. 151 // h.n <= bufSize is always true. 152 // Checking it is ~free and it lets the compiler eliminate a bounds check. 153 if h.n > 0 && h.n <= bufSize { 154 k := copy(h.buf[h.n:], b) 155 h.n += k 156 if h.n < bufSize { 157 // Copied the entirety of b to h.buf. 158 return size, nil 159 } 160 b = b[k:] 161 h.flush() 162 // No need to set h.n = 0 here; it happens just before exit. 163 } 164 // Process as many full buffers as possible, without copying, and calling initSeed only once. 165 if len(b) > bufSize { 166 h.initSeed() 167 for len(b) > bufSize { 168 h.state.s = rthash(b[:bufSize], h.state.s) 169 b = b[bufSize:] 170 } 171 } 172 // Copy the tail. 173 copy(h.buf[:], b) 174 h.n = len(b) 175 return size, nil 176 } 177 178 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 179 // It always writes all of s and never fails; the count and error result are for implementing [io.StringWriter]. 180 func (h *Hash) WriteString(s string) (int, error) { 181 // WriteString mirrors Write. See Write for comments. 182 size := len(s) 183 if h.n > 0 && h.n <= bufSize { 184 k := copy(h.buf[h.n:], s) 185 h.n += k 186 if h.n < bufSize { 187 return size, nil 188 } 189 s = s[k:] 190 h.flush() 191 } 192 if len(s) > bufSize { 193 h.initSeed() 194 for len(s) > bufSize { 195 h.state.s = rthashString(s[:bufSize], h.state.s) 196 s = s[bufSize:] 197 } 198 } 199 copy(h.buf[:], s) 200 h.n = len(s) 201 return size, nil 202 } 203 204 // Seed returns h's seed value. 205 func (h *Hash) Seed() Seed { 206 h.initSeed() 207 return h.seed 208 } 209 210 // SetSeed sets h to use seed, which must have been returned by [MakeSeed] 211 // or by another [Hash.Seed] method. 212 // Two [Hash] objects with the same seed behave identically. 213 // Two [Hash] objects with different seeds will very likely behave differently. 214 // Any bytes added to h before this call will be discarded. 215 func (h *Hash) SetSeed(seed Seed) { 216 if seed.s == 0 { 217 panic("maphash: use of uninitialized Seed") 218 } 219 h.seed = seed 220 h.state = seed 221 h.n = 0 222 } 223 224 // Reset discards all bytes added to h. 225 // (The seed remains the same.) 226 func (h *Hash) Reset() { 227 h.initSeed() 228 h.state = h.seed 229 h.n = 0 230 } 231 232 // precondition: buffer is full. 233 func (h *Hash) flush() { 234 if h.n != len(h.buf) { 235 panic("maphash: flush of partially full buffer") 236 } 237 h.initSeed() 238 h.state.s = rthash(h.buf[:h.n], h.state.s) 239 h.n = 0 240 } 241 242 // Sum64 returns h's current 64-bit value, which depends on 243 // h's seed and the sequence of bytes added to h since the 244 // last call to [Hash.Reset] or [Hash.SetSeed]. 245 // 246 // All bits of the Sum64 result are close to uniformly and 247 // independently distributed, so it can be safely reduced 248 // by using bit masking, shifting, or modular arithmetic. 249 func (h *Hash) Sum64() uint64 { 250 h.initSeed() 251 return rthash(h.buf[:h.n], h.state.s) 252 } 253 254 // MakeSeed returns a new random seed. 255 func MakeSeed() Seed { 256 var s uint64 257 for { 258 s = randUint64() 259 // We use seed 0 to indicate an uninitialized seed/hash, 260 // so keep trying until we get a non-zero seed. 261 if s != 0 { 262 break 263 } 264 } 265 return Seed{s: s} 266 } 267 268 // Sum appends the hash's current 64-bit value to b. 269 // It exists for implementing [hash.Hash]. 270 // For direct calls, it is more efficient to use [Hash.Sum64]. 271 func (h *Hash) Sum(b []byte) []byte { 272 x := h.Sum64() 273 return append(b, 274 byte(x>>0), 275 byte(x>>8), 276 byte(x>>16), 277 byte(x>>24), 278 byte(x>>32), 279 byte(x>>40), 280 byte(x>>48), 281 byte(x>>56)) 282 } 283 284 // Size returns h's hash value size, 8 bytes. 285 func (h *Hash) Size() int { return 8 } 286 287 // BlockSize returns h's block size. 288 func (h *Hash) BlockSize() int { return len(h.buf) } 289 290 // Clone implements [hash.Cloner]. 291 func (h *Hash) Clone() (hash.Cloner, error) { 292 h.initSeed() 293 r := *h 294 return &r, nil 295 } 296 297 // Comparable returns the hash of comparable value v with the given seed 298 // such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2. 299 // If v != v, then the resulting hash is randomly distributed. 300 func Comparable[T comparable](seed Seed, v T) uint64 { 301 abi.EscapeNonString(v) 302 return comparableHash(v, seed) 303 } 304 305 // WriteComparable adds x to the data hashed by h. 306 func WriteComparable[T comparable](h *Hash, x T) { 307 abi.EscapeNonString(x) 308 // writeComparable directly operates on h.state 309 // without using h.buf. Mix in the buffer length so it won't 310 // commute with a buffered write, which either changes h.n or changes 311 // h.state. 312 if h.n != 0 { 313 writeComparable(h, h.n) 314 } 315 writeComparable(h, x) 316 } 317 318 //go:linkname runtime_rand runtime.rand 319 func runtime_rand() uint64 320 321 //go:linkname runtime_memhash runtime.memhash 322 //go:noescape 323 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 324 325 func rthash(buf []byte, seed uint64) uint64 { 326 if len(buf) == 0 { 327 return seed 328 } 329 len := len(buf) 330 // The runtime hasher only works on uintptr. For 64-bit 331 // architectures, we use the hasher directly. Otherwise, 332 // we use two parallel hashers on the lower and upper 32 bits. 333 if maps.Use64BitHash { 334 return uint64(runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len))) 335 } 336 lo := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(uint32(seed)), uintptr(len)) 337 hi := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed>>32), uintptr(len)) 338 return uint64(hi)<<32 | uint64(lo) 339 } 340 341 func rthashString(s string, state uint64) uint64 { 342 buf := unsafe.Slice(unsafe.StringData(s), len(s)) 343 return rthash(buf, state) 344 } 345 346 func randUint64() uint64 { 347 return runtime_rand() 348 } 349 350 func comparableHash[T comparable](v T, seed Seed) uint64 { 351 s := seed.s 352 var m map[T]struct{} 353 mTyp := abi.TypeOf(m) 354 hasher := (*abi.MapType)(unsafe.Pointer(mTyp)).Hasher 355 if maps.Use64BitHash { 356 return uint64(hasher(abi.NoEscape(unsafe.Pointer(&v)), uintptr(s))) 357 } 358 lo := hasher(abi.NoEscape(unsafe.Pointer(&v)), uintptr(uint32(s))) 359 hi := hasher(abi.NoEscape(unsafe.Pointer(&v)), uintptr(s>>32)) 360 return uint64(hi)<<32 | uint64(lo) 361 } 362 363 func writeComparable[T comparable](h *Hash, v T) { 364 h.state.s = comparableHash(v, h.state) 365 } 366