// Copyright 2025 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package maphash // A Hasher defines the interface between a hash-based container and its elements. // It provides a hash function and an equivalence relation over values // of type T, enabling those values to be inserted in hash tables // and similar data structures. // // Of course, comparable types can already be used as keys of Go's // built-in map type, but a Hasher enables non-comparable types to be // used as keys of a suitable hash table too. // Hashers may be useful even for comparable types, to define an // equivalence relation that differs from the usual one (==), such as a // field-based comparison for a pointer-to-struct type, or a // case-insensitive comparison for strings, as in this example: // // // CaseInsensitive is a Hasher[string] whose // // equivalence relation ignores letter case. // type CaseInsensitive struct{} // // func (CaseInsensitive) Hash(h *Hash, s string) { // h.WriteString(strings.ToLower(s)) // } // // func (CaseInsensitive) Equal(x, y string) bool { // // (We avoid strings.EqualFold as it is not // // consistent with ToLower for all values.) // return strings.ToLower(x) == strings.ToLower(y) // } // // A Hasher also permits values to be used with other hash-based data // structures such as a Bloom filter. // The [ComparableHasher] type makes it convenient to enable comparable // types to be used in such data structures under their usual (==) // equivalence relation. // // # Hash invariants // // If two values are equal as defined by Equal(x, y), then they must // have the same hash as defined by the effects of Hash(h, x) on h. // // Hashers must be logically stateless: the behavior of the Hash and // Equal methods depends only on the arguments. // // # Writing a good function // // When defining a hash function and equivalence relation for a data // type, it may help to first define a canonical encoding for values // of that type as a sequence of elements, each being a number, // string, boolean, or pointer. // An encoding is canonical if two values that are logically equal // have the same encoding, even if they are represented differently. // For example, a canonical case-insensitive encoding of a string is // [strings.ToLower]. // // Once you have defined the encoding, the Hasher's Hash method should // encode a value into the [Hash] using a sequence of calls to // [Hash.Write] for byte slices, [Hash.WriteString] for strings, // [Hash.WriteByte] for bytes, and [WriteComparable] for elements of // other types. The Hasher's Equal method should compute the // encodings of two values, then compare their corresponding // elements, returning false at the first mismatch. // // A Hash method may discard information so long as it remains // consistent with the Equal method as defined above. // For example, valid implementations of CaseInsensitive.Hash might inspect // only the first letter of the string, or even use a constant value. // However, the lossier the hash function, the more frequent // the hash collisions and the slower the hash table. // // Some data types, such as sets, are inherently unordered: the set // {a, b, c} is equal to the set {c, b, a}. // In some cases it is possible to define a canonical encoding for a // set by sorting the elements into some order. // In other cases this may inefficient, since it may require allocating // memory, or infeasible, as when there is no convenient order. // Another way to hash an unordered set is to compute the hash // for each element separately, then combine all the element hashes // using a commutative (order-independent) operator such as + or ^. // // The Hash method below, for a hypothetical Set type, illustrates // this approach: // // type Set[T comparable] struct{ ... } // // type setHasher[T comparable] struct{} // // func (setHasher[T]) Hash(hash *maphash.Hash, set *Set[T]) { // var accum uint64 // for elem := range set.Elements() { // // Initialize a hasher for the element, // // using same seed as the outer hash. // var sub maphash.Hash // sub.SetSeed(hash.Seed()) // // // Hash the element. // maphash.WriteComparable(&sub, elem) // // // Mix the element's hash into the set's hash. // accum ^= sub.Sum64() // } // maphash.WriteComparable(hash, accum) // } // // In many languages, a data type's hash operation simply returns an // integer value. // However, that makes it possible for an adversary to systematically // construct a large number of values that all have the same hash, // degrading the asymptotic performance of hash tables in a // denial-of-service attack known as "hash flooding". // By contrast, computing hashes as a sequence of values emitted into // a [Hash] with an unpredictable [Seed] that varies from one hash // table to another mitigates this attack. // // In effect, the Seed chooses one of 2⁶⁴ different hash functions. // The code example above calls SetSeed on the element's sub-Hasher // so that it uses the same hash function as for the Set itself, and // not a random one. type Hasher[T any] interface { Hash(*Hash, T) Equal(x, y T) bool } // ComparableHasher is an implementation of [Hasher] whose // Equal(x, y) method is consistent with x == y. // // ComparableHasher is defined only for comparable types. // The type system will not prevent you from instantiating a type // such as ComparableHasher[any]; nonetheless you must not pass // non-comparable argument values to its Hash or Equal methods. type ComparableHasher[T comparable] struct { _ [0]func(T) // disallow comparison, and conversion between ComparableHasher[X] and ComparableHasher[Y] } func (ComparableHasher[T]) Hash(h *Hash, v T) { WriteComparable(h, v) } func (ComparableHasher[T]) Equal(x, y T) bool { return x == y }