hasher.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package maphash
     6  
     7  // A Hasher defines the interface between a hash-based container and its elements.
     8  // It provides a hash function and an equivalence relation over values
     9  // of type T, enabling those values to be inserted in hash tables
    10  // and similar data structures.
    11  //
    12  // Of course, comparable types can already be used as keys of Go's
    13  // built-in map type, but a Hasher enables non-comparable types to be
    14  // used as keys of a suitable hash table too.
    15  // Hashers may be useful even for comparable types, to define an
    16  // equivalence relation that differs from the usual one (==), such as a
    17  // field-based comparison for a pointer-to-struct type, or a
    18  // case-insensitive comparison for strings, as in this example:
    19  //
    20  //	// CaseInsensitive is a Hasher[string] whose
    21  //	// equivalence relation ignores letter case.
    22  //	type CaseInsensitive struct{}
    23  //
    24  //	func (CaseInsensitive) Hash(h *Hash, s string) {
    25  //		h.WriteString(strings.ToLower(s))
    26  //	}
    27  //
    28  //	func (CaseInsensitive) Equal(x, y string) bool {
    29  //		// (We avoid strings.EqualFold as it is not
    30  //		// consistent with ToLower for all values.)
    31  //		return strings.ToLower(x) == strings.ToLower(y)
    32  //	}
    33  //
    34  // A Hasher also permits values to be used with other hash-based data
    35  // structures such as a Bloom filter.
    36  // The [ComparableHasher] type makes it convenient to enable comparable
    37  // types to be used in such data structures under their usual (==)
    38  // equivalence relation.
    39  //
    40  // # Hash invariants
    41  //
    42  // If two values are equal as defined by Equal(x, y), then they must
    43  // have the same hash as defined by the effects of Hash(h, x) on h.
    44  //
    45  // Hashers must be logically stateless: the behavior of the Hash and
    46  // Equal methods depends only on the arguments.
    47  //
    48  // # Writing a good function
    49  //
    50  // When defining a hash function and equivalence relation for a data
    51  // type, it may help to first define a canonical encoding for values
    52  // of that type as a sequence of elements, each being a number,
    53  // string, boolean, or pointer.
    54  // An encoding is canonical if two values that are logically equal
    55  // have the same encoding, even if they are represented differently.
    56  // For example, a canonical case-insensitive encoding of a string is
    57  // [strings.ToLower].
    58  //
    59  // Once you have defined the encoding, the Hasher's Hash method should
    60  // encode a value into the [Hash] using a sequence of calls to
    61  // [Hash.Write] for byte slices, [Hash.WriteString] for strings,
    62  // [Hash.WriteByte] for bytes, and [WriteComparable] for elements of
    63  // other types. The Hasher's Equal method should compute the
    64  // encodings of two values, then compare their corresponding
    65  // elements, returning false at the first mismatch.
    66  //
    67  // A Hash method may discard information so long as it remains
    68  // consistent with the Equal method as defined above.
    69  // For example, valid implementations of CaseInsensitive.Hash might inspect
    70  // only the first letter of the string, or even use a constant value.
    71  // However, the lossier the hash function, the more frequent
    72  // the hash collisions and the slower the hash table.
    73  //
    74  // Some data types, such as sets, are inherently unordered: the set
    75  // {a, b, c} is equal to the set {c, b, a}.
    76  // In some cases it is possible to define a canonical encoding for a
    77  // set by sorting the elements into some order.
    78  // In other cases this may inefficient, since it may require allocating
    79  // memory, or infeasible, as when there is no convenient order.
    80  // Another way to hash an unordered set is to compute the hash
    81  // for each element separately, then combine all the element hashes
    82  // using a commutative (order-independent) operator such as + or ^.
    83  //
    84  // The Hash method below, for a hypothetical Set type, illustrates
    85  // this approach:
    86  //
    87  //	type Set[T comparable] struct{ ... }
    88  //
    89  //	type setHasher[T comparable] struct{}
    90  //
    91  //	func (setHasher[T]) Hash(hash *maphash.Hash, set *Set[T]) {
    92  //		var accum uint64
    93  //		for elem := range set.Elements() {
    94  //			// Initialize a hasher for the element,
    95  //			// using same seed as the outer hash.
    96  //			var sub maphash.Hash
    97  //			sub.SetSeed(hash.Seed())
    98  //
    99  //			// Hash the element.
   100  //			maphash.WriteComparable(&sub, elem)
   101  //
   102  //			// Mix the element's hash into the set's hash.
   103  //			accum ^= sub.Sum64()
   104  //		}
   105  //		maphash.WriteComparable(hash, accum)
   106  //	}
   107  //
   108  // In many languages, a data type's hash operation simply returns an
   109  // integer value.
   110  // However, that makes it possible for an adversary to systematically
   111  // construct a large number of values that all have the same hash,
   112  // degrading the asymptotic performance of hash tables in a
   113  // denial-of-service attack known as "hash flooding".
   114  // By contrast, computing hashes as a sequence of values emitted into
   115  // a [Hash] with an unpredictable [Seed] that varies from one hash
   116  // table to another mitigates this attack.
   117  //
   118  // In effect, the Seed chooses one of 2⁶⁴ different hash functions.
   119  // The code example above calls SetSeed on the element's sub-Hasher
   120  // so that it uses the same hash function as for the Set itself, and
   121  // not a random one.
   122  type Hasher[T any] interface {
   123  	Hash(*Hash, T)
   124  	Equal(x, y T) bool
   125  }
   126  
   127  // ComparableHasher is an implementation of [Hasher] whose
   128  // Equal(x, y) method is consistent with x == y.
   129  //
   130  // ComparableHasher is defined only for comparable types.
   131  // The type system will not prevent you from instantiating a type
   132  // such as ComparableHasher[any]; nonetheless you must not pass
   133  // non-comparable argument values to its Hash or Equal methods.
   134  type ComparableHasher[T comparable] struct {
   135  	_ [0]func(T) // disallow comparison, and conversion between ComparableHasher[X] and ComparableHasher[Y]
   136  }
   137  
   138  func (ComparableHasher[T]) Hash(h *Hash, v T) { WriteComparable(h, v) }
   139  func (ComparableHasher[T]) Equal(x, y T) bool { return x == y }
   140
View as plain text