hash.h (7478B)
1 // Copyright (c) 2018 Kenton Varda and contributors 2 // Licensed under the MIT License: 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 #pragma once 23 24 #include "string.h" 25 26 KJ_BEGIN_HEADER 27 28 namespace kj { 29 namespace _ { // private 30 31 struct HashCoder { 32 // This is a dummy type with only one instance: HASHCODER (below). To make an arbitrary type 33 // hashable, define `operator*(HashCoder, T)` to return any other type that is already hashable. 34 // Be sure to declare the operator in the same namespace as `T` **or** in the global scope. 35 // You can use the KJ_HASHCODE() macro as syntax sugar for this. 36 // 37 // A more usual way to accomplish what we're doing here would be to require that you define 38 // a function like `hashCode(T)` and then rely on argument-dependent lookup. However, this has 39 // the problem that it pollutes other people's namespaces and even the global namespace. For 40 // example, some other project may already have functions called `hashCode` which do something 41 // different. Declaring `operator*` with `HashCoder` as the left operand cannot conflict with 42 // anything. 43 44 uint operator*(ArrayPtr<const byte> s) const; 45 inline uint operator*(ArrayPtr<byte> s) const { return operator*(s.asConst()); } 46 47 inline uint operator*(ArrayPtr<const char> s) const { return operator*(s.asBytes()); } 48 inline uint operator*(ArrayPtr<char> s) const { return operator*(s.asBytes()); } 49 inline uint operator*(const Array<const char>& s) const { return operator*(s.asBytes()); } 50 inline uint operator*(const Array<char>& s) const { return operator*(s.asBytes()); } 51 inline uint operator*(const String& s) const { return operator*(s.asBytes()); } 52 inline uint operator*(const StringPtr& s) const { return operator*(s.asBytes()); } 53 54 inline uint operator*(decltype(nullptr)) const { return 0; } 55 inline uint operator*(bool b) const { return b; } 56 inline uint operator*(char i) const { return i; } 57 inline uint operator*(signed char i) const { return i; } 58 inline uint operator*(unsigned char i) const { return i; } 59 inline uint operator*(signed short i) const { return i; } 60 inline uint operator*(unsigned short i) const { return i; } 61 inline uint operator*(signed int i) const { return i; } 62 inline uint operator*(unsigned int i) const { return i; } 63 64 inline uint operator*(signed long i) const { 65 if (sizeof(i) == sizeof(uint)) { 66 return operator*(static_cast<uint>(i)); 67 } else { 68 return operator*(static_cast<unsigned long long>(i)); 69 } 70 } 71 inline uint operator*(unsigned long i) const { 72 if (sizeof(i) == sizeof(uint)) { 73 return operator*(static_cast<uint>(i)); 74 } else { 75 return operator*(static_cast<unsigned long long>(i)); 76 } 77 } 78 inline uint operator*(signed long long i) const { 79 return operator*(static_cast<unsigned long long>(i)); 80 } 81 inline uint operator*(unsigned long long i) const { 82 // Mix 64 bits to 32 bits in such a way that if our input values differ primarily in the upper 83 // 32 bits, we still get good diffusion. (I.e. we cannot just truncate!) 84 // 85 // 49123 is an arbitrarily-chosen prime that is vaguely close to 2^16. 86 // 87 // TODO(perf): I just made this up. Is it OK? 88 return static_cast<uint>(i) + static_cast<uint>(i >> 32) * 49123; 89 } 90 91 template <typename T> 92 uint operator*(T* ptr) const { 93 if (sizeof(ptr) == sizeof(uint)) { 94 // TODO(cleanup): In C++17, make the if() above be `if constexpr ()`, then change this to 95 // reinterpret_cast<uint>(ptr). 96 return reinterpret_cast<unsigned long long>(ptr); 97 } else { 98 return operator*(reinterpret_cast<unsigned long long>(ptr)); 99 } 100 } 101 102 template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())> 103 uint operator*(ArrayPtr<T> arr) const; 104 template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())> 105 uint operator*(const Array<T>& arr) const; 106 template <typename T, typename = EnableIf<__is_enum(T)>> 107 inline uint operator*(T e) const; 108 109 template <typename T, typename Result = decltype(instance<T>().hashCode())> 110 inline Result operator*(T&& value) const { return kj::fwd<T>(value).hashCode(); } 111 }; 112 static KJ_CONSTEXPR(const) HashCoder HASHCODER = HashCoder(); 113 114 } // namespace _ (private) 115 116 #define KJ_HASHCODE(...) operator*(::kj::_::HashCoder, __VA_ARGS__) 117 // Defines a hash function for a custom type. Example: 118 // 119 // class Foo {...}; 120 // inline uint KJ_HASHCODE(const Foo& foo) { return kj::hashCode(foo.x, foo.y); } 121 // 122 // This allows Foo to be passed to hashCode(). 123 // 124 // The function should be declared either in the same namespace as the target type or in the global 125 // namespace. It can return any type which itself is hashable -- that value will be hashed in turn 126 // until a `uint` comes out. 127 128 inline uint hashCode(uint value) { return value; } 129 template <typename T> 130 inline uint hashCode(T&& value) { return hashCode(_::HASHCODER * kj::fwd<T>(value)); } 131 template <typename... T> 132 inline uint hashCode(T&&... values) { 133 uint hashes[] = { hashCode(kj::fwd<T>(values))... }; 134 return hashCode(kj::ArrayPtr<uint>(hashes).asBytes()); 135 } 136 // kj::hashCode() is a universal hashing function, like kj::str() is a universal stringification 137 // function. Throw stuff in, get a hash code. 138 // 139 // Hash codes may differ between different processes, even running exactly the same code. 140 // 141 // NOT SUITABLE FOR CRYPTOGRAPHY. This is for hash tables, not crypto. 142 143 // ======================================================================================= 144 // inline implementation details 145 146 namespace _ { // private 147 148 template <typename T, typename> 149 inline uint HashCoder::operator*(ArrayPtr<T> arr) const { 150 // Hash each array element to create a string of hashes, then murmur2 over those. 151 // 152 // TODO(perf): Choose a more-modern hash. (See hash.c++.) 153 154 constexpr uint m = 0x5bd1e995; 155 constexpr uint r = 24; 156 uint h = arr.size() * sizeof(uint); 157 158 for (auto& e: arr) { 159 uint k = kj::hashCode(e); 160 k *= m; 161 k ^= k >> r; 162 k *= m; 163 h *= m; 164 h ^= k; 165 } 166 167 h ^= h >> 13; 168 h *= m; 169 h ^= h >> 15; 170 return h; 171 } 172 template <typename T, typename> 173 inline uint HashCoder::operator*(const Array<T>& arr) const { 174 return operator*(arr.asPtr()); 175 } 176 177 template <typename T, typename> 178 inline uint HashCoder::operator*(T e) const { 179 return operator*(static_cast<__underlying_type(T)>(e)); 180 } 181 182 } // namespace _ (private) 183 } // namespace kj 184 185 KJ_END_HEADER