capnproto

FORK: Cap'n Proto serialization/RPC system - core tools and C++ library
git clone https://git.neptards.moe/neptards/capnproto.git
Log | Files | Refs | README | LICENSE

hash.h (7478B)


      1 // Copyright (c) 2018 Kenton Varda and contributors
      2 // Licensed under the MIT License:
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a copy
      5 // of this software and associated documentation files (the "Software"), to deal
      6 // in the Software without restriction, including without limitation the rights
      7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      8 // copies of the Software, and to permit persons to whom the Software is
      9 // furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     20 // THE SOFTWARE.
     21 
     22 #pragma once
     23 
     24 #include "string.h"
     25 
     26 KJ_BEGIN_HEADER
     27 
     28 namespace kj {
     29 namespace _ {  // private
     30 
     31 struct HashCoder {
     32   // This is a dummy type with only one instance: HASHCODER (below).  To make an arbitrary type
     33   // hashable, define `operator*(HashCoder, T)` to return any other type that is already hashable.
     34   // Be sure to declare the operator in the same namespace as `T` **or** in the global scope.
     35   // You can use the KJ_HASHCODE() macro as syntax sugar for this.
     36   //
     37   // A more usual way to accomplish what we're doing here would be to require that you define
     38   // a function like `hashCode(T)` and then rely on argument-dependent lookup.  However, this has
     39   // the problem that it pollutes other people's namespaces and even the global namespace.  For
     40   // example, some other project may already have functions called `hashCode` which do something
     41   // different.  Declaring `operator*` with `HashCoder` as the left operand cannot conflict with
     42   // anything.
     43 
     44   uint operator*(ArrayPtr<const byte> s) const;
     45   inline uint operator*(ArrayPtr<byte> s) const { return operator*(s.asConst()); }
     46 
     47   inline uint operator*(ArrayPtr<const char> s) const { return operator*(s.asBytes()); }
     48   inline uint operator*(ArrayPtr<char> s) const { return operator*(s.asBytes()); }
     49   inline uint operator*(const Array<const char>& s) const { return operator*(s.asBytes()); }
     50   inline uint operator*(const Array<char>& s) const { return operator*(s.asBytes()); }
     51   inline uint operator*(const String& s) const { return operator*(s.asBytes()); }
     52   inline uint operator*(const StringPtr& s) const { return operator*(s.asBytes()); }
     53 
     54   inline uint operator*(decltype(nullptr)) const { return 0; }
     55   inline uint operator*(bool b) const { return b; }
     56   inline uint operator*(char i) const { return i; }
     57   inline uint operator*(signed char i) const { return i; }
     58   inline uint operator*(unsigned char i) const { return i; }
     59   inline uint operator*(signed short i) const { return i; }
     60   inline uint operator*(unsigned short i) const { return i; }
     61   inline uint operator*(signed int i) const { return i; }
     62   inline uint operator*(unsigned int i) const { return i; }
     63 
     64   inline uint operator*(signed long i) const {
     65     if (sizeof(i) == sizeof(uint)) {
     66       return operator*(static_cast<uint>(i));
     67     } else {
     68       return operator*(static_cast<unsigned long long>(i));
     69     }
     70   }
     71   inline uint operator*(unsigned long i) const {
     72     if (sizeof(i) == sizeof(uint)) {
     73       return operator*(static_cast<uint>(i));
     74     } else {
     75       return operator*(static_cast<unsigned long long>(i));
     76     }
     77   }
     78   inline uint operator*(signed long long i) const {
     79     return operator*(static_cast<unsigned long long>(i));
     80   }
     81   inline uint operator*(unsigned long long i) const {
     82     // Mix 64 bits to 32 bits in such a way that if our input values differ primarily in the upper
     83     // 32 bits, we still get good diffusion. (I.e. we cannot just truncate!)
     84     //
     85     // 49123 is an arbitrarily-chosen prime that is vaguely close to 2^16.
     86     //
     87     // TODO(perf): I just made this up. Is it OK?
     88     return static_cast<uint>(i) + static_cast<uint>(i >> 32) * 49123;
     89   }
     90 
     91   template <typename T>
     92   uint operator*(T* ptr) const {
     93     if (sizeof(ptr) == sizeof(uint)) {
     94       // TODO(cleanup): In C++17, make the if() above be `if constexpr ()`, then change this to
     95       //   reinterpret_cast<uint>(ptr).
     96       return reinterpret_cast<unsigned long long>(ptr);
     97     } else {
     98       return operator*(reinterpret_cast<unsigned long long>(ptr));
     99     }
    100   }
    101 
    102   template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
    103   uint operator*(ArrayPtr<T> arr) const;
    104   template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
    105   uint operator*(const Array<T>& arr) const;
    106   template <typename T, typename = EnableIf<__is_enum(T)>>
    107   inline uint operator*(T e) const;
    108 
    109   template <typename T, typename Result = decltype(instance<T>().hashCode())>
    110   inline Result operator*(T&& value) const { return kj::fwd<T>(value).hashCode(); }
    111 };
    112 static KJ_CONSTEXPR(const) HashCoder HASHCODER = HashCoder();
    113 
    114 }  // namespace _ (private)
    115 
    116 #define KJ_HASHCODE(...) operator*(::kj::_::HashCoder, __VA_ARGS__)
    117 // Defines a hash function for a custom type.  Example:
    118 //
    119 //    class Foo {...};
    120 //    inline uint KJ_HASHCODE(const Foo& foo) { return kj::hashCode(foo.x, foo.y); }
    121 //
    122 // This allows Foo to be passed to hashCode().
    123 //
    124 // The function should be declared either in the same namespace as the target type or in the global
    125 // namespace. It can return any type which itself is hashable -- that value will be hashed in turn
    126 // until a `uint` comes out.
    127 
    128 inline uint hashCode(uint value) { return value; }
    129 template <typename T>
    130 inline uint hashCode(T&& value) { return hashCode(_::HASHCODER * kj::fwd<T>(value)); }
    131 template <typename... T>
    132 inline uint hashCode(T&&... values) {
    133   uint hashes[] = { hashCode(kj::fwd<T>(values))... };
    134   return hashCode(kj::ArrayPtr<uint>(hashes).asBytes());
    135 }
    136 // kj::hashCode() is a universal hashing function, like kj::str() is a universal stringification
    137 // function. Throw stuff in, get a hash code.
    138 //
    139 // Hash codes may differ between different processes, even running exactly the same code.
    140 //
    141 // NOT SUITABLE FOR CRYPTOGRAPHY. This is for hash tables, not crypto.
    142 
    143 // =======================================================================================
    144 // inline implementation details
    145 
    146 namespace _ {  // private
    147 
    148 template <typename T, typename>
    149 inline uint HashCoder::operator*(ArrayPtr<T> arr) const {
    150   // Hash each array element to create a string of hashes, then murmur2 over those.
    151   //
    152   // TODO(perf): Choose a more-modern hash. (See hash.c++.)
    153 
    154   constexpr uint m = 0x5bd1e995;
    155   constexpr uint r = 24;
    156   uint h = arr.size() * sizeof(uint);
    157 
    158   for (auto& e: arr) {
    159     uint k = kj::hashCode(e);
    160     k *= m;
    161     k ^= k >> r;
    162     k *= m;
    163     h *= m;
    164     h ^= k;
    165   }
    166 
    167   h ^= h >> 13;
    168   h *= m;
    169   h ^= h >> 15;
    170   return h;
    171 }
    172 template <typename T, typename>
    173 inline uint HashCoder::operator*(const Array<T>& arr) const {
    174   return operator*(arr.asPtr());
    175 }
    176 
    177 template <typename T, typename>
    178 inline uint HashCoder::operator*(T e) const {
    179   return operator*(static_cast<__underlying_type(T)>(e));
    180 }
    181 
    182 }  // namespace _ (private)
    183 } // namespace kj
    184 
    185 KJ_END_HEADER