|
| 1 | +#pragma once |
| 2 | + |
| 3 | +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS |
| 4 | + |
| 5 | +// Fast, high-quality hash function for byte sequences. |
| 6 | +// |
| 7 | +// Provides HashBytes() for use in hash tables. Uses native-width integer |
| 8 | +// loads and a 128-bit multiply-and-fold mixer for excellent avalanche |
| 9 | +// properties on short byte sequences (network identifiers, addresses, |
| 10 | +// tokens). |
| 11 | +// |
| 12 | +// Based on rapidhash V3 by Nicolas De Carli, which evolved from wyhash |
| 13 | +// by Wang Yi. Both use the same core mixing primitive (MUM: multiply, |
| 14 | +// then XOR the high and low halves of the 128-bit result). |
| 15 | +// |
| 16 | +// rapidhash: https://github.com/Nicoshev/rapidhash |
| 17 | +// Copyright (C) 2025 Nicolas De Carli — MIT License |
| 18 | +// wyhash: https://github.com/wangyi-fudan/wyhash |
| 19 | +// Wang Yi — public domain (The Unlicense) |
| 20 | +// |
| 21 | +// The implementation here uses rapidhash's read strategy (native-width |
| 22 | +// overlapping reads, optimized for short inputs) and secret constants. |
| 23 | +// The core mixing function (rapid_mum/rapid_mix) is identical to |
| 24 | +// wyhash's wymum/wymix. |
| 25 | + |
| 26 | +#include <cstddef> |
| 27 | +#include <cstdint> |
| 28 | +#include <cstring> |
| 29 | + |
| 30 | +#if defined(_MSC_VER) |
| 31 | +#include <intrin.h> |
| 32 | +#if defined(_M_X64) && !defined(_M_ARM64EC) |
| 33 | +#pragma intrinsic(_umul128) |
| 34 | +#endif |
| 35 | +#endif |
| 36 | + |
| 37 | +namespace node { |
| 38 | + |
| 39 | +namespace hash_detail { |
| 40 | + |
// Multiply two 64-bit values to a 128-bit product, then fold it by
// XOR-ing the high half into the low half. This is the core mixing
// primitive ("rapid_mum" / "wymum"). On 64-bit targets with __int128
// support it compiles to one widening multiply plus a shift and an xor.
inline uint64_t RapidMix(uint64_t a, uint64_t b) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = static_cast<__uint128_t>(a) * b;
  return static_cast<uint64_t>(product) ^
         static_cast<uint64_t>(product >> 64);
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
#if defined(_M_X64)
  uint64_t hi;
  const uint64_t lo = _umul128(a, b, &hi);
  return lo ^ hi;
#else
  // ARM64 MSVC: separate intrinsics for the low and high halves.
  return (a * b) ^ __umulh(a, b);
#endif
#else
  // Portable 64x64 -> 128-bit multiply for 32-bit targets: split each
  // operand into 32-bit halves and accumulate the partial products,
  // tracking the two possible carries into the high word.
  const uint64_t a_hi = a >> 32;
  const uint64_t b_hi = b >> 32;
  const uint64_t a_lo = static_cast<uint32_t>(a);
  const uint64_t b_lo = static_cast<uint32_t>(b);
  const uint64_t cross0 = a_hi * b_lo;
  const uint64_t cross1 = b_hi * a_lo;
  const uint64_t low = a_lo * b_lo;
  const uint64_t sum0 = low + (cross0 << 32);
  const uint64_t lo = sum0 + (cross1 << 32);
  const uint64_t hi = a_hi * b_hi + (cross0 >> 32) + (cross1 >> 32) +
                      (sum0 < low) + (lo < sum0);
  return lo ^ hi;
#endif
}
| 71 | + |
// 128-bit multiply WITHOUT the final XOR fold: writes the low half of
// the product through *a and the high half through *b. Used in the
// penultimate mixing step, where the two halves feed into the final
// mix separately.
inline void RapidMum(uint64_t* a, uint64_t* b) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = static_cast<__uint128_t>(*a) * (*b);
  *a = static_cast<uint64_t>(product);
  *b = static_cast<uint64_t>(product >> 64);
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
#if defined(_M_X64)
  *a = _umul128(*a, *b, b);
#else
  // ARM64 MSVC: compute the high half first, since the low-half
  // multiply overwrites *a.
  const uint64_t hi = __umulh(*a, *b);
  *a = (*a) * (*b);
  *b = hi;
#endif
#else
  // Portable 64x64 -> 128-bit multiply for 32-bit targets (same
  // half-word decomposition as RapidMix, without the fold).
  const uint64_t a_hi = *a >> 32;
  const uint64_t b_hi = *b >> 32;
  const uint64_t a_lo = static_cast<uint32_t>(*a);
  const uint64_t b_lo = static_cast<uint32_t>(*b);
  const uint64_t cross0 = a_hi * b_lo;
  const uint64_t cross1 = b_hi * a_lo;
  const uint64_t low = a_lo * b_lo;
  const uint64_t sum0 = low + (cross0 << 32);
  const uint64_t lo = sum0 + (cross1 << 32);
  *a = lo;
  *b = a_hi * b_hi + (cross0 >> 32) + (cross1 >> 32) + (sum0 < low) +
       (lo < sum0);
#endif
}
| 96 | + |
// Load a native-endian 64-bit value from an unaligned byte pointer.
// The fixed-size memcpy is recognized by compilers and lowered to a
// single load instruction; no byte-by-byte copy is emitted.
inline uint64_t RapidRead64(const uint8_t* p) {
  uint64_t value = 0;
  std::memcpy(&value, p, sizeof(value));
  return value;
}
| 104 | + |
// Load a native-endian 32-bit value from an unaligned byte pointer,
// zero-extended to 64 bits. Lowered to a single load instruction.
inline uint64_t RapidRead32(const uint8_t* p) {
  uint32_t value = 0;
  std::memcpy(&value, p, sizeof(value));
  return value;
}
| 110 | + |
// Default rapidhash secret parameters.
//
// `inline` (C++17) gives the array a single definition shared by all
// translation units. A plain namespace-scope `constexpr` array has
// internal linkage, so every TU including this header would get its
// own copy — and the inline functions referencing it would then
// reference different objects in different TUs, an ODR violation.
inline constexpr uint64_t kSecret[8] = {0x2d358dccaa6c78a5ULL,
                                        0x8bb84b93962eacc9ULL,
                                        0x4b33a62ed433d4a3ULL,
                                        0x4d5a2da51de1aa47ULL,
                                        0xa0761d6478bd642fULL,
                                        0xe7037ed1a0b428dbULL,
                                        0x90ed1765281c388cULL,
                                        0xaaaaaaaaaaaaaaaaULL};
| 120 | + |
| 121 | +} // namespace hash_detail |
| 122 | + |
| 123 | +// Hash a contiguous byte range. Optimized for short inputs (≤48 bytes) |
| 124 | +// which is the common case for network identifiers and addresses. For |
| 125 | +// inputs >48 bytes, falls through to a loop processing 48-byte chunks. |
| 126 | +inline size_t HashBytes(const void* data, size_t len) { |
| 127 | + const uint8_t* p = static_cast<const uint8_t*>(data); |
| 128 | + |
| 129 | + // Seed initialization. |
| 130 | + uint64_t seed = hash_detail::RapidMix(0 ^ hash_detail::kSecret[2], |
| 131 | + hash_detail::kSecret[1]); |
| 132 | + uint64_t a = 0; |
| 133 | + uint64_t b = 0; |
| 134 | + size_t i = len; |
| 135 | + |
| 136 | + if (len <= 16) { |
| 137 | + if (len >= 4) { |
| 138 | + // Mix length into seed for better distribution of |
| 139 | + // different-length inputs with shared prefixes. |
| 140 | + seed ^= len; |
| 141 | + if (len >= 8) { |
| 142 | + // 8-16 bytes: two native 64-bit reads (overlapping from end). |
| 143 | + a = hash_detail::RapidRead64(p); |
| 144 | + b = hash_detail::RapidRead64(p + len - 8); |
| 145 | + } else { |
| 146 | + // 4-7 bytes: two 32-bit reads (overlapping from end). |
| 147 | + a = hash_detail::RapidRead32(p); |
| 148 | + b = hash_detail::RapidRead32(p + len - 4); |
| 149 | + } |
| 150 | + } else if (len > 0) { |
| 151 | + // 1-3 bytes: spread bytes across two values for mixing. |
| 152 | + a = (static_cast<uint64_t>(p[0]) << 45) | p[len - 1]; |
| 153 | + b = p[len >> 1]; |
| 154 | + } else { |
| 155 | + a = b = 0; |
| 156 | + } |
| 157 | + } else if (len <= 48) { |
| 158 | + // 17-48 bytes: process in 16-byte chunks, then read the tail. |
| 159 | + seed = hash_detail::RapidMix( |
| 160 | + hash_detail::RapidRead64(p) ^ hash_detail::kSecret[2], |
| 161 | + hash_detail::RapidRead64(p + 8) ^ seed); |
| 162 | + if (len > 32) { |
| 163 | + seed = hash_detail::RapidMix( |
| 164 | + hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[2], |
| 165 | + hash_detail::RapidRead64(p + 24) ^ seed); |
| 166 | + } |
| 167 | + a = hash_detail::RapidRead64(p + len - 16) ^ len; |
| 168 | + b = hash_detail::RapidRead64(p + len - 8); |
| 169 | + } else { |
| 170 | + // >48 bytes: process 48-byte chunks with three parallel mix lanes. |
| 171 | + uint64_t see1 = seed; |
| 172 | + uint64_t see2 = seed; |
| 173 | + do { |
| 174 | + seed = hash_detail::RapidMix( |
| 175 | + hash_detail::RapidRead64(p) ^ hash_detail::kSecret[0], |
| 176 | + hash_detail::RapidRead64(p + 8) ^ seed); |
| 177 | + see1 = hash_detail::RapidMix( |
| 178 | + hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[1], |
| 179 | + hash_detail::RapidRead64(p + 24) ^ see1); |
| 180 | + see2 = hash_detail::RapidMix( |
| 181 | + hash_detail::RapidRead64(p + 32) ^ hash_detail::kSecret[2], |
| 182 | + hash_detail::RapidRead64(p + 40) ^ see2); |
| 183 | + p += 48; |
| 184 | + i -= 48; |
| 185 | + } while (i > 48); |
| 186 | + seed ^= see1 ^ see2; |
| 187 | + // Process remaining 17-48 bytes. |
| 188 | + if (i > 16) { |
| 189 | + seed = hash_detail::RapidMix( |
| 190 | + hash_detail::RapidRead64(p) ^ hash_detail::kSecret[2], |
| 191 | + hash_detail::RapidRead64(p + 8) ^ seed); |
| 192 | + if (i > 32) { |
| 193 | + seed = hash_detail::RapidMix( |
| 194 | + hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[2], |
| 195 | + hash_detail::RapidRead64(p + 24) ^ seed); |
| 196 | + } |
| 197 | + } |
| 198 | + a = hash_detail::RapidRead64(p + i - 16) ^ i; |
| 199 | + b = hash_detail::RapidRead64(p + i - 8); |
| 200 | + } |
| 201 | + |
| 202 | + // Final mix. |
| 203 | + a ^= hash_detail::kSecret[1]; |
| 204 | + b ^= seed; |
| 205 | + hash_detail::RapidMum(&a, &b); |
| 206 | + return static_cast<size_t>(hash_detail::RapidMix( |
| 207 | + a ^ hash_detail::kSecret[7], b ^ hash_detail::kSecret[1] ^ len)); |
| 208 | +} |
| 209 | + |
| 210 | +} // namespace node |
| 211 | + |
| 212 | +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS |
0 commit comments