Skip to content

Commit b5c4277

Browse files
committed
quic: implement rapidhash for hashing improvements
Signed-off-by: James M Snell <jasnell@gmail.com> Assisted-by: Opencode:Opus 4.6 PR-URL: nodejs/node#62620 Reviewed-By: Robert Nagy <ronagy@icloud.com> Reviewed-By: Tim Perry <pimterry@gmail.com>
1 parent e6ef477 commit b5c4277

6 files changed

Lines changed: 229 additions & 33 deletions

File tree

node.gyp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@
269269
'src/node_mem.h',
270270
'src/node_mem-inl.h',
271271
'src/node_messaging.h',
272+
'src/node_hash.h',
272273
'src/node_metadata.h',
273274
'src/node_mutex.h',
274275
'src/node_diagnostics_channel.h',

src/node_hash.h

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
#pragma once
2+
3+
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
4+
5+
// Fast, high-quality hash function for byte sequences.
6+
//
7+
// Provides HashBytes() for use in hash tables. Uses native-width integer
8+
// loads and a 128-bit multiply-and-fold mixer for excellent avalanche
9+
// properties on short byte sequences (network identifiers, addresses,
10+
// tokens).
11+
//
12+
// Based on rapidhash V3 by Nicolas De Carli, which evolved from wyhash
13+
// by Wang Yi. Both use the same core mixing primitive (MUM: multiply,
14+
// then XOR the high and low halves of the 128-bit result).
15+
//
16+
// rapidhash: https://github.com/Nicoshev/rapidhash
17+
// Copyright (C) 2025 Nicolas De Carli — MIT License
18+
// wyhash: https://github.com/wangyi-fudan/wyhash
19+
// Wang Yi — public domain (The Unlicense)
20+
//
21+
// The implementation here uses rapidhash's read strategy (native-width
22+
// overlapping reads, optimized for short inputs) and secret constants.
23+
// The core mixing function (rapid_mum/rapid_mix) is identical to
24+
// wyhash's wymum/wymix.
25+
26+
#include <cstddef>
27+
#include <cstdint>
28+
#include <cstring>
29+
30+
#if defined(_MSC_VER)
31+
#include <intrin.h>
32+
#if defined(_M_X64) && !defined(_M_ARM64EC)
33+
#pragma intrinsic(_umul128)
34+
#endif
35+
#endif
36+
37+
namespace node {
38+
39+
namespace hash_detail {
40+
41+
// 128-bit multiply, then XOR the high and low halves.
42+
// This is the core mixing function ("rapid_mum" / "wymum").
43+
// On 64-bit platforms with __int128, this compiles to a single
44+
// mul instruction + shift + xor.
45+
inline uint64_t RapidMix(uint64_t a, uint64_t b) {
46+
#ifdef __SIZEOF_INT128__
47+
__uint128_t r = static_cast<__uint128_t>(a) * b;
48+
a = static_cast<uint64_t>(r);
49+
b = static_cast<uint64_t>(r >> 64);
50+
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
51+
#if defined(_M_X64)
52+
a = _umul128(a, b, &b);
53+
#else
54+
uint64_t hi = __umulh(a, b);
55+
a = a * b;
56+
b = hi;
57+
#endif
58+
#else
59+
// Portable 64x64 -> 128-bit multiply fallback for 32-bit platforms.
60+
uint64_t ha = a >> 32, hb = b >> 32;
61+
uint64_t la = static_cast<uint32_t>(a), lb = static_cast<uint32_t>(b);
62+
uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb;
63+
uint64_t t = rl + (rm0 << 32);
64+
uint64_t lo = t + (rm1 << 32);
65+
uint64_t hi = rh + (rm0 >> 32) + (rm1 >> 32) + (t < rl) + (lo < t);
66+
a = lo;
67+
b = hi;
68+
#endif
69+
return a ^ b;
70+
}
71+
72+
// Inline 128-bit multiply WITHOUT the final XOR (used in the
73+
// penultimate mixing step where a and b are updated separately).
74+
inline void RapidMum(uint64_t* a, uint64_t* b) {
75+
#ifdef __SIZEOF_INT128__
76+
__uint128_t r = static_cast<__uint128_t>(*a) * (*b);
77+
*a = static_cast<uint64_t>(r);
78+
*b = static_cast<uint64_t>(r >> 64);
79+
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
80+
#if defined(_M_X64)
81+
*a = _umul128(*a, *b, b);
82+
#else
83+
uint64_t hi = __umulh(*a, *b);
84+
*a = (*a) * (*b);
85+
*b = hi;
86+
#endif
87+
#else
88+
uint64_t ha = *a >> 32, hb = *b >> 32;
89+
uint64_t la = static_cast<uint32_t>(*a), lb = static_cast<uint32_t>(*b);
90+
uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb;
91+
uint64_t t = rl + (rm0 << 32);
92+
*a = t + (rm1 << 32);
93+
*b = rh + (rm0 >> 32) + (rm1 >> 32) + (t < rl) + (*a < t);
94+
#endif
95+
}
96+
97+
// Read functions. The compiler optimizes small fixed-size memcpy calls
98+
// to single load instructions — no actual byte-by-byte copy occurs.
99+
inline uint64_t RapidRead64(const uint8_t* p) {
100+
uint64_t v;
101+
memcpy(&v, p, sizeof(uint64_t));
102+
return v;
103+
}
104+
105+
inline uint64_t RapidRead32(const uint8_t* p) {
106+
uint32_t v;
107+
memcpy(&v, p, sizeof(uint32_t));
108+
return v;
109+
}
110+
111+
// Default rapidhash secret parameters.
112+
constexpr uint64_t kSecret[8] = {0x2d358dccaa6c78a5ULL,
113+
0x8bb84b93962eacc9ULL,
114+
0x4b33a62ed433d4a3ULL,
115+
0x4d5a2da51de1aa47ULL,
116+
0xa0761d6478bd642fULL,
117+
0xe7037ed1a0b428dbULL,
118+
0x90ed1765281c388cULL,
119+
0xaaaaaaaaaaaaaaaaULL};
120+
121+
} // namespace hash_detail
122+
123+
// Hash a contiguous byte range. Optimized for short inputs (≤48 bytes)
124+
// which is the common case for network identifiers and addresses. For
125+
// inputs >48 bytes, falls through to a loop processing 48-byte chunks.
126+
inline size_t HashBytes(const void* data, size_t len) {
127+
const uint8_t* p = static_cast<const uint8_t*>(data);
128+
129+
// Seed initialization.
130+
uint64_t seed = hash_detail::RapidMix(0 ^ hash_detail::kSecret[2],
131+
hash_detail::kSecret[1]);
132+
uint64_t a = 0;
133+
uint64_t b = 0;
134+
size_t i = len;
135+
136+
if (len <= 16) {
137+
if (len >= 4) {
138+
// Mix length into seed for better distribution of
139+
// different-length inputs with shared prefixes.
140+
seed ^= len;
141+
if (len >= 8) {
142+
// 8-16 bytes: two native 64-bit reads (overlapping from end).
143+
a = hash_detail::RapidRead64(p);
144+
b = hash_detail::RapidRead64(p + len - 8);
145+
} else {
146+
// 4-7 bytes: two 32-bit reads (overlapping from end).
147+
a = hash_detail::RapidRead32(p);
148+
b = hash_detail::RapidRead32(p + len - 4);
149+
}
150+
} else if (len > 0) {
151+
// 1-3 bytes: spread bytes across two values for mixing.
152+
a = (static_cast<uint64_t>(p[0]) << 45) | p[len - 1];
153+
b = p[len >> 1];
154+
} else {
155+
a = b = 0;
156+
}
157+
} else if (len <= 48) {
158+
// 17-48 bytes: process in 16-byte chunks, then read the tail.
159+
seed = hash_detail::RapidMix(
160+
hash_detail::RapidRead64(p) ^ hash_detail::kSecret[2],
161+
hash_detail::RapidRead64(p + 8) ^ seed);
162+
if (len > 32) {
163+
seed = hash_detail::RapidMix(
164+
hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[2],
165+
hash_detail::RapidRead64(p + 24) ^ seed);
166+
}
167+
a = hash_detail::RapidRead64(p + len - 16) ^ len;
168+
b = hash_detail::RapidRead64(p + len - 8);
169+
} else {
170+
// >48 bytes: process 48-byte chunks with three parallel mix lanes.
171+
uint64_t see1 = seed;
172+
uint64_t see2 = seed;
173+
do {
174+
seed = hash_detail::RapidMix(
175+
hash_detail::RapidRead64(p) ^ hash_detail::kSecret[0],
176+
hash_detail::RapidRead64(p + 8) ^ seed);
177+
see1 = hash_detail::RapidMix(
178+
hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[1],
179+
hash_detail::RapidRead64(p + 24) ^ see1);
180+
see2 = hash_detail::RapidMix(
181+
hash_detail::RapidRead64(p + 32) ^ hash_detail::kSecret[2],
182+
hash_detail::RapidRead64(p + 40) ^ see2);
183+
p += 48;
184+
i -= 48;
185+
} while (i > 48);
186+
seed ^= see1 ^ see2;
187+
// Process remaining 17-48 bytes.
188+
if (i > 16) {
189+
seed = hash_detail::RapidMix(
190+
hash_detail::RapidRead64(p) ^ hash_detail::kSecret[2],
191+
hash_detail::RapidRead64(p + 8) ^ seed);
192+
if (i > 32) {
193+
seed = hash_detail::RapidMix(
194+
hash_detail::RapidRead64(p + 16) ^ hash_detail::kSecret[2],
195+
hash_detail::RapidRead64(p + 24) ^ seed);
196+
}
197+
}
198+
a = hash_detail::RapidRead64(p + i - 16) ^ i;
199+
b = hash_detail::RapidRead64(p + i - 8);
200+
}
201+
202+
// Final mix.
203+
a ^= hash_detail::kSecret[1];
204+
b ^= seed;
205+
hash_detail::RapidMum(&a, &b);
206+
return static_cast<size_t>(hash_detail::RapidMix(
207+
a ^ hash_detail::kSecret[7], b ^ hash_detail::kSecret[1] ^ len));
208+
}
209+
210+
} // namespace node
211+
212+
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

src/node_sockaddr-inl.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@ namespace node {
1616

1717
static constexpr uint32_t kLabelMask = 0xFFFFF;
1818

19-
inline void hash_combine(size_t* seed) { }
20-
21-
template <typename T, typename... Args>
22-
inline void hash_combine(size_t* seed, const T& value, Args... rest) {
23-
*seed ^= std::hash<T>{}(value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
24-
hash_combine(seed, rest...);
25-
}
26-
2719
bool SocketAddress::is_numeric_host(const char* hostname) {
2820
return is_numeric_host(hostname, AF_INET) ||
2921
is_numeric_host(hostname, AF_INET6);

src/node_sockaddr.cc

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "memory_tracker-inl.h"
55
#include "nbytes.h"
66
#include "node_errors.h"
7+
#include "node_hash.h"
78
#include "node_sockaddr-inl.h" // NOLINT(build/include_inline)
89
#include "uv.h"
910

@@ -77,26 +78,28 @@ bool SocketAddress::New(
7778
}
7879

7980
size_t SocketAddress::Hash::operator()(const SocketAddress& addr) const {
80-
size_t hash = 0;
81+
// Hash only the meaningful bytes (family + port + address), not the
82+
// full 128-byte sockaddr_storage.
8183
switch (addr.family()) {
8284
case AF_INET: {
8385
const sockaddr_in* ipv4 =
8486
reinterpret_cast<const sockaddr_in*>(addr.raw());
85-
hash_combine(&hash, ipv4->sin_port, ipv4->sin_addr.s_addr);
86-
break;
87+
uint8_t buf[6];
88+
memcpy(buf, &ipv4->sin_port, 2);
89+
memcpy(buf + 2, &ipv4->sin_addr, 4);
90+
return HashBytes(buf, sizeof(buf));
8791
}
8892
case AF_INET6: {
8993
const sockaddr_in6* ipv6 =
9094
reinterpret_cast<const sockaddr_in6*>(addr.raw());
91-
const uint64_t* a =
92-
reinterpret_cast<const uint64_t*>(&ipv6->sin6_addr);
93-
hash_combine(&hash, ipv6->sin6_port, a[0], a[1]);
94-
break;
95+
uint8_t buf[18];
96+
memcpy(buf, &ipv6->sin6_port, 2);
97+
memcpy(buf + 2, &ipv6->sin6_addr, 16);
98+
return HashBytes(buf, sizeof(buf));
9599
}
96100
default:
97101
UNREACHABLE();
98102
}
99-
return hash;
100103
}
101104

102105
SocketAddress SocketAddress::FromSockName(const uv_tcp_t& handle) {

src/quic/cid.cc

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#ifndef OPENSSL_NO_QUIC
44
#include <crypto/crypto_util.h>
55
#include <memory_tracker-inl.h>
6+
#include <node_hash.h>
67
#include <node_mutex.h>
78
#include <string_bytes.h>
89
#include "cid.h"
@@ -85,16 +86,7 @@ const CID CID::kInvalid{};
8586
// CID::Hash
8687

8788
size_t CID::Hash::operator()(const CID& cid) const {
88-
// Uses the Boost hash_combine strategy: XOR each byte with the golden
89-
// ratio constant 0x9e3779b9 (derived from the fractional part of the
90-
// golden ratio, (sqrt(5)-1)/2 * 2^32) plus bit-shifted accumulator
91-
// state. This provides good avalanche properties for short byte
92-
// sequences like connection IDs (1-20 bytes).
93-
size_t hash = 0;
94-
for (size_t n = 0; n < cid.length(); n++) {
95-
hash ^= cid.ptr_->data[n] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
96-
}
97-
return hash;
89+
return HashBytes(cid.ptr_->data, cid.length());
9890
}
9991

10092
// ============================================================================

src/quic/tokens.cc

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#ifndef OPENSSL_NO_QUIC
44
#include <crypto/crypto_util.h>
55
#include <ngtcp2/ngtcp2_crypto.h>
6+
#include <node_hash.h>
67
#include <node_sockaddr-inl.h>
78
#include <string_bytes.h>
89
#include <util-inl.h>
@@ -126,13 +127,8 @@ std::string StatelessResetToken::ToString() const {
126127

127128
size_t StatelessResetToken::Hash::operator()(
128129
const StatelessResetToken& token) const {
129-
// See CID::Hash for details on this hash combine strategy.
130-
size_t hash = 0;
131-
if (token.ptr_ == nullptr) return hash;
132-
for (size_t n = 0; n < kStatelessTokenLen; n++) {
133-
hash ^= token.ptr_[n] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
134-
}
135-
return hash;
130+
if (token.ptr_ == nullptr) return 0;
131+
return HashBytes(token.ptr_, kStatelessTokenLen);
136132
}
137133

138134
StatelessResetToken StatelessResetToken::kInvalid;

0 commit comments

Comments
 (0)