Line data Source code
1 : // A C version of Bob Jenkins' spooky hash
2 : // Spooky Hash
3 : // A 128-bit noncryptographic hash, for checksums and table lookup
4 : // By Bob Jenkins. Public domain.
5 : // Oct 31 2010: published framework, disclaimer ShortHash isn't right
6 : // Nov 7 2010: disabled ShortHash
7 : // Oct 11 2011: C version ported by Andi Kleen (andikleen@github)
8 : // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
9 : // Apr 10 2012: buffer overflow on platforms without unaligned reads
10 : // Apr 27 2012: C version updated by Ziga Zupanec ziga.zupanec@gmail.com (agiz@github)
11 :
// Assumes a little-endian platform; the caller must verify this before use.
13 :
#include <string.h>	// standard declarations for memcpy/memset
#include <memory.h>

#include "spooky-c.h"
17 :
18 : #if defined(__i386__) || defined(__x86_64__) // add more architectures here
19 : #define ALLOW_UNALIGNED_READS 1
20 : #else
21 : #define ALLOW_UNALIGNED_READS 0
22 : #endif
23 :
24 : // SC_CONST: a constant which:
25 : // * is not zero
26 : // * is odd
27 : // * is a not-very-regular mix of 1's and 0's
28 : // * does not need any other special mathematical properties
29 : #define SC_CONST 0xdeadbeefdeadbeefLL
30 :
// Rotate a 64-bit value left by k bits.
// Both shift counts are masked to 0..63: the original "x >> (64 - k)" is
// undefined behavior when k == 0 (shift by the full width of the type).
// Compilers recognize this masked idiom and still emit a single rotate
// instruction, so the change is free.
static inline uint64_t rot64(uint64_t x, int k) {
	return (x << (k & 63)) | (x >> ((64 - k) & 63));
}
34 :
35 : //
36 : // This is used if the input is 96 bytes long or longer.
37 : //
38 : // The internal state is fully overwritten every 96 bytes.
39 : // Every input bit appears to cause at least 128 bits of entropy
40 : // before 96 other bytes are combined, when run forward or backward
41 : // For every input bit,
42 : // Two inputs differing in just that input bit
43 : // Where "differ" means xor or subtraction
44 : // And the base value is random
45 : // When run forward or backwards one Mix
46 : // I tried 3 pairs of each; they all differed by at least 212 bits.
47 : //
// Core mixing step for long messages: folds one 96-byte block (data[0..11])
// into the 12-word internal state *s0..*s11.
// The body is twelve unrolled rounds; round i performs
//     s[i]         += data[i];
//     s[(i+2)%12]  ^= s[(i+10)%12];
//     s[(i+11)%12] ^= s[i];
//     s[i]          = rot64(s[i], R[i]);
//     s[(i+11)%12] += s[(i+1)%12];
// with rotation constants R = {11,32,43,31,17,28,39,57,55,54,22,46}.
// The exact statement order defines the hash output — do not reorder.
static inline void mix(const uint64_t *data, uint64_t *s0, uint64_t *s1, uint64_t *s2,
		uint64_t *s3, uint64_t *s4, uint64_t *s5, uint64_t *s6,
		uint64_t *s7, uint64_t *s8, uint64_t *s9, uint64_t *s10,
		uint64_t *s11) {
	// round 0
	*s0 += data[0];
	*s2 ^= *s10;
	*s11 ^= *s0;
	*s0 = rot64(*s0, 11);
	*s11 += *s1;
	// round 1
	*s1 += data[1];
	*s3 ^= *s11;
	*s0 ^= *s1;
	*s1 = rot64(*s1, 32);
	*s0 += *s2;
	// round 2
	*s2 += data[2];
	*s4 ^= *s0;
	*s1 ^= *s2;
	*s2 = rot64(*s2, 43);
	*s1 += *s3;
	// round 3
	*s3 += data[3];
	*s5 ^= *s1;
	*s2 ^= *s3;
	*s3 = rot64(*s3, 31);
	*s2 += *s4;
	// round 4
	*s4 += data[4];
	*s6 ^= *s2;
	*s3 ^= *s4;
	*s4 = rot64(*s4, 17);
	*s3 += *s5;
	// round 5
	*s5 += data[5];
	*s7 ^= *s3;
	*s4 ^= *s5;
	*s5 = rot64(*s5, 28);
	*s4 += *s6;
	// round 6
	*s6 += data[6];
	*s8 ^= *s4;
	*s5 ^= *s6;
	*s6 = rot64(*s6, 39);
	*s5 += *s7;
	// round 7
	*s7 += data[7];
	*s9 ^= *s5;
	*s6 ^= *s7;
	*s7 = rot64(*s7, 57);
	*s6 += *s8;
	// round 8
	*s8 += data[8];
	*s10 ^= *s6;
	*s7 ^= *s8;
	*s8 = rot64(*s8, 55);
	*s7 += *s9;
	// round 9
	*s9 += data[9];
	*s11 ^= *s7;
	*s8 ^= *s9;
	*s9 = rot64(*s9, 54);
	*s8 += *s10;
	// round 10
	*s10 += data[10];
	*s0 ^= *s8;
	*s9 ^= *s10;
	*s10 = rot64(*s10, 22);
	*s9 += *s11;
	// round 11
	*s11 += data[11];
	*s1 ^= *s9;
	*s10 ^= *s11;
	*s11 = rot64(*s11, 46);
	*s10 += *s0;
}
113 :
114 : //
115 : // Mix all 12 inputs together so that h0, h1 are a hash of them all.
116 : //
117 : // For two inputs differing in just the input bits
118 : // Where "differ" means xor or subtraction
119 : // And the base value is random, or a counting value starting at that bit
120 : // The final result will have each bit of h0, h1 flip
121 : // For every input bit,
122 : // with probability 50 +- .3%
123 : // For every pair of input bits,
124 : // with probability 50 +- 3%
125 : //
126 : // This does not rely on the last Mix() call having already mixed some.
127 : // Two iterations was almost good enough for a 64-bit result, but a
128 : // 128-bit result is reported, so End() does three iterations.
129 : //
// One avalanche pass over the 12-word state (no message input).
// Twelve unrolled rounds; round i performs
//     h[(i+11)%12] += h[(i+1)%12];
//     h[(i+2)%12]  ^= h[(i+11)%12];
//     h[(i+1)%12]   = rot64(h[(i+1)%12], R[i]);
// with rotation constants R = {44,15,34,21,38,33,10,13,38,53,42,54}.
// Statement order is part of the hash definition — do not reorder.
// Called three times by end() to produce the final mixing.
static inline void endPartial(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3,
		uint64_t *h4, uint64_t *h5, uint64_t *h6, uint64_t *h7,
		uint64_t *h8, uint64_t *h9, uint64_t *h10, uint64_t *h11) {
	*h11 += *h1;
	*h2 ^= *h11;
	*h1 = rot64(*h1, 44);
	*h0 += *h2;
	*h3 ^= *h0;
	*h2 = rot64(*h2, 15);
	*h1 += *h3;
	*h4 ^= *h1;
	*h3 = rot64(*h3, 34);
	*h2 += *h4;
	*h5 ^= *h2;
	*h4 = rot64(*h4, 21);
	*h3 += *h5;
	*h6 ^= *h3;
	*h5 = rot64(*h5, 38);
	*h4 += *h6;
	*h7 ^= *h4;
	*h6 = rot64(*h6, 33);
	*h5 += *h7;
	*h8 ^= *h5;
	*h7 = rot64(*h7, 10);
	*h6 += *h8;
	*h9 ^= *h6;
	*h8 = rot64(*h8, 13);
	*h7 += *h9;
	*h10 ^= *h7;
	*h9 = rot64(*h9, 38);
	*h8 += *h10;
	*h11 ^= *h8;
	*h10 = rot64(*h10, 53);
	*h9 += *h11;
	*h0 ^= *h9;
	*h11 = rot64(*h11, 42);
	*h10 += *h0;
	*h1 ^= *h10;
	*h0 = rot64(*h0, 54);
}
170 :
// Final mixing: run endPartial() three times over the state.
// Two iterations were almost sufficient for a 64-bit result; three are
// used because a 128-bit result is reported (see comment above).
static inline void end(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3,
		uint64_t *h4, uint64_t *h5, uint64_t *h6, uint64_t *h7,
		uint64_t *h8, uint64_t *h9, uint64_t *h10, uint64_t *h11) {
	int pass;
	for (pass = 0; pass < 3; pass++) {
		endPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
	}
}
178 :
179 : //
180 : // The goal is for each bit of the input to expand into 128 bits of
181 : // apparent entropy before it is fully overwritten.
182 : // n trials both set and cleared at least m bits of h0 h1 h2 h3
183 : // n: 2 m: 29
184 : // n: 3 m: 46
185 : // n: 4 m: 57
186 : // n: 5 m: 107
187 : // n: 6 m: 146
188 : // n: 7 m: 152
189 : // when run forwards or backwards
190 : // for all 1-bit and 2-bit diffs
191 : // with diffs defined by either xor or subtraction
192 : // with a base of all zeros plus a counter, or plus another bit, or random
// Mixing step for the short-message path: folds 32 bytes (already added
// to the state by the caller) through the 4-word state *h0..*h3.
// Twelve unrolled rounds; round i (with a cycling 2,3,0,1,...) performs
//     h[a]          = rot64(h[a], R[i]);
//     h[a]         += h[(a+1)%4];
//     h[(a+2)%4]   ^= h[a];
// with rotation constants R = {50,52,30,41,54,48,38,37,62,34,5,36}.
// Statement order is part of the hash definition — do not reorder.
static inline void short_mix(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3) {
	*h2 = rot64(*h2, 50);
	*h2 += *h3;
	*h0 ^= *h2;
	*h3 = rot64(*h3, 52);
	*h3 += *h0;
	*h1 ^= *h3;
	*h0 = rot64(*h0, 30);
	*h0 += *h1;
	*h2 ^= *h0;
	*h1 = rot64(*h1, 41);
	*h1 += *h2;
	*h3 ^= *h1;
	*h2 = rot64(*h2, 54);
	*h2 += *h3;
	*h0 ^= *h2;
	*h3 = rot64(*h3, 48);
	*h3 += *h0;
	*h1 ^= *h3;
	*h0 = rot64(*h0, 38);
	*h0 += *h1;
	*h2 ^= *h0;
	*h1 = rot64(*h1, 37);
	*h1 += *h2;
	*h3 ^= *h1;
	*h2 = rot64(*h2, 62);
	*h2 += *h3;
	*h0 ^= *h2;
	*h3 = rot64(*h3, 34);
	*h3 += *h0;
	*h1 ^= *h3;
	*h0 = rot64(*h0, 5);
	*h0 += *h1;
	*h2 ^= *h0;
	*h1 = rot64(*h1, 36);
	*h1 += *h2;
	*h3 ^= *h1;
}
232 :
233 : //
234 : // Mix all 4 inputs together so that h0, h1 are a hash of them all.
235 : //
236 : // For two inputs differing in just the input bits
237 : // Where "differ" means xor or subtraction
238 : // And the base value is random, or a counting value starting at that bit
239 : // The final result will have each bit of h0, h1 flip
240 : // For every input bit,
241 : // with probability 50 +- .3% (it is probably better than that)
242 : // For every pair of input bits,
243 : // with probability 50 +- .75% (the worst case is approximately that)
244 : //
// Final avalanche for the short-message path: mixes the 4-word state so
// that *h0, *h1 become the 128-bit hash of everything folded in so far.
// Eleven unrolled rounds; round i (with x cycling 3,0,1,2,...) performs
//     h[x]       ^= h[(x+3)%4];
//     h[(x+3)%4]  = rot64(h[(x+3)%4], R[i]);
//     h[x]       += h[(x+3)%4];
// with rotation constants R = {15,52,26,51,28,9,47,54,32,25,63}.
// Statement order is part of the hash definition — do not reorder.
static inline void short_end(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3) {
	*h3 ^= *h2;
	*h2 = rot64(*h2, 15);
	*h3 += *h2;
	*h0 ^= *h3;
	*h3 = rot64(*h3, 52);
	*h0 += *h3;
	*h1 ^= *h0;
	*h0 = rot64(*h0, 26);
	*h1 += *h0;
	*h2 ^= *h1;
	*h1 = rot64(*h1, 51);
	*h2 += *h1;
	*h3 ^= *h2;
	*h2 = rot64(*h2, 28);
	*h3 += *h2;
	*h0 ^= *h3;
	*h3 = rot64(*h3, 9);
	*h0 += *h3;
	*h1 ^= *h0;
	*h0 = rot64(*h0, 47);
	*h1 += *h0;
	*h2 ^= *h1;
	*h1 = rot64(*h1, 54);
	*h2 += *h1;
	*h3 ^= *h2;
	*h2 = rot64(*h2, 32);
	*h3 += *h2;
	*h0 ^= *h3;
	*h3 = rot64(*h3, 25);
	*h0 += *h3;
	*h1 ^= *h0;
	*h0 = rot64(*h0, 63);
	*h1 += *h0;
}
280 :
281 912008 : void spooky_shorthash(const void *message,
282 : size_t length,
283 : uint64_t *hash1,
284 : uint64_t *hash2) {
285 : uint64_t buf[2 * SC_NUMVARS];
286 : union {
287 : const uint8_t *p8;
288 : uint32_t *p32;
289 : uint64_t *p64;
290 : size_t i;
291 : } u;
292 : size_t remainder;
293 : uint64_t a, b, c, d;
294 912008 : u.p8 = (const uint8_t *)message;
295 :
296 : if(!ALLOW_UNALIGNED_READS && (u.i & 0x7)) {
297 : memcpy(buf, message, length);
298 : u.p64 = buf;
299 : }
300 :
301 912008 : remainder = length % 32;
302 912008 : a = *hash1;
303 912008 : b = *hash2;
304 912008 : c = SC_CONST;
305 912008 : d = SC_CONST;
306 :
307 912008 : if(length > 15) {
308 888242 : const uint64_t *endp = u.p64 + (length / 32) * 4;
309 :
310 : // handle all complete sets of 32 bytes
311 2198673 : for(; u.p64 < endp; u.p64 += 4) {
312 1310431 : c += u.p64[0];
313 1310431 : d += u.p64[1];
314 1310431 : short_mix(&a, &b, &c, &d);
315 1310431 : a += u.p64[2];
316 1310431 : b += u.p64[3];
317 : }
318 :
319 : // Handle the case of 16+ remaining bytes.
320 888242 : if(remainder >= 16) {
321 78186 : c += u.p64[0];
322 78186 : d += u.p64[1];
323 78186 : short_mix(&a, &b, &c, &d);
324 78186 : u.p64 += 2;
325 78186 : remainder -= 16;
326 : }
327 : }
328 :
329 : // Handle the last 0..15 bytes, and its length
330 912008 : d = ((uint64_t)length) << 56;
331 912008 : switch(remainder) {
332 : case 15:
333 392 : d += ((uint64_t)u.p8[14]) << 48;
334 : case 14:
335 1512 : d += ((uint64_t)u.p8[13]) << 40;
336 : case 13:
337 2716 : d += ((uint64_t)u.p8[12]) << 32;
338 : case 12:
339 4984 : d += u.p32[2];
340 4984 : c += u.p64[0];
341 4984 : break;
342 : case 11:
343 364 : d += ((uint64_t)u.p8[10]) << 16;
344 : case 10:
345 3001 : d += ((uint64_t)u.p8[9]) << 8;
346 : case 9:
347 3393 : d += (uint64_t)u.p8[8];
348 : case 8:
349 123065 : c += u.p64[0];
350 123065 : break;
351 : case 7:
352 56 : c += ((uint64_t)u.p8[6]) << 48;
353 : case 6:
354 140 : c += ((uint64_t)u.p8[5]) << 40;
355 : case 5:
356 336 : c += ((uint64_t)u.p8[4]) << 32;
357 : case 4:
358 7265 : c += u.p32[0];
359 7265 : break;
360 : case 3:
361 16363 : c += ((uint64_t)u.p8[2]) << 16;
362 : case 2:
363 64730 : c += ((uint64_t)u.p8[1]) << 8;
364 : case 1:
365 113542 : c += (uint64_t)u.p8[0];
366 113542 : break;
367 : case 0:
368 663168 : c += SC_CONST;
369 663168 : d += SC_CONST;
370 : }
371 912008 : short_end(&a, &b, &c, &d);
372 912027 : *hash1 = a;
373 912027 : *hash2 = b;
374 912027 : }
375 :
376 918755 : void spooky_hash128(const void *message,
377 : size_t length,
378 : uint64_t *hash1,
379 : uint64_t *hash2) {
380 : uint64_t h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
381 : uint64_t buf[SC_NUMVARS];
382 : uint64_t *endp;
383 : union {
384 : const uint8_t *p8;
385 : uint64_t *p64;
386 : uintptr_t i;
387 : } u;
388 : size_t remainder;
389 :
390 918755 : if(length < SC_BUFSIZE) {
391 912008 : spooky_shorthash(message, length, hash1, hash2);
392 912027 : return;
393 : }
394 :
395 6747 : h0 = h3 = h6 = h9 = *hash1;
396 6747 : h1 = h4 = h7 = h10 = *hash2;
397 6747 : h2 = h5 = h8 = h11 = SC_CONST;
398 :
399 6747 : u.p8 = (const uint8_t *)message;
400 6747 : endp = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
401 :
402 : // handle all whole blocks of SC_BLOCKSIZE bytes
403 : if(ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) {
404 906192 : while(u.p64 < endp) {
405 893694 : mix(u.p64, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
406 892698 : u.p64 += SC_NUMVARS;
407 : }
408 : } else {
409 : while(u.p64 < endp) {
410 : memcpy(buf, u.p64, SC_BLOCKSIZE);
411 : mix(buf, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
412 : u.p64 += SC_NUMVARS;
413 : }
414 : }
415 :
416 : // handle the last partial block of SC_BLOCKSIZE bytes
417 5751 : remainder = (length - ((const uint8_t *)endp - (const uint8_t *)message));
418 5751 : memcpy(buf, endp, remainder);
419 5751 : memset(((uint8_t *)buf) + remainder, 0, SC_BLOCKSIZE - remainder);
420 5751 : ((uint8_t *)buf)[SC_BLOCKSIZE - 1] = remainder;
421 5751 : mix(buf, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
422 :
423 : // do some final mixing
424 5757 : end(&h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
425 5755 : *hash1 = h0;
426 5755 : *hash2 = h1;
427 : }
428 :
// 64-bit convenience wrapper: the seed is used for both halves of the
// 128-bit seed, and the first half of the 128-bit result is returned.
uint64_t spooky_hash64(const void *message, size_t length, uint64_t seed) {
	uint64_t lo = seed;
	uint64_t hi = seed;
	spooky_hash128(message, length, &lo, &hi);
	return lo;
}
434 :
// 32-bit convenience wrapper: seeds both halves of the 128-bit hash with
// the 32-bit seed and truncates the first result word to 32 bits.
uint32_t spooky_hash32(const void *message, size_t length, uint32_t seed) {
	uint64_t lo, hi;
	lo = hi = seed;
	spooky_hash128(message, length, &lo, &hi);
	return (uint32_t)lo;
}
|