Line data Source code
1 : // city.c - cityhash-c
2 : // CityHash on C
3 : // Copyright (c) 2011-2012, Alexander Nusov
4 : //
5 : // - original copyright notice -
6 : // Copyright (c) 2011 Google, Inc.
7 : //
8 : // Permission is hereby granted, free of charge, to any person obtaining a copy
9 : // of this software and associated documentation files (the "Software"), to deal
10 : // in the Software without restriction, including without limitation the rights
11 : // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 : // copies of the Software, and to permit persons to whom the Software is
13 : // furnished to do so, subject to the following conditions:
14 : //
15 : // The above copyright notice and this permission notice shall be included in
16 : // all copies or substantial portions of the Software.
17 : //
18 : // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 : // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 : // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 : // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 : // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 : // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 : // THE SOFTWARE.
25 : //
26 : // CityHash, by Geoff Pike and Jyrki Alakuijala
27 : //
28 : // This file provides CityHash64() and related functions.
29 : //
30 : // It's probably possible to create even faster hash functions by
31 : // writing a program that systematically explores some of the space of
32 : // possible hash functions, by using SIMD instructions, or by
33 : // compromising on hash quality.
34 :
35 : #include <string.h>
36 : #include "city.h"
37 :
38 28797699 : static uint64 UNALIGNED_LOAD64(const char *p) {
39 : uint64 result;
40 28797699 : memcpy(&result, p, sizeof(result));
41 28797699 : return result;
42 : }
43 :
44 41549 : static uint32 UNALIGNED_LOAD32(const char *p) {
45 : uint32 result;
46 41549 : memcpy(&result, p, sizeof(result));
47 41549 : return result;
48 : }
49 :
50 : #if !defined(WORDS_BIGENDIAN)
51 :
52 : #define uint32_in_expected_order(x) (x)
53 : #define uint64_in_expected_order(x) (x)
54 :
55 : #else
56 :
57 : #ifdef _MSC_VER
58 : #include <stdlib.h>
59 : #define bswap_32(x) _byteswap_ulong(x)
60 : #define bswap_64(x) _byteswap_uint64(x)
61 :
62 : #elif defined(__APPLE__)
63 : // Mac OS X / Darwin features
64 : #include <libkern/OSByteOrder.h>
65 : #define bswap_32(x) OSSwapInt32(x)
66 : #define bswap_64(x) OSSwapInt64(x)
67 :
68 : #else
69 : #include <byteswap.h>
70 : #endif
71 :
72 : #define uint32_in_expected_order(x) (bswap_32(x))
73 : #define uint64_in_expected_order(x) (bswap_64(x))
74 :
75 : #endif // WORDS_BIGENDIAN
76 :
77 : #if !defined(LIKELY)
78 : #if HAVE_BUILTIN_EXPECT
79 : #define LIKELY(x) (__builtin_expect(!!(x), 1))
80 : #else
81 : #define LIKELY(x) (x)
82 : #endif
83 : #endif
84 :
85 27099638 : static uint64 Fetch64(const char *p) {
86 27099638 : return uint64_in_expected_order(UNALIGNED_LOAD64(p));
87 : }
88 :
89 41548 : static uint32 Fetch32(const char *p) {
90 41548 : return uint32_in_expected_order(UNALIGNED_LOAD32(p));
91 : }
92 :
93 : // Some primes between 2^63 and 2^64 for various uses.
94 : static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
95 : static const uint64 k1 = 0xb492b66fbe98f273ULL;
96 : static const uint64 k2 = 0x9ae16a3b2f90404fULL;
97 : static const uint64 k3 = 0xc949d7c7509e6557ULL;
98 :
99 : // Hash 128 input bits down to 64 bits of output.
100 : // This is intended to be a reasonably good hash function.
101 302252 : static inline uint64 Hash128to64(const uint128 x) {
102 : // Murmur-inspired hashing.
103 302252 : const uint64 kMul = 0x9ddfea08eb382d69ULL;
104 302252 : uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
105 302252 : a ^= (a >> 47);
106 302252 : uint64 b = (Uint128High64(x) ^ a) * kMul;
107 302252 : b ^= (b >> 47);
108 302252 : b *= kMul;
109 302252 : return b;
110 : }
111 :
112 : // Bitwise right rotate. Normally this will compile to a single
113 : // instruction, especially if the shift is a manifest constant.
114 20181643 : static uint64 Rotate(uint64 val, int shift) {
115 : // Avoid shifting by 64: doing so yields an undefined result.
116 20181643 : return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
117 : }
118 :
119 : // Equivalent to Rotate(), but requires the second arg to be non-zero.
120 : // On x86-64, and probably others, it's possible for this to compile
121 : // to a single instruction if both args are already in registers.
122 61 : static uint64 RotateByAtLeast1(uint64 val, int shift) {
123 61 : return (val >> shift) | (val << (64 - shift));
124 : }
125 :
126 186683 : static uint64 ShiftMix(uint64 val) {
127 186683 : return val ^ (val >> 47);
128 : }
129 :
130 302099 : static uint64 HashLen16(uint64 u, uint64 v) {
131 : uint128 result;
132 302099 : result.first = u;
133 302099 : result.second = v;
134 302099 : return Hash128to64(result);
135 : }
136 :
137 69166 : static uint64 HashLen0to16(const char *s, size_t len) {
138 69166 : if(len > 8) {
139 61 : uint64 a = Fetch64(s);
140 61 : uint64 b = Fetch64(s + len - 8);
141 61 : return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
142 : }
143 69105 : if(len >= 4) {
144 20770 : uint64 a = Fetch32(s);
145 20774 : return HashLen16(len + (a << 3), Fetch32(s + len - 4));
146 : }
147 48335 : if(len > 0) {
148 48335 : uint8 a = s[0];
149 48335 : uint8 b = s[len >> 1];
150 48335 : uint8 c = s[len - 1];
151 48335 : uint32 y = (uint32)(a) + ((uint32)(b) << 8);
152 48335 : uint32 z = len + ((uint32)(c) << 2);
153 48335 : return ShiftMix(y * k2 ^ z * k3) * k2;
154 : }
155 0 : return k2;
156 : }
157 :
158 : // This probably works well for 16-byte strings as well, but it may be overkill
159 : // in that case.
160 0 : static uint64 HashLen17to32(const char *s, size_t len) {
161 0 : uint64 a = Fetch64(s) * k1;
162 0 : uint64 b = Fetch64(s + 8);
163 0 : uint64 c = Fetch64(s + len - 8) * k2;
164 0 : uint64 d = Fetch64(s + len - 16) * k0;
165 0 : return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
166 0 : a + Rotate(b ^ k3, 20) - c + len);
167 : }
168 :
169 : // Return a 16-byte hash for 48 bytes. Quick and dirty.
170 : // Callers do best to use "random-looking" values for a and b.
171 : // static pair<uint64, uint64> WeakHashLen32WithSeeds(
172 7185110 : uint128 WeakHashLen32WithSeeds6(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
173 : uint64 b) {
174 7185110 : a += w;
175 7185110 : b = Rotate(b + a + z, 21);
176 7165589 : uint64 c = a;
177 7165589 : a += x;
178 7165589 : a += y;
179 7165589 : b += Rotate(a, 44);
180 :
181 : uint128 result;
182 7657533 : result.first = (uint64)(a + z);
183 7657533 : result.second = (uint64)(b + c);
184 7657533 : return result;
185 : }
186 :
187 : // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
188 : // static pair<uint64, uint64> WeakHashLen32WithSeeds(
189 7043499 : uint128 WeakHashLen32WithSeeds(const char *s, uint64 a, uint64 b) {
190 7043499 : return WeakHashLen32WithSeeds6(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
191 : Fetch64(s + 24), a, b);
192 : }
193 :
194 : // Return an 8-byte hash for 33 to 64 bytes.
195 0 : static uint64 HashLen33to64(const char *s, size_t len) {
196 0 : uint64 z = Fetch64(s + 24);
197 0 : uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
198 0 : uint64 b = Rotate(a + z, 52);
199 0 : uint64 c = Rotate(a, 37);
200 0 : a += Fetch64(s + 8);
201 0 : c += Rotate(a, 7);
202 0 : a += Fetch64(s + 16);
203 0 : uint64 vf = a + z;
204 0 : uint64 vs = b + Rotate(a, 31) + c;
205 0 : a = Fetch64(s + 16) + Fetch64(s + len - 32);
206 0 : z = Fetch64(s + len - 8);
207 0 : b = Rotate(a + z, 52);
208 0 : c = Rotate(a, 37);
209 0 : a += Fetch64(s + len - 24);
210 0 : c += Rotate(a, 7);
211 0 : a += Fetch64(s + len - 16);
212 0 : uint64 wf = a + z;
213 0 : uint64 ws = b + Rotate(a, 31) + c;
214 0 : uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
215 0 : return ShiftMix(r * k0 + vs) * k2;
216 : }
217 :
218 0 : uint64 CityHash64(const char *s, size_t len) {
219 0 : if(len <= 32) {
220 0 : if(len <= 16) {
221 0 : return HashLen0to16(s, len);
222 : } else {
223 0 : return HashLen17to32(s, len);
224 : }
225 0 : } else if(len <= 64) {
226 0 : return HashLen33to64(s, len);
227 : }
228 :
229 : // For strings over 64 bytes we hash the end first, and then as we
230 : // loop we keep 56 bytes of state: v, w, x, y, and z.
231 0 : uint64 x = Fetch64(s + len - 40);
232 0 : uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
233 0 : uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
234 : uint64 temp;
235 0 : uint128 v = WeakHashLen32WithSeeds(s + len - 64, len, z);
236 0 : uint128 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
237 0 : x = x * k1 + Fetch64(s);
238 :
239 : // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
240 0 : len = (len - 1) & ~(size_t)(63);
241 : do {
242 0 : x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
243 0 : y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
244 0 : x ^= w.second;
245 0 : y += v.first + Fetch64(s + 40);
246 0 : z = Rotate(z + w.first, 33) * k1;
247 0 : v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
248 0 : w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
249 0 : temp = z;
250 0 : z = x;
251 0 : x = temp;
252 0 : s += 64;
253 0 : len -= 64;
254 0 : } while(len != 0);
255 0 : return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
256 0 : HashLen16(v.second, w.second) + x);
257 : }
258 :
259 0 : uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
260 0 : return CityHash64WithSeeds(s, len, k2, seed);
261 : }
262 :
263 0 : uint64 CityHash64WithSeeds(const char *s, size_t len, uint64 seed0, uint64 seed1) {
264 0 : return HashLen16(CityHash64(s, len) - seed0, seed1);
265 : }
266 :
267 : // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
268 : // of any length representable in signed long. Based on City and Murmur.
269 69187 : static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
270 69187 : uint64 a = Uint128Low64(seed);
271 69187 : uint64 b = Uint128High64(seed);
272 69187 : uint64 c = 0;
273 69187 : uint64 d = 0;
274 69187 : signed long l = len - 16;
275 69187 : if(l <= 0) { // len <= 16
276 69169 : a = ShiftMix(a * k1) * k1;
277 69171 : c = b * k1 + HashLen0to16(s, len);
278 69201 : d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
279 : } else { // len > 16
280 18 : c = HashLen16(Fetch64(s + len - 8) + k1, a);
281 18 : d = HashLen16(b + len, c + Fetch64(s + len - 16));
282 18 : a += d;
283 : do {
284 54 : a ^= ShiftMix(Fetch64(s) * k1) * k1;
285 54 : a *= k1;
286 54 : b ^= a;
287 54 : c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
288 54 : c *= k1;
289 54 : d ^= c;
290 54 : s += 16;
291 54 : l -= 16;
292 54 : } while(l > 0);
293 : }
294 69198 : a = HashLen16(a, c);
295 69205 : b = HashLen16(d, b);
296 :
297 : uint128 result;
298 69200 : result.first = (uint64)(a ^ b);
299 69200 : result.second = (uint64)(HashLen16(b, a));
300 69214 : return result;
301 : }
302 :
303 87679 : uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
304 87679 : if(len < 128) {
305 69203 : return CityMurmur(s, len, seed);
306 : }
307 :
308 : // We expect len >= 128 to be the common case. Keep 56 bytes of state:
309 : // v, w, x, y, and z.
310 : uint128 v, w;
311 18476 : uint64 x = Uint128Low64(seed);
312 18476 : uint64 y = Uint128High64(seed);
313 18476 : uint64 z = len * k1;
314 : uint64 temp;
315 18476 : v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
316 18474 : v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
317 18471 : w.first = Rotate(y + z, 35) * k1 + x;
318 18472 : w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
319 :
320 : // This is the same inner loop as CityHash64(), manually unrolled.
321 : do {
322 2079540 : x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
323 2059548 : y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
324 2063360 : x ^= w.second;
325 2063360 : y += v.first + Fetch64(s + 40);
326 2037610 : z = Rotate(z + w.first, 33) * k1;
327 2053951 : v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
328 2074455 : w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
329 2083205 : temp = z;
330 2083205 : z = x;
331 2083205 : x = temp;
332 2083205 : s += 64;
333 2083205 : x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
334 2054841 : y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
335 2044018 : x ^= w.second;
336 2044018 : y += v.first + Fetch64(s + 40);
337 2028034 : z = Rotate(z + w.first, 33) * k1;
338 2047134 : v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
339 2071764 : w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
340 2079538 : temp = z;
341 2079538 : z = x;
342 2079538 : x = temp;
343 2079538 : s += 64;
344 2079538 : len -= 128;
345 2079538 : } while(LIKELY(len >= 128));
346 18470 : x += Rotate(v.first + z, 49) * k0;
347 18474 : z += Rotate(w.first, 37) * k0;
348 : // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
349 : size_t tail_done;
350 36964 : for(tail_done = 0; tail_done < len;) {
351 18 : tail_done += 32;
352 18 : y = Rotate(x + y, 42) * k0 + v.second;
353 18 : w.first += Fetch64(s + len - tail_done + 16);
354 18 : x = x * k0 + w.first;
355 18 : z += w.second + Fetch64(s + len - tail_done);
356 18 : w.second += v.first;
357 18 : v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
358 : }
359 : // At this point our 56 bytes of state should contain more than
360 : // enough information for a strong 128-bit hash. We use two
361 : // different 56-byte-to-8-byte hashes to get a 16-byte final result.
362 18473 : x = HashLen16(x, v.first);
363 18472 : y = HashLen16(y + z, w.first);
364 :
365 : uint128 result;
366 18473 : result.first = (uint64)(HashLen16(x + v.second, w.second) + y);
367 18475 : result.second = (uint64)HashLen16(x + w.second, y + v.second);
368 18473 : return result;
369 : }
370 :
371 0 : uint128 CityHash128(const char *s, size_t len) {
372 : uint128 r;
373 0 : if(len >= 16) {
374 0 : r.first = (uint64)(Fetch64(s) ^ k3);
375 0 : r.second = (uint64)(Fetch64(s + 8));
376 :
377 0 : return CityHash128WithSeed(s + 16, len - 16, r);
378 :
379 0 : } else if(len >= 8) {
380 0 : r.first = (uint64)(Fetch64(s) ^ (len * k0));
381 0 : r.second = (uint64)(Fetch64(s + len - 8) ^ k1);
382 :
383 0 : return CityHash128WithSeed(NULL, 0, r);
384 : } else {
385 0 : r.first = (uint64)k0;
386 0 : r.second = (uint64)k1;
387 0 : return CityHash128WithSeed(s, len, r);
388 : }
389 : }
390 :
391 : #include "../config.h"
392 :
393 : #if RM_PLATFORM_64 && HAVE_SSE42
394 : #include "citycrc.h"
395 : #include <nmmintrin.h>
396 :
397 : // Requires len >= 240.
398 : static void CityHashCrc256Long(const char *s, size_t len, uint32 seed, uint64 *result) {
399 : uint64 a = Fetch64(s + 56) + k0;
400 : uint64 b = Fetch64(s + 96) + k0;
401 : uint64 c = result[0] = HashLen16(b, len);
402 : uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
403 : uint64 e = Fetch64(s + 184) + seed;
404 : uint64 f = seed;
405 : uint64 g = 0;
406 : uint64 h = 0;
407 : uint64 i = 0;
408 : uint64 j = 0;
409 : uint64 t = c + d;
410 :
411 : // 240 bytes of input per iter.
412 : size_t iters = len / 240;
413 : len -= iters * 240;
414 : do {
415 : #define CHUNK(multiplier, z) \
416 : { \
417 : uint64 old_a = a; \
418 : a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \
419 : b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \
420 : c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \
421 : d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \
422 : e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \
423 : t = old_a; \
424 : } \
425 : f = _mm_crc32_u64(f, a); \
426 : g = _mm_crc32_u64(g, b); \
427 : h = _mm_crc32_u64(h, c); \
428 : i = _mm_crc32_u64(i, d); \
429 : j = _mm_crc32_u64(j, e); \
430 : s += 40
431 :
432 : CHUNK(1, 1);
433 : CHUNK(k0, 0);
434 : CHUNK(1, 1);
435 : CHUNK(k0, 0);
436 : CHUNK(1, 1);
437 : CHUNK(k0, 0);
438 : } while(--iters > 0);
439 :
440 : while(len >= 40) {
441 : CHUNK(k0, 0);
442 : len -= 40;
443 : }
444 : if(len > 0) {
445 : s = s + len - 40;
446 : CHUNK(k0, 0);
447 : }
448 : j += i << 32;
449 : a = HashLen16(a, j);
450 : h += g << 32;
451 : b += h;
452 : c = HashLen16(c, f) + i;
453 : d = HashLen16(d, e + result[0]);
454 : j += e;
455 : i += HashLen16(h, t);
456 : e = HashLen16(a, d) + j;
457 : f = HashLen16(b, c) + a;
458 : g = HashLen16(j, i) + c;
459 : result[0] = e + f + g + h;
460 : a = ShiftMix((a + g) * k0) * k0 + b;
461 : result[1] += a + result[0];
462 : a = ShiftMix(a * k0) * k0 + c;
463 : result[2] = a + result[1];
464 : a = ShiftMix((a + e) * k0) * k0;
465 : result[3] = a + result[2];
466 : }
467 :
468 : // Requires len < 240.
469 : static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
470 : char buf[240];
471 : memcpy(buf, s, len);
472 : memset(buf + len, 0, 240 - len);
473 : CityHashCrc256Long(buf, 240, ~(uint32)(len), result);
474 : }
475 :
476 : void CityHashCrc256(const char *s, size_t len, uint64 *result) {
477 : if(LIKELY(len >= 240)) {
478 : CityHashCrc256Long(s, len, 0, result);
479 : } else {
480 : CityHashCrc256Short(s, len, result);
481 : }
482 : }
483 :
484 : uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
485 : if(len <= 900) {
486 : return CityHash128WithSeed(s, len, seed);
487 : } else {
488 : uint64 result[4];
489 : CityHashCrc256(s, len, result);
490 : uint64 u = Uint128High64(seed) + result[0];
491 : uint64 v = Uint128Low64(seed) + result[1];
492 : uint128 crc;
493 : crc.first = (uint64)(HashLen16(u, v + result[2]));
494 : crc.second = (uint64)(HashLen16(Rotate(v, 32), u * k0 + result[3]));
495 : return crc;
496 : }
497 : }
498 :
499 : uint128 CityHashCrc128(const char *s, size_t len) {
500 : if(len <= 900) {
501 : return CityHash128(s, len);
502 : } else {
503 : uint64 result[4];
504 : CityHashCrc256(s, len, result);
505 : uint128 crc;
506 : crc.first = (uint64)result[2];
507 : crc.second = (uint64)result[3];
508 : return crc;
509 : }
510 : }
511 :
512 : #endif
|