LCOV - code coverage report
Current view: top level - lib/checksums - spooky-c.c
Test: coverage.info
Date: 2015-09-30 14:09:30

                  Hit    Total    Coverage
Lines:            257    257     100.0 %
Functions:         10     10     100.0 %

          Line data    Source code
       1             : // A C version of Bob Jenkins' spooky hash
       2             : // Spooky Hash
       3             : // A 128-bit noncryptographic hash, for checksums and table lookup
       4             : // By Bob Jenkins.  Public domain.
       5             : //   Oct 31 2010: published framework, disclaimer ShortHash isn't right
       6             : //   Nov 7 2010: disabled ShortHash
       7             : //   Oct 11 2011: C version ported by Andi Kleen (andikleen@github)
       8             : //   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
       9             : //   Apr 10 2012: fixed buffer overflow on platforms without unaligned reads
      10             : //   Apr 27 2012: C version updated by Ziga Zupanec ziga.zupanec@gmail.com (agiz@github)
      11             : 
      12             : //   Assumes little-endian byte order. The caller has to check for this.
      13             : 
      14             : #include <memory.h>
      15             : 
      16             : #include "spooky-c.h"
      17             : 
      18             : #if defined(__i386__) || defined(__x86_64__)  // add more architectures here
      19             : #define ALLOW_UNALIGNED_READS 1
      20             : #else
      21             : #define ALLOW_UNALIGNED_READS 0
      22             : #endif
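
ALLOW_UNALIGNED_READS gates whether the hash may read 64-bit words straight
from arbitrary addresses; on strict-alignment targets the code instead
memcpy()s input into an aligned buffer first. A minimal sketch of the same
idea as a per-word helper (load_u64 is a hypothetical name, not part of this
file):

    #include <stdint.h>
    #include <string.h>

    /* Load a 64-bit word from a possibly unaligned address. The memcpy
     * compiles to a single load on x86, and on strict-alignment CPUs it
     * avoids the trap that *(const uint64_t *)p could raise. spooky-c
     * copies whole blocks instead, but the principle is the same. */
    static inline uint64_t load_u64(const void *p) {
        uint64_t v;
        memcpy(&v, p, sizeof(v));
        return v;
    }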
      23             : 
      24             : // SC_CONST: a constant which:
      25             : //  * is not zero
      26             : //  * is odd
      27             : //  * is a not-very-regular mix of 1's and 0's
      28             : //  * does not need any other special mathematical properties
      29             : #define SC_CONST 0xdeadbeefdeadbeefLL
      30             : 
      31    32756372 : static inline uint64_t rot64(uint64_t x, int k) {
      32    32756372 :     return (x << k) | (x >> (64 - k));
      33             : }
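
rot64() is a plain left rotation. Note that the (x >> (64 - k)) term would be
undefined for k == 0; every call site below passes a constant in 1..63. A
tiny self-check, assuming it is compiled in the same translation unit as
rot64() and called from a test harness:

    #include <assert.h>

    /* Rotating left by k carries the top k bits around to the bottom. */
    static void rot64_selfcheck(void) {
        assert(rot64(0x8000000000000001ULL, 1) == 0x0000000000000003ULL);
        assert(rot64(0x0123456789abcdefULL, 32) == 0x89abcdef01234567ULL);
    }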
      34             : 
      35             : //
      36             : // This is used if the input is 96 bytes long or longer.
      37             : //
      38             : // The internal state is fully overwritten every 96 bytes.
      39             : // Every input bit appears to cause at least 128 bits of entropy
      40             : // before 96 other bytes are combined, when run forward or backward
      41             : //   For every input bit, I tried 3 pairs of inputs
      42             : //   differing in just that input bit
      43             : //   (where "differ" means xor or subtraction,
      44             : //   and the base value is random),
      45             : //   run forward or backwards one Mix;
      46             : // every pair differed by at least 212 bits afterwards.
      47             : //
      48      868261 : static inline void mix(const uint64_t *data, uint64_t *s0, uint64_t *s1, uint64_t *s2,
      49             :                        uint64_t *s3, uint64_t *s4, uint64_t *s5, uint64_t *s6,
      50             :                        uint64_t *s7, uint64_t *s8, uint64_t *s9, uint64_t *s10,
      51             :                        uint64_t *s11) {
      52      868261 :     *s0 += data[0];
      53      868261 :     *s2 ^= *s10;
      54      868261 :     *s11 ^= *s0;
      55      868261 :     *s0 = rot64(*s0, 11);
      56      845893 :     *s11 += *s1;
      57      845893 :     *s1 += data[1];
      58      845893 :     *s3 ^= *s11;
      59      845893 :     *s0 ^= *s1;
      60      845893 :     *s1 = rot64(*s1, 32);
      61      800856 :     *s0 += *s2;
      62      800856 :     *s2 += data[2];
      63      800856 :     *s4 ^= *s0;
      64      800856 :     *s1 ^= *s2;
      65      800856 :     *s2 = rot64(*s2, 43);
      66      817091 :     *s1 += *s3;
      67      817091 :     *s3 += data[3];
      68      817091 :     *s5 ^= *s1;
      69      817091 :     *s2 ^= *s3;
      70      817091 :     *s3 = rot64(*s3, 31);
      71      817389 :     *s2 += *s4;
      72      817389 :     *s4 += data[4];
      73      817389 :     *s6 ^= *s2;
      74      817389 :     *s3 ^= *s4;
      75      817389 :     *s4 = rot64(*s4, 17);
      76      815455 :     *s3 += *s5;
      77      815455 :     *s5 += data[5];
      78      815455 :     *s7 ^= *s3;
      79      815455 :     *s4 ^= *s5;
      80      815455 :     *s5 = rot64(*s5, 28);
      81      816605 :     *s4 += *s6;
      82      816605 :     *s6 += data[6];
      83      816605 :     *s8 ^= *s4;
      84      816605 :     *s5 ^= *s6;
      85      816605 :     *s6 = rot64(*s6, 39);
      86      817152 :     *s5 += *s7;
      87      817152 :     *s7 += data[7];
      88      817152 :     *s9 ^= *s5;
      89      817152 :     *s6 ^= *s7;
      90      817152 :     *s7 = rot64(*s7, 57);
      91      832620 :     *s6 += *s8;
      92      832620 :     *s8 += data[8];
      93      832620 :     *s10 ^= *s6;
      94      832620 :     *s7 ^= *s8;
      95      832620 :     *s8 = rot64(*s8, 55);
      96      932927 :     *s7 += *s9;
      97      932927 :     *s9 += data[9];
      98      932927 :     *s11 ^= *s7;
      99      932927 :     *s8 ^= *s9;
     100      932927 :     *s9 = rot64(*s9, 54);
     101      892302 :     *s8 += *s10;
     102      892302 :     *s10 += data[10];
     103      892302 :     *s0 ^= *s8;
     104      892302 :     *s9 ^= *s10;
     105      892302 :     *s10 = rot64(*s10, 22);
     106      896683 :     *s9 += *s11;
     107      896683 :     *s11 += data[11];
     108      896683 :     *s1 ^= *s9;
     109      896683 :     *s10 ^= *s11;
     110      896683 :     *s11 = rot64(*s11, 46);
     111      907415 :     *s10 += *s0;
     112      907415 : }
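
Each mix() call absorbs one SC_BLOCKSIZE (96-byte) block, i.e. twelve
uint64_t words, into the twelve-word state in place; spooky_hash128() below
drives exactly this loop over all whole blocks. A minimal sketch with an
all-zero block and state (illustrative values only):

    uint64_t s[SC_NUMVARS] = {0};            /* h0..h11, seeded by caller */
    const uint64_t block[SC_NUMVARS] = {0};  /* one 96-byte input block   */
    mix(block, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5],
        &s[6], &s[7], &s[8], &s[9], &s[10], &s[11]);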
     113             : 
     114             : //
     115             : // Mix all 12 inputs together so that h0, h1 are a hash of them all.
     116             : //
     117             : // For two inputs differing in just one input bit
     118             : // (where "differ" means xor or subtraction,
     119             : // and the base value is random, or a counting value starting at that bit),
     120             : // each bit of the final h0, h1 will flip
     121             : // for every input bit
     122             : // with probability 50% +- .3%,
     123             : // and for every pair of input bits
     124             : // with probability 50% +- 3%.
     125             : //
     126             : // This does not rely on the last Mix() call having already mixed some.
     127             : // Two iterations were almost good enough for a 64-bit result, but a
     128             : // 128-bit result is reported, so end() does three iterations.
     129             : //
     130       17261 : static inline void endPartial(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3,
     131             :                               uint64_t *h4, uint64_t *h5, uint64_t *h6, uint64_t *h7,
     132             :                               uint64_t *h8, uint64_t *h9, uint64_t *h10, uint64_t *h11) {
     133       17261 :     *h11 += *h1;
     134       17261 :     *h2 ^= *h11;
     135       17261 :     *h1 = rot64(*h1, 44);
     136       17262 :     *h0 += *h2;
     137       17262 :     *h3 ^= *h0;
     138       17262 :     *h2 = rot64(*h2, 15);
     139       17259 :     *h1 += *h3;
     140       17259 :     *h4 ^= *h1;
     141       17259 :     *h3 = rot64(*h3, 34);
     142       17257 :     *h2 += *h4;
     143       17257 :     *h5 ^= *h2;
     144       17257 :     *h4 = rot64(*h4, 21);
     145       17253 :     *h3 += *h5;
     146       17253 :     *h6 ^= *h3;
     147       17253 :     *h5 = rot64(*h5, 38);
     148       17254 :     *h4 += *h6;
     149       17254 :     *h7 ^= *h4;
     150       17254 :     *h6 = rot64(*h6, 33);
     151       17255 :     *h5 += *h7;
     152       17255 :     *h8 ^= *h5;
     153       17255 :     *h7 = rot64(*h7, 10);
     154       17256 :     *h6 += *h8;
     155       17256 :     *h9 ^= *h6;
     156       17256 :     *h8 = rot64(*h8, 13);
     157       17261 :     *h7 += *h9;
     158       17261 :     *h10 ^= *h7;
     159       17261 :     *h9 = rot64(*h9, 38);
     160       17255 :     *h8 += *h10;
     161       17255 :     *h11 ^= *h8;
     162       17255 :     *h10 = rot64(*h10, 53);
     163       17256 :     *h9 += *h11;
     164       17256 :     *h0 ^= *h9;
     165       17256 :     *h11 = rot64(*h11, 42);
     166       17259 :     *h10 += *h0;
     167       17259 :     *h1 ^= *h10;
     168       17259 :     *h0 = rot64(*h0, 54);
     169       17257 : }
     170             : 
     171        5757 : static inline void end(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3,
     172             :                        uint64_t *h4, uint64_t *h5, uint64_t *h6, uint64_t *h7,
     173             :                        uint64_t *h8, uint64_t *h9, uint64_t *h10, uint64_t *h11) {
     174        5757 :     endPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
     175        5756 :     endPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
     176        5755 :     endPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11);
     177        5755 : }
     178             : 
     179             : //
     180             : // The goal is for each bit of the input to expand into 128 bits of
     181             : //   apparent entropy before it is fully overwritten.
     182             : // n trials both set and cleared at least m bits of h0 h1 h2 h3
     183             : //   n: 2   m: 29
     184             : //   n: 3   m: 46
     185             : //   n: 4   m: 57
     186             : //   n: 5   m: 107
     187             : //   n: 6   m: 146
     188             : //   n: 7   m: 152
     189             : // when run forwards or backwards
     190             : // for all 1-bit and 2-bit diffs
     191             : // with diffs defined by either xor or subtraction
     192             : // with a base of all zeros plus a counter, or plus another bit, or random
     193             : //
     194     1388615 : static inline void short_mix(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3) {
     195     1388615 :     *h2 = rot64(*h2, 50);
     196     1388615 :     *h2 += *h3;
     197     1388615 :     *h0 ^= *h2;
     198     1388615 :     *h3 = rot64(*h3, 52);
     199     1388616 :     *h3 += *h0;
     200     1388616 :     *h1 ^= *h3;
     201     1388616 :     *h0 = rot64(*h0, 30);
     202     1388616 :     *h0 += *h1;
     203     1388616 :     *h2 ^= *h0;
     204     1388616 :     *h1 = rot64(*h1, 41);
     205     1388617 :     *h1 += *h2;
     206     1388617 :     *h3 ^= *h1;
     207     1388617 :     *h2 = rot64(*h2, 54);
     208     1388617 :     *h2 += *h3;
     209     1388617 :     *h0 ^= *h2;
     210     1388617 :     *h3 = rot64(*h3, 48);
     211     1388617 :     *h3 += *h0;
     212     1388617 :     *h1 ^= *h3;
     213     1388617 :     *h0 = rot64(*h0, 38);
     214     1388616 :     *h0 += *h1;
     215     1388616 :     *h2 ^= *h0;
     216     1388616 :     *h1 = rot64(*h1, 37);
     217     1388615 :     *h1 += *h2;
     218     1388615 :     *h3 ^= *h1;
     219     1388615 :     *h2 = rot64(*h2, 62);
     220     1388615 :     *h2 += *h3;
     221     1388615 :     *h0 ^= *h2;
     222     1388615 :     *h3 = rot64(*h3, 34);
     223     1388615 :     *h3 += *h0;
     224     1388615 :     *h1 ^= *h3;
     225     1388615 :     *h0 = rot64(*h0, 5);
     226     1388616 :     *h0 += *h1;
     227     1388616 :     *h2 ^= *h0;
     228     1388616 :     *h1 = rot64(*h1, 36);
     229     1388616 :     *h1 += *h2;
     230     1388616 :     *h3 ^= *h1;
     231     1388616 : }
     232             : 
     233             : //
     234             : // Mix all 4 inputs together so that h0, h1 are a hash of them all.
     235             : //
     236             : // For two inputs differing in just one input bit
     237             : // (where "differ" means xor or subtraction,
     238             : // and the base value is random, or a counting value starting at that bit),
     239             : // each bit of the final h0, h1 will flip
     240             : // for every input bit
     241             : // with probability 50% +- .3% (it is probably better than that),
     242             : // and for every pair of input bits
     243             : // with probability 50% +- .75% (the worst case is approximately that).
     244             : //
     245      912022 : static inline void short_end(uint64_t *h0, uint64_t *h1, uint64_t *h2, uint64_t *h3) {
     246      912022 :     *h3 ^= *h2;
     247      912022 :     *h2 = rot64(*h2, 15);
     248      912021 :     *h3 += *h2;
     249      912021 :     *h0 ^= *h3;
     250      912021 :     *h3 = rot64(*h3, 52);
     251      912020 :     *h0 += *h3;
     252      912020 :     *h1 ^= *h0;
     253      912020 :     *h0 = rot64(*h0, 26);
     254      912021 :     *h1 += *h0;
     255      912021 :     *h2 ^= *h1;
     256      912021 :     *h1 = rot64(*h1, 51);
     257      912021 :     *h2 += *h1;
     258      912021 :     *h3 ^= *h2;
     259      912021 :     *h2 = rot64(*h2, 28);
     260      912022 :     *h3 += *h2;
     261      912022 :     *h0 ^= *h3;
     262      912022 :     *h3 = rot64(*h3, 9);
     263      912022 :     *h0 += *h3;
     264      912022 :     *h1 ^= *h0;
     265      912022 :     *h0 = rot64(*h0, 47);
     266      912025 :     *h1 += *h0;
     267      912025 :     *h2 ^= *h1;
     268      912025 :     *h1 = rot64(*h1, 54);
     269      912026 :     *h2 += *h1;
     270      912026 :     *h3 ^= *h2;
     271      912026 :     *h2 = rot64(*h2, 32);
     272      912026 :     *h3 += *h2;
     273      912026 :     *h0 ^= *h3;
     274      912026 :     *h3 = rot64(*h3, 25);
     275      912025 :     *h0 += *h3;
     276      912025 :     *h1 ^= *h0;
     277      912025 :     *h0 = rot64(*h0, 63);
     278      912024 :     *h1 += *h0;
     279      912024 : }
     280             : 
     281      912008 : void spooky_shorthash(const void *message,
     282             :                       size_t length,
     283             :                       uint64_t *hash1,
     284             :                       uint64_t *hash2) {
     285             :     uint64_t buf[2 * SC_NUMVARS];
     286             :     union {
     287             :         const uint8_t *p8;
     288             :         uint32_t *p32;
     289             :         uint64_t *p64;
      290             :         uintptr_t i; /* pointer-sized, matching spooky_hash128's union */
     291             :     } u;
     292             :     size_t remainder;
     293             :     uint64_t a, b, c, d;
     294      912008 :     u.p8 = (const uint8_t *)message;
     295             : 
     296             :     if(!ALLOW_UNALIGNED_READS && (u.i & 0x7)) {
     297             :         memcpy(buf, message, length);
     298             :         u.p64 = buf;
     299             :     }
     300             : 
     301      912008 :     remainder = length % 32;
     302      912008 :     a = *hash1;
     303      912008 :     b = *hash2;
     304      912008 :     c = SC_CONST;
     305      912008 :     d = SC_CONST;
     306             : 
     307      912008 :     if(length > 15) {
     308      888242 :         const uint64_t *endp = u.p64 + (length / 32) * 4;
     309             : 
     310             :         // handle all complete sets of 32 bytes
     311     2198673 :         for(; u.p64 < endp; u.p64 += 4) {
     312     1310431 :             c += u.p64[0];
     313     1310431 :             d += u.p64[1];
     314     1310431 :             short_mix(&a, &b, &c, &d);
     315     1310431 :             a += u.p64[2];
     316     1310431 :             b += u.p64[3];
     317             :         }
     318             : 
     319             :         // Handle the case of 16+ remaining bytes.
     320      888242 :         if(remainder >= 16) {
     321       78186 :             c += u.p64[0];
     322       78186 :             d += u.p64[1];
     323       78186 :             short_mix(&a, &b, &c, &d);
     324       78186 :             u.p64 += 2;
     325       78186 :             remainder -= 16;
     326             :         }
     327             :     }
     328             : 
      329             :     // Handle the last 0..15 bytes and fold in the length; the cases below fall through deliberately.
     330      912008 :     d = ((uint64_t)length) << 56;
     331      912008 :     switch(remainder) {
     332             :     case 15:
     333         392 :         d += ((uint64_t)u.p8[14]) << 48;
     334             :     case 14:
     335        1512 :         d += ((uint64_t)u.p8[13]) << 40;
     336             :     case 13:
     337        2716 :         d += ((uint64_t)u.p8[12]) << 32;
     338             :     case 12:
     339        4984 :         d += u.p32[2];
     340        4984 :         c += u.p64[0];
     341        4984 :         break;
     342             :     case 11:
     343         364 :         d += ((uint64_t)u.p8[10]) << 16;
     344             :     case 10:
     345        3001 :         d += ((uint64_t)u.p8[9]) << 8;
     346             :     case 9:
     347        3393 :         d += (uint64_t)u.p8[8];
     348             :     case 8:
     349      123065 :         c += u.p64[0];
     350      123065 :         break;
     351             :     case 7:
     352          56 :         c += ((uint64_t)u.p8[6]) << 48;
     353             :     case 6:
     354         140 :         c += ((uint64_t)u.p8[5]) << 40;
     355             :     case 5:
     356         336 :         c += ((uint64_t)u.p8[4]) << 32;
     357             :     case 4:
     358        7265 :         c += u.p32[0];
     359        7265 :         break;
     360             :     case 3:
     361       16363 :         c += ((uint64_t)u.p8[2]) << 16;
     362             :     case 2:
     363       64730 :         c += ((uint64_t)u.p8[1]) << 8;
     364             :     case 1:
     365      113542 :         c += (uint64_t)u.p8[0];
     366      113542 :         break;
     367             :     case 0:
     368      663168 :         c += SC_CONST;
     369      663168 :         d += SC_CONST;
     370             :     }
     371      912008 :     short_end(&a, &b, &c, &d);
     372      912027 :     *hash1 = a;
     373      912027 :     *hash2 = b;
     374      912027 : }
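
spooky_shorthash() treats *hash1/*hash2 as both seed and result, and it is
also what spooky_hash128() delegates to for inputs shorter than SC_BUFSIZE,
so calling it directly is equivalent for short messages. A usage sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include "spooky-c.h"

    int main(void) {
        const char msg[] = "hello world";
        uint64_t h1 = 0x0123456789abcdefULL;   /* seed in, hash out */
        uint64_t h2 = 0xfedcba9876543210ULL;
        spooky_shorthash(msg, strlen(msg), &h1, &h2);
        printf("%016llx%016llx\n", (unsigned long long)h1,
               (unsigned long long)h2);
        return 0;
    }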
     375             : 
     376      918755 : void spooky_hash128(const void *message,
     377             :                     size_t length,
     378             :                     uint64_t *hash1,
     379             :                     uint64_t *hash2) {
     380             :     uint64_t h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
     381             :     uint64_t buf[SC_NUMVARS];
     382             :     uint64_t *endp;
     383             :     union {
     384             :         const uint8_t *p8;
     385             :         uint64_t *p64;
     386             :         uintptr_t i;
     387             :     } u;
     388             :     size_t remainder;
     389             : 
     390      918755 :     if(length < SC_BUFSIZE) {
     391      912008 :         spooky_shorthash(message, length, hash1, hash2);
     392      912027 :         return;
     393             :     }
     394             : 
     395        6747 :     h0 = h3 = h6 = h9 = *hash1;
     396        6747 :     h1 = h4 = h7 = h10 = *hash2;
     397        6747 :     h2 = h5 = h8 = h11 = SC_CONST;
     398             : 
     399        6747 :     u.p8 = (const uint8_t *)message;
     400        6747 :     endp = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
     401             : 
     402             :     // handle all whole blocks of SC_BLOCKSIZE bytes
     403             :     if(ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) {
     404      906192 :         while(u.p64 < endp) {
     405      893694 :             mix(u.p64, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
     406      892698 :             u.p64 += SC_NUMVARS;
     407             :         }
     408             :     } else {
     409             :         while(u.p64 < endp) {
     410             :             memcpy(buf, u.p64, SC_BLOCKSIZE);
     411             :             mix(buf, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
     412             :             u.p64 += SC_NUMVARS;
     413             :         }
     414             :     }
     415             : 
      416             :     // handle the last partial block: zero-pad it and tag the final byte with the remainder length
     417        5751 :     remainder = (length - ((const uint8_t *)endp - (const uint8_t *)message));
     418        5751 :     memcpy(buf, endp, remainder);
     419        5751 :     memset(((uint8_t *)buf) + remainder, 0, SC_BLOCKSIZE - remainder);
     420        5751 :     ((uint8_t *)buf)[SC_BLOCKSIZE - 1] = remainder;
     421        5751 :     mix(buf, &h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
     422             : 
     423             :     // do some final mixing
     424        5757 :     end(&h0, &h1, &h2, &h3, &h4, &h5, &h6, &h7, &h8, &h9, &h10, &h11);
     425        5755 :     *hash1 = h0;
     426        5755 :     *hash2 = h1;
     427             : }
     428             : 
     429        9745 : uint64_t spooky_hash64(const void *message, size_t length, uint64_t seed) {
     430        9745 :     uint64_t hash1 = seed;
     431        9745 :     spooky_hash128(message, length, &hash1, &seed);
     432        9750 :     return hash1;
     433             : }
     434             : 
     435      898281 : uint32_t spooky_hash32(const void *message, size_t length, uint32_t seed) {
     436      898281 :     uint64_t hash1 = seed, hash2 = seed;
     437      898281 :     spooky_hash128(message, length, &hash1, &hash2);
     438      898283 :     return (uint32_t)hash1;
     439             : }
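
The three public entry points share one implementation: spooky_hash64() and
spooky_hash32() widen the seed, call spooky_hash128(), and truncate the
result. A usage sketch covering all three:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include "spooky-c.h"

    int main(void) {
        const char msg[] = "the quick brown fox";
        uint64_t h1 = 1, h2 = 2;   /* 128-bit variant: seeds in, hash out */
        spooky_hash128(msg, strlen(msg), &h1, &h2);
        printf("128: %016llx %016llx\n",
               (unsigned long long)h1, (unsigned long long)h2);
        printf(" 64: %016llx\n",
               (unsigned long long)spooky_hash64(msg, strlen(msg), 1));
        printf(" 32: %08lx\n",
               (unsigned long)spooky_hash32(msg, strlen(msg), 1));
        return 0;
    }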

Generated by: LCOV version 1.11