LCOV - code coverage report
Current view: top level - lib - checksum.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 373 395 94.4 %
Date: 2015-09-30 14:09:30 Functions: 27 28 96.4 %

          Line data    Source code
       1             : /*
       2             :  *  This file is part of rmlint.
       3             :  *
       4             :  *  rmlint is free software: you can redistribute it and/or modify
       5             :  *  it under the terms of the GNU General Public License as published by
       6             :  *  the Free Software Foundation, either version 3 of the License, or
       7             :  *  (at your option) any later version.
       8             :  *
       9             :  *  rmlint is distributed in the hope that it will be useful,
      10             :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *  GNU General Public License for more details.
      13             :  *
      14             :  *  You should have received a copy of the GNU General Public License
      15             :  *  along with rmlint.  If not, see <http://www.gnu.org/licenses/>.
      16             :  *
      17             :  * Authors:
      18             :  *
      19             :  *  - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
      20             :  *  - Daniel <SeeSpotRun> T.   2014-2015 (https://github.com/SeeSpotRun)
      21             :  *
      22             :  * Hosted on http://github.com/sahib/rmlint
      23             :  *
      24             :  */
      25             : 
      26             : #include <stdio.h>
      27             : #include <string.h>
      28             : #include <glib.h>
      29             : 
      30             : #include <sys/mman.h>
      31             : #include <sys/stat.h>
      32             : #include <unistd.h>
      33             : #include <fcntl.h>
      34             : 
      35             : #include "checksum.h"
      36             : 
      37             : #include "checksums/city.h"
      38             : #include "checksums/citycrc.h"
      39             : #include "checksums/murmur3.h"
      40             : #include "checksums/spooky-c.h"
      41             : #include "checksums/xxhash/xxhash.h"
      42             : 
      43             : #define _RM_CHECKSUM_DEBUG 0
      44             : 
      45             : 
      46             : 
      47             : ///////////////////////////////////////
      48             : //    BUFFER POOL IMPLEMENTATION     //
      49             : ///////////////////////////////////////
      50             : 
      51         140 : RmOff rm_buffer_size(RmBufferPool *pool) {
      52         140 :     return pool->buffer_size;
      53             : }
      54             : 
      55      173289 : RmBuffer *rm_buffer_new(RmBufferPool *pool) {
      56      173289 :     RmBuffer *self = g_slice_new0(RmBuffer);
      57      173289 :     self->pool = pool;
      58      173289 :     self->data = g_slice_alloc(pool->buffer_size);
      59      173289 :     return self;
      60             : }
      61             : 
      62      173218 : static void rm_buffer_free(RmBuffer *buf) {
      63      173218 :     g_slice_free1(buf->pool->buffer_size, buf->data);
      64      173218 :     g_slice_free(RmBuffer, buf);
      65      173218 : }
      66             : 
      67       36458 : RmBufferPool *rm_buffer_pool_init(gsize buffer_size, gsize max_mem, gsize max_kept_mem) {
      68       36458 :     RmBufferPool *self = g_slice_new(RmBufferPool);
      69       36458 :     self->stack = NULL;
      70       36458 :     self->buffer_size = buffer_size;
      71       36458 :     self->avail_buffers = MAX(max_mem / buffer_size, 1);
      72       36458 :     self->min_buffers = self->avail_buffers;
      73       36458 :     self->max_buffers = self->avail_buffers;
      74       36458 :     self->max_kept_buffers = MAX(max_kept_mem / buffer_size, 1);
      75       36458 :     self->kept_buffers = 0;
      76       36458 :     rm_log_debug_line("rm_buffer_pool_init: allocated max %"G_GSIZE_FORMAT
      77             :                       " buffers of %"G_GSIZE_FORMAT" bytes each",
      78             :                       self->avail_buffers, self->buffer_size);
      79       36458 :     g_cond_init(&self->change);
      80       36458 :     g_mutex_init(&self->lock);
      81       36458 :     return self;
      82             : }
      83             : 
      84       36458 : void rm_buffer_pool_destroy(RmBufferPool *pool) {
      85       36458 :     rm_log_debug_line("had %"G_GSIZE_FORMAT" unused read buffers", pool->min_buffers);
      86             : 
      87             :     /* Wait for all buffers to come back */
      88       36458 :     g_mutex_lock(&pool->lock);
      89             : 
      90             :     /* Free 'em */
      91       36458 :     g_slist_free_full(pool->stack, (GDestroyNotify)rm_buffer_free);
      92             : 
      93       36458 :     g_mutex_unlock(&pool->lock);
      94             : 
      95       36458 :     g_mutex_clear(&pool->lock);
      96       36458 :     g_cond_clear(&pool->change);
      97       36458 :     g_slice_free(RmBufferPool, pool);
      98       36458 : }
      99             : 
     100      718370 : RmBuffer *rm_buffer_pool_get(RmBufferPool *pool) {
     101      718370 :     RmBuffer *buffer = NULL;
     102      718370 :     g_mutex_lock(&pool->lock);
     103             :     {
     104     2155432 :         while(!buffer) {
     105      718634 :             if(pool->stack) {
     106      545110 :                 buffer = pool->stack->data;
     107      545110 :                 pool->stack = g_slist_delete_link(pool->stack, pool->stack);
     108      173524 :             } else if(pool->avail_buffers > 0) {
     109      173289 :                 buffer = rm_buffer_new(pool);
     110             :             } else {
     111         235 :                 if(!pool->mem_warned) {
     112           0 :                     rm_log_warning_line(
     113             :                         "read buffer limit reached - waiting for "
     114             :                         "processing to catch up");
     115           0 :                     pool->mem_warned = true;
     116             :                 }
     117         235 :                 g_cond_wait(&pool->change, &pool->lock);
     118             :             }
     119             :         }
     120      718399 :         pool->avail_buffers--;
     121             : 
     122      718399 :         if(pool->avail_buffers < pool->min_buffers) {
     123      168927 :             pool->min_buffers = pool->avail_buffers;
     124             :         }
     125             :     }
     126      718399 :     g_mutex_unlock(&pool->lock);
     127             : 
     128      718393 :     g_assert(buffer);
     129      718393 :     return buffer;
     130             : }
     131             : 
     132      698041 : void rm_buffer_pool_release(RmBuffer *buf) {
     133      698041 :     RmBufferPool *pool = buf->pool;
     134      698041 :     g_mutex_lock(&pool->lock);
     135             :     {
     136      698643 :         pool->avail_buffers++;
     137      698643 :         g_cond_signal(&pool->change);
     138      698643 :         pool->stack = g_slist_prepend(pool->stack, buf);
     139             :     }
     140      698643 :     g_mutex_unlock(&pool->lock);
     141      698581 : }
     142             : 
     143             : /* make another buffer available if one is being kept (in paranoid digest) */
     144       19752 : static void rm_buffer_pool_signal_keeping(_U RmBuffer *buf) {
     145       19752 :     RmBufferPool *pool = buf->pool;
     146             : 
     147       19752 :     g_mutex_lock(&pool->lock);
     148             :     {
     149       19756 :         pool->avail_buffers++;
     150       19756 :         pool->kept_buffers++;
     151       19756 :         g_cond_signal(&pool->change);
     152             :     }
     153       19756 :     g_mutex_unlock(&pool->lock);
     154       19754 : }
     155             : 
     156       19685 : static void rm_buffer_pool_release_kept(RmBuffer *buf) {
     157       19685 :     RmBufferPool *pool = buf->pool;
     158       19685 :     g_mutex_lock(&pool->lock);
     159             :     {
     160       19685 :         if(pool->kept_buffers > pool->max_kept_buffers) {
     161           0 :             rm_buffer_free(buf);
     162             :         } else {
     163       19685 :             pool->stack = g_slist_prepend(pool->stack, buf);
     164             :         }
     165       19685 :         pool->kept_buffers--;
     166       19685 :         g_cond_signal(&pool->change);
     167             :     }
     168       19685 :     g_mutex_unlock(&pool->lock);
     169       19685 : }
     170             : 
     171       14765 : static gboolean rm_buffer_equal(RmBuffer *a, RmBuffer *b) {
     172       14765 :     return (a->len == b->len && memcmp(a->data, b->data, a->len) == 0);
     173             : }
     174             : 
     175             : ///////////////////////////////////////
     176             : //      RMDIGEST IMPLEMENTATION      //
     177             : ///////////////////////////////////////
     178             : 
     179       31978 : RmDigestType rm_string_to_digest_type(const char *string) {
     180       31978 :     if(string == NULL) {
     181           0 :         return RM_DIGEST_UNKNOWN;
     182       31978 :     } else if(!strcasecmp(string, "md5")) {
     183        1881 :         return RM_DIGEST_MD5;
     184             : #if HAVE_SHA512
     185       30097 :     } else if(!strcasecmp(string, "sha512")) {
     186        1881 :         return RM_DIGEST_SHA512;
     187             : #endif
     188       28216 :     } else if(!strcasecmp(string, "city512")) {
     189        1881 :         return RM_DIGEST_CITY512;
     190       26335 :     } else if(!strcasecmp(string, "xxhash")) {
     191        1881 :         return RM_DIGEST_XXHASH;
     192       24454 :     } else if(!strcasecmp(string, "murmur512")) {
     193        1881 :         return RM_DIGEST_MURMUR512;
     194       22573 :     } else if(!strcasecmp(string, "sha256")) {
     195        1881 :         return RM_DIGEST_SHA256;
     196       20692 :     } else if(!strcasecmp(string, "city256")) {
     197        1881 :         return RM_DIGEST_CITY256;
     198       18811 :     } else if(!strcasecmp(string, "murmur256")) {
     199        1881 :         return RM_DIGEST_MURMUR256;
     200       16930 :     } else if(!strcasecmp(string, "sha1")) {
     201        1881 :         return RM_DIGEST_SHA1;
     202       15049 :     } else if(!strcasecmp(string, "spooky32")) {
     203        1881 :         return RM_DIGEST_SPOOKY32;
     204       13168 :     } else if(!strcasecmp(string, "spooky64")) {
     205        1881 :         return RM_DIGEST_SPOOKY64;
     206       11287 :     } else if(!strcasecmp(string, "murmur") || !strcasecmp(string, "murmur128")) {
     207        1881 :         return RM_DIGEST_MURMUR;
     208        9406 :     } else if(!strcasecmp(string, "spooky") || !strcasecmp(string, "spooky128")) {
     209        1881 :         return RM_DIGEST_SPOOKY;
     210        7525 :     } else if(!strcasecmp(string, "city") || !strcasecmp(string, "city128")) {
     211        1881 :         return RM_DIGEST_CITY;
     212        5644 :     } else if(!strcasecmp(string, "bastard") || !strcasecmp(string, "bastard256")) {
     213        1881 :         return RM_DIGEST_BASTARD;
     214        3763 :     } else if(!strcasecmp(string, "ext")) {
     215           0 :         return RM_DIGEST_EXT;
     216        3763 :     } else if(!strcasecmp(string, "cumulative")) {
     217           0 :         return RM_DIGEST_CUMULATIVE;
     218        3763 :     } else if(!strcasecmp(string, "paranoid")) {
     219        3763 :         return RM_DIGEST_PARANOID;
     220             :     } else {
     221           0 :         return RM_DIGEST_UNKNOWN;
     222             :     }
     223             : }
     224             : 
     225       72690 : const char *rm_digest_type_to_string(RmDigestType type) {
     226             :     static const char *names[] =
     227             :         {[RM_DIGEST_UNKNOWN] = "unknown", [RM_DIGEST_MURMUR] = "murmur",
     228             :          [RM_DIGEST_SPOOKY] = "spooky", [RM_DIGEST_SPOOKY32] = "spooky32",
     229             :          [RM_DIGEST_SPOOKY64] = "spooky64", [RM_DIGEST_CITY] = "city",
     230             :          [RM_DIGEST_MD5] = "md5", [RM_DIGEST_SHA1] = "sha1",
     231             :          [RM_DIGEST_SHA256] = "sha256", [RM_DIGEST_SHA512] = "sha512",
     232             :          [RM_DIGEST_MURMUR256] = "murmur256", [RM_DIGEST_CITY256] = "city256",
     233             :          [RM_DIGEST_BASTARD] = "bastard", [RM_DIGEST_MURMUR512] = "murmur512",
     234             :          [RM_DIGEST_CITY512] = "city512", [RM_DIGEST_EXT] = "ext",
     235             :          [RM_DIGEST_CUMULATIVE] = "cumulative", [RM_DIGEST_PARANOID] = "paranoid",
     236             :          [RM_DIGEST_XXHASH] = "xxhash"};
     237             : 
     238       72690 :     return names[MIN(type, sizeof(names) / sizeof(names[0]))];
     239             : }
     240             : 
     241           0 : int rm_digest_type_to_multihash_id(RmDigestType type) {
     242             :     static int ids[] = {[RM_DIGEST_UNKNOWN] = -1,    [RM_DIGEST_MURMUR] = 17,
     243             :                         [RM_DIGEST_SPOOKY] = 14,     [RM_DIGEST_SPOOKY32] = 16,
     244             :                         [RM_DIGEST_SPOOKY64] = 18,   [RM_DIGEST_CITY] = 15,
     245             :                         [RM_DIGEST_MD5] = 1,         [RM_DIGEST_SHA1] = 2,
     246             :                         [RM_DIGEST_SHA256] = 4,      [RM_DIGEST_SHA512] = 6,
     247             :                         [RM_DIGEST_MURMUR256] = 7,   [RM_DIGEST_CITY256] = 8,
     248             :                         [RM_DIGEST_BASTARD] = 9,     [RM_DIGEST_MURMUR512] = 10,
     249             :                         [RM_DIGEST_CITY512] = 11,    [RM_DIGEST_EXT] = 12,
     250             :                         [RM_DIGEST_CUMULATIVE] = 13, [RM_DIGEST_PARANOID] = 14};
     251             : 
     252           0 :     return ids[MIN(type, sizeof(ids) / sizeof(ids[0]))];
     253             : }
     254             : 
     255       16036 : RmOff rm_digest_paranoia_bytes(void) {
     256       16036 :     return 16 * 1024 * 1024;
     257             :     /* this is big enough buffer size to make seek time fairly insignificant relative to
     258             :      * sequential read time,
     259             :      * eg 16MB read at typical 100 MB/s read rate = 160ms read vs typical seek time 10ms*/
     260             : }
     261             : 
     262             : #define ADD_SEED(digest, seed)                                              \
     263             :     {                                                                       \
     264             :         if(seed) {                                                          \
     265             :             g_checksum_update(digest->glib_checksum, (const guchar *)&seed, \
     266             :                               sizeof(RmOff));                               \
     267             :         }                                                                   \
     268             :     }
     269             : 
     270      386722 : RmDigest *rm_digest_new(RmDigestType type, RmOff seed1, RmOff seed2, RmOff paranoid_size,
     271             :                         bool use_shadow_hash) {
     272      386722 :     RmDigest *digest = g_slice_new0(RmDigest);
     273             : 
     274      386723 :     digest->checksum = NULL;
     275      386723 :     digest->type = type;
     276      386723 :     digest->bytes = 0;
     277             : 
     278      386723 :     switch(type) {
     279             :     case RM_DIGEST_SPOOKY32:
     280             :         /* cannot go lower than 64, since we read 8 byte in some places.
     281             :          * simulate by leaving the part at the end empty
     282             :          */
     283        7891 :         digest->bytes = 64 / 8;
     284        7891 :         break;
     285             :     case RM_DIGEST_XXHASH:
     286             :     case RM_DIGEST_SPOOKY64:
     287       39683 :         digest->bytes = 64 / 8;
     288       39683 :         break;
     289             :     case RM_DIGEST_MD5:
     290        7879 :         digest->glib_checksum = g_checksum_new(G_CHECKSUM_MD5);
     291        7879 :         ADD_SEED(digest, seed1);
     292        7879 :         digest->bytes = 128 / 8;
     293        7879 :         return digest;
     294             : #if HAVE_SHA512
     295             :     case RM_DIGEST_SHA512:
     296        7935 :         digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA512);
     297        7935 :         ADD_SEED(digest, seed1);
     298        7935 :         digest->bytes = 512 / 8;
     299        7935 :         return digest;
     300             : #endif
     301             :     case RM_DIGEST_SHA256:
     302        7879 :         digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA256);
     303        7879 :         ADD_SEED(digest, seed1);
     304        7879 :         digest->bytes = 256 / 8;
     305        7879 :         return digest;
     306             :     case RM_DIGEST_SHA1:
     307       79413 :         digest->glib_checksum = g_checksum_new(G_CHECKSUM_SHA1);
     308       79413 :         ADD_SEED(digest, seed1);
     309       79413 :         digest->bytes = 160 / 8;
     310       79413 :         return digest;
     311             :     case RM_DIGEST_MURMUR512:
     312             :     case RM_DIGEST_CITY512:
     313       15782 :         digest->bytes = 512 / 8;
     314       15782 :         break;
     315             :     case RM_DIGEST_EXT:
     316             :         /* gets allocated on rm_digest_update() */
     317      144730 :         digest->bytes = paranoid_size;
     318      144730 :         break;
     319             :     case RM_DIGEST_MURMUR256:
     320             :     case RM_DIGEST_CITY256:
     321             :     case RM_DIGEST_BASTARD:
     322       31538 :         digest->bytes = 256 / 8;
     323       31538 :         break;
     324             :     case RM_DIGEST_SPOOKY:
     325             :     case RM_DIGEST_MURMUR:
     326             :     case RM_DIGEST_CITY:
     327             :     case RM_DIGEST_CUMULATIVE:
     328       27957 :         digest->bytes = 128 / 8;
     329       27957 :         break;
     330             :     case RM_DIGEST_PARANOID:
     331       16036 :         g_assert(paranoid_size <= rm_digest_paranoia_bytes());
     332       16036 :         g_assert(paranoid_size > 0);
     333       16036 :         digest->bytes = paranoid_size;
     334       16036 :         digest->paranoid = g_slice_new0(RmParanoid);
     335       16036 :         digest->paranoid->buffers = g_queue_new();
     336       16036 :         digest->paranoid->incoming_twin_candidates = g_async_queue_new();
     337       16036 :         g_assert(use_shadow_hash);
     338       16036 :         if(use_shadow_hash) {
     339       32072 :             digest->paranoid->shadow_hash =
     340       16036 :                 rm_digest_new(RM_DIGEST_XXHASH, seed1, seed2, 0, false);
     341             :         } else {
     342           0 :             digest->paranoid->shadow_hash = NULL;
     343             :         }
     344       16036 :         break;
     345             :     default:
     346           0 :         g_assert_not_reached();
     347             :     }
     348             : 
     349             :     /* starting values to let us generate up to 4 different hashes in parallel with
     350             :      * different starting seeds:
     351             :      * */
     352             :     static const RmOff seeds[4] = {0x0000000000000000, 0xf0f0f0f0f0f0f0f0,
     353             :                                    0x3333333333333333, 0xaaaaaaaaaaaaaaaa};
     354             : 
     355      283617 :     if(digest->bytes > 0 && type != RM_DIGEST_PARANOID) {
     356      155023 :         const int n_seeds = sizeof(seeds) / sizeof(seeds[0]);
     357             : 
     358             :         /* checksum type - allocate memory and initialise */
     359      155023 :         digest->checksum = g_slice_alloc0(digest->bytes);
     360      437116 :         for(gsize block = 0; block < (digest->bytes / 16); block++) {
     361      282093 :             digest->checksum[block].first = seeds[block % n_seeds] ^ seed1;
     362      282093 :             digest->checksum[block].second = seeds[block % n_seeds] ^ seed2;
     363             :         }
     364             :     }
     365             : 
     366      283617 :     if(digest->type == RM_DIGEST_BASTARD) {
     367             :         /* bastard type *always* has *pure* murmur hash for first checksum
     368             :          * and seeded city for second checksum */
     369       15756 :         digest->checksum[0].first = digest->checksum[0].second = 0;
     370             :     }
     371      283617 :     return digest;
     372             : }
     373             : 
     374          28 : void rm_digest_paranoia_shrink(RmDigest *digest, gsize new_size) {
     375          28 :     g_assert(new_size < digest->bytes);
     376          28 :     g_assert(digest->type == RM_DIGEST_PARANOID);
     377             : 
     378          28 :     digest->bytes = new_size;
     379          28 : }
     380             : 
     381       17384 : void rm_digest_release_buffers(RmDigest *digest) {
     382       17384 :     if(digest->paranoid && digest->paranoid->buffers) {
     383       15965 :         g_queue_free_full(digest->paranoid->buffers,
     384             :                           (GDestroyNotify)rm_buffer_pool_release_kept);
     385       15964 :         digest->paranoid->buffers = NULL;
     386             :     }
     387       17383 : }
     388             : 
     389      787249 : void rm_digest_free(RmDigest *digest) {
     390      787249 :     switch(digest->type) {
     391             :     case RM_DIGEST_MD5:
     392             :     case RM_DIGEST_SHA512:
     393             :     case RM_DIGEST_SHA256:
     394             :     case RM_DIGEST_SHA1:
     395      539609 :         g_checksum_free(digest->glib_checksum);
     396      539628 :         digest->glib_checksum = NULL;
     397      539628 :         break;
     398             :     case RM_DIGEST_PARANOID:
     399       13677 :         if(digest->paranoid->shadow_hash) {
     400       13677 :             rm_digest_free(digest->paranoid->shadow_hash);
     401             :         }
     402       13677 :         rm_digest_release_buffers(digest);
     403       13677 :         if(digest->paranoid->incoming_twin_candidates) {
     404       13677 :             g_async_queue_unref(digest->paranoid->incoming_twin_candidates);
     405             :         }
     406       13677 :         g_slice_free(RmParanoid, digest->paranoid);
     407       13677 :         break;
     408             :     case RM_DIGEST_EXT:
     409             :     case RM_DIGEST_CUMULATIVE:
     410             :     case RM_DIGEST_MURMUR512:
     411             :     case RM_DIGEST_XXHASH:
     412             :     case RM_DIGEST_CITY512:
     413             :     case RM_DIGEST_MURMUR256:
     414             :     case RM_DIGEST_CITY256:
     415             :     case RM_DIGEST_BASTARD:
     416             :     case RM_DIGEST_SPOOKY:
     417             :     case RM_DIGEST_SPOOKY32:
     418             :     case RM_DIGEST_SPOOKY64:
     419             :     case RM_DIGEST_MURMUR:
     420             :     case RM_DIGEST_CITY:
     421      233963 :         if(digest->checksum) {
     422      233963 :             g_slice_free1(digest->bytes, digest->checksum);
     423      233963 :             digest->checksum = NULL;
     424             :         }
     425      233963 :         break;
     426             :     default:
     427           0 :         g_assert_not_reached();
     428             :     }
     429      787268 :     g_slice_free(RmDigest, digest);
     430      787276 : }
     431             : 
     432      397895 : void rm_digest_update(RmDigest *digest, const unsigned char *data, RmOff size) {
     433      397895 :     switch(digest->type) {
     434             :     case RM_DIGEST_EXT:
     435             : /* Data is assumed to be a hex representation of a cchecksum.
     436             :  * Needs to be compressed in pure memory first.
     437             :  *
     438             :  * Checksum is not updated but rather overwritten.
     439             :  * */
     440             : #define CHAR_TO_NUM(c) (unsigned char)(g_ascii_isdigit(c) ? c - '0' : (c - 'a') + 10)
     441             : 
     442      112558 :         g_assert(data);
     443             : 
     444      112558 :         digest->bytes = size / 2;
     445      112558 :         digest->checksum = g_slice_alloc0(digest->bytes);
     446             : 
     447     7279398 :         for(unsigned i = 0; i < digest->bytes; ++i) {
     448    35834200 :             ((guint8 *)digest->checksum)[i] =
     449    28667360 :                 (CHAR_TO_NUM(data[2 * i]) << 4) + CHAR_TO_NUM(data[2 * i + 1]);
     450             :         }
     451             : 
     452      112558 :         break;
     453             :     case RM_DIGEST_MD5:
     454             :     case RM_DIGEST_SHA512:
     455             :     case RM_DIGEST_SHA256:
     456             :     case RM_DIGEST_SHA1:
     457      127421 :         g_checksum_update(digest->glib_checksum, (const guchar *)data, size);
     458      127396 :         break;
     459             :     case RM_DIGEST_SPOOKY32:
     460        9747 :         digest->checksum[0].first = spooky_hash32(data, size, digest->checksum[0].first);
     461        9749 :         break;
     462             :     case RM_DIGEST_SPOOKY64:
     463        9749 :         digest->checksum[0].first = spooky_hash64(data, size, digest->checksum[0].first);
     464        9750 :         break;
     465             :     case RM_DIGEST_SPOOKY:
     466        9751 :         spooky_hash128(data, size, (uint64_t *)&digest->checksum[0].first,
     467        9751 :                        (uint64_t *)&digest->checksum[0].second);
     468        9749 :         break;
     469             :     case RM_DIGEST_XXHASH:
     470       39227 :         digest->checksum[0].first = XXH64(data, size, digest->checksum[0].first);
     471       39212 :         break;
     472             :     case RM_DIGEST_MURMUR512:
     473             :     case RM_DIGEST_MURMUR256:
     474             :     case RM_DIGEST_MURMUR:
     475       97468 :         for(guint8 block = 0; block < (digest->bytes / 16); block++) {
     476             : #if RM_PLATFORM_32
     477             :             MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[block].first,
     478             :                                 &digest->checksum[block]);  //&
     479             : #elif RM_PLATFORM_64
     480      136436 :             MurmurHash3_x64_128(data, size, (uint32_t)digest->checksum[block].first,
     481      136436 :                                 &digest->checksum[block]);
     482             : #else
     483             : #error "Probably not a good idea to compile rmlint on 16bit."
     484             : #endif
     485             :         }
     486       29250 :         break;
     487             :     case RM_DIGEST_CITY:
     488             :     case RM_DIGEST_CITY256:
     489             :     case RM_DIGEST_CITY512:
     490       97468 :         for(guint8 block = 0; block < (digest->bytes / 16); block++) {
     491             :             /* Opt out for the more optimized version.
     492             :             * This needs the crc command of sse4.2
     493             :             * (available on Intel Nehalem and up; my amd box doesn't have this though)
     494             :             */
     495       68220 :             uint128 old = {digest->checksum[block].first, digest->checksum[block].second};
     496             : #if RM_PLATFORM_64 && HAVE_SSE42
     497             :             old = CityHashCrc128WithSeed((const char *)data, size, old);
     498             : #else
     499       68220 :             old = CityHash128WithSeed((const char *)data, size, old);
     500             : #endif
     501       68223 :             memcpy(&digest->checksum[block], &old, sizeof(uint128));
     502             :         }
     503       29248 :         break;
     504             :     case RM_DIGEST_BASTARD:
     505       19471 :         MurmurHash3_x86_128(data, size, (uint32_t)digest->checksum[0].first,
     506       19471 :                             &digest->checksum[0]);
     507             : 
     508       19476 :         uint128 old = {digest->checksum[1].first, digest->checksum[1].second};
     509             : #if RM_PLATFORM_64 && HAVE_SSE42
     510             :         old = CityHashCrc128WithSeed((const char *)data, size, old);
     511             : #else
     512       19476 :         old = CityHash128WithSeed((const char *)data, size, old);
     513             : #endif
     514       19471 :         memcpy(&digest->checksum[1], &old, sizeof(uint128));
     515       19471 :         break;
     516             :     case RM_DIGEST_CUMULATIVE: {
     517             :         /* This is basically FNV1a, it is just important that the order of
     518             :          * adding data to the hash has no effect on the result, so it can
     519             :          * be used as a lookup key:
     520             :          *
     521             :          * http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
     522             :          * */
     523       11480 :         RmOff hash = 0xcbf29ce484222325;
     524      195160 :         for(gsize i = 0; i < digest->bytes; ++i) {
     525      183680 :             hash ^= ((guint8 *)data)[i % size];
     526      183680 :             hash *= 0x100000001b3;
     527      183680 :             ((guint8 *)digest->checksum)[i] += hash;
     528             :         }
     529       11480 :     } break;
     530             :     case RM_DIGEST_PARANOID:
     531             :     default:
     532           0 :         g_assert_not_reached();
     533             :     }
     534      397863 : }
     535             : 
     536      273832 : void rm_digest_buffered_update(RmBuffer *buffer) {
     537      273832 :     RmDigest *digest = buffer->digest;
     538      273832 :     if(digest->type != RM_DIGEST_PARANOID) {
     539      254080 :         rm_digest_update(digest, buffer->data, buffer->len);
     540      254070 :         rm_buffer_pool_release(buffer);
     541             :     } else {
     542       19752 :         RmParanoid *paranoid = digest->paranoid;
     543             : 
     544       19752 :         g_queue_push_tail(paranoid->buffers, buffer);
     545       19754 :         paranoid->buffer_count++;
     546       19754 :         rm_buffer_pool_signal_keeping(buffer);
     547             : 
     548       19754 :         g_assert(paranoid->shadow_hash);
     549       19754 :         if(paranoid->shadow_hash) {
     550       19754 :             rm_digest_update(paranoid->shadow_hash, buffer->data, buffer->len);
     551             :         }
     552             : 
     553       19751 :         if(paranoid->twin_candidate) {
     554             :             /* do a running check that digest remains the same as its candidate twin */
     555        1733 :             if(rm_buffer_equal(buffer, paranoid->twin_candidate_buffer->data)) {
     556             :                 /* buffers match; move ptr to next one ready for next buffer */
     557        1731 :                 paranoid->twin_candidate_buffer = paranoid->twin_candidate_buffer->next;
     558             :             } else {
     559             :                 /* buffers don't match - delete candidate (new candidate might be added on
     560             :                  * next
     561             :                  * call to rm_digest_buffered_update) */
     562           2 :                 paranoid->twin_candidate = NULL;
     563           2 :                 paranoid->twin_candidate_buffer = NULL;
     564             : #if _RM_CHECKSUM_DEBUG
     565             :                 rm_log_debug_line("Ejected candidate match at buffer #%u",
     566             :                              paranoid->buffer_count);
     567             : #endif
     568             :             }
     569             :         }
     570             : 
     571       68997 :         while(!paranoid->twin_candidate && paranoid->incoming_twin_candidates &&
     572       18355 :               (paranoid->twin_candidate =
     573       18347 :                    g_async_queue_try_pop(paranoid->incoming_twin_candidates))) {
     574             :             /* validate the new candidate by comparing the previous buffers (not
     575             :              * including current)*/
     576       11141 :             paranoid->twin_candidate_buffer =
     577       11141 :                 paranoid->twin_candidate->paranoid->buffers->head;
     578       11141 :             GList *iter_self = paranoid->buffers->head;
     579       11141 :             gboolean match = TRUE;
     580       34160 :             while(match && iter_self) {
     581       11879 :                 match = (rm_buffer_equal(paranoid->twin_candidate_buffer->data,
     582       11879 :                                          iter_self->data));
     583       11878 :                 iter_self = iter_self->next;
     584       11878 :                 paranoid->twin_candidate_buffer = paranoid->twin_candidate_buffer->next;
     585             :             }
     586       11140 :             if(paranoid->twin_candidate && !match) {
     587             :                 /* reject the twin candidate */
     588             : #if _RM_CHECKSUM_DEBUG
     589             :                 rm_log_debug_line("Rejected twin candidate %p for %p",
     590             :                              paranoid->twin_candidate, paranoid);
     591             : #endif
     592         333 :                 paranoid->twin_candidate = NULL;
     593         333 :                 paranoid->twin_candidate_buffer = NULL;
     594             : #if _RM_CHECKSUM_DEBUG
     595             :             } else {
     596             :                 rm_log_debug_line("Added twin candidate %p for %p",
     597             :                              paranoid->twin_candidate, paranoid);
     598             : #endif
     599             :             }
     600             :         }
     601             :     }
     602      273867 : }
     603             : 
     604      484204 : RmDigest *rm_digest_copy(RmDigest *digest) {
     605      484204 :     g_assert(digest);
     606             : 
     607      484204 :     RmDigest *self = NULL;
     608             : 
     609      484204 :     switch(digest->type) {
     610             :     case RM_DIGEST_MD5:
     611             :     case RM_DIGEST_SHA512:
     612             :     case RM_DIGEST_SHA256:
     613             :     case RM_DIGEST_SHA1:
     614      451876 :         self = g_slice_new0(RmDigest);
     615      451876 :         self->bytes = digest->bytes;
     616      451876 :         self->type = digest->type;
     617      451876 :         self->glib_checksum = g_checksum_copy(digest->glib_checksum);
     618      451879 :         break;
     619             :     case RM_DIGEST_SPOOKY:
     620             :     case RM_DIGEST_SPOOKY32:
     621             :     case RM_DIGEST_SPOOKY64:
     622             :     case RM_DIGEST_MURMUR:
     623             :     case RM_DIGEST_CITY:
     624             :     case RM_DIGEST_CITY256:
     625             :     case RM_DIGEST_MURMUR256:
     626             :     case RM_DIGEST_CITY512:
     627             :     case RM_DIGEST_MURMUR512:
     628             :     case RM_DIGEST_XXHASH:
     629             :     case RM_DIGEST_BASTARD:
     630             :     case RM_DIGEST_CUMULATIVE:
     631             :     case RM_DIGEST_EXT:
     632       32328 :         self = rm_digest_new(digest->type, 0, 0, digest->bytes, FALSE);
     633             : 
     634       32328 :         if(self->checksum && digest->checksum) {
     635       32328 :             memcpy((char *)self->checksum, (char *)digest->checksum, self->bytes);
     636             :         }
     637             : 
     638       32328 :         break;
     639             :     case RM_DIGEST_PARANOID:
     640             :     default:
     641           0 :         g_assert_not_reached();
     642             :     }
     643             : 
     644      484207 :     return self;
     645             : }
     646             : 
     647     1184726 : static gboolean rm_digest_needs_steal(RmDigestType digest_type) {
     648     1184726 :     switch(digest_type) {
     649             :     case RM_DIGEST_MD5:
     650             :     case RM_DIGEST_SHA512:
     651             :     case RM_DIGEST_SHA256:
     652             :     case RM_DIGEST_SHA1:
     653             :         /* for all of the above, reading the digest is destructive, so we
     654             :          * need to take a copy */
     655      531551 :         return TRUE;
     656             :     case RM_DIGEST_SPOOKY32:
     657             :     case RM_DIGEST_SPOOKY64:
     658             :     case RM_DIGEST_SPOOKY:
     659             :     case RM_DIGEST_MURMUR:
     660             :     case RM_DIGEST_CITY:
     661             :     case RM_DIGEST_CITY256:
     662             :     case RM_DIGEST_CITY512:
     663             :     case RM_DIGEST_XXHASH:
     664             :     case RM_DIGEST_MURMUR256:
     665             :     case RM_DIGEST_MURMUR512:
     666             :     case RM_DIGEST_BASTARD:
     667             :     case RM_DIGEST_CUMULATIVE:
     668             :     case RM_DIGEST_EXT:
     669             :     case RM_DIGEST_PARANOID:
     670      653175 :         return FALSE;
     671             :     default:
     672           0 :         g_assert_not_reached();
     673             :     }
     674             : }
     675             : 
     676      910448 : guint8 *rm_digest_steal(RmDigest *digest) {
     677      910448 :     guint8 *result = g_slice_alloc0(digest->bytes);
     678      910452 :     gsize buflen = digest->bytes;
     679             : 
     680      910452 :     if(rm_digest_needs_steal(digest->type)) {
     681             :         /* reading the digest is destructive, so we need to take a copy */
     682      451719 :         RmDigest *copy = rm_digest_copy(digest);
     683      451718 :         g_checksum_get_digest(copy->glib_checksum, result, &buflen);
     684      451717 :         g_assert(buflen == digest->bytes);
     685      451717 :         rm_digest_free(copy);
     686             :     } else {
     687      458733 :         memcpy(result, digest->checksum, digest->bytes);
     688             :     }
     689      910453 :     return result;
     690             : }
     691             : 
     692      295712 : guint rm_digest_hash(RmDigest *digest) {
     693      295712 :     guint8 *buf = NULL;
     694      295712 :     gsize bytes = 0;
     695             : 
     696      295712 :     if(digest->type == RM_DIGEST_PARANOID) {
     697       21967 :         if(digest->paranoid->shadow_hash) {
     698       21967 :             buf = rm_digest_steal(digest->paranoid->shadow_hash);
     699       21969 :             bytes = digest->paranoid->shadow_hash->bytes;
     700             :         }
     701             :     } else {
     702      273745 :         buf = rm_digest_steal(digest);
     703      273749 :         bytes = digest->bytes;
     704             :     }
     705             : 
     706      295718 :     guint hash = 0;
     707      295718 :     if(buf != NULL) {
     708      295718 :         hash = *(RmOff *)buf;
     709      295718 :         g_slice_free1(bytes, buf);
     710             :     }
     711      295720 :     return hash;
     712             : }
     713             : 
     714      287126 : gboolean rm_digest_equal(RmDigest *a, RmDigest *b) {
     715      287126 :     g_assert(a && b);
     716             : 
     717      287157 :     if(a->type != b->type) {
     718           0 :         return false;
     719             :     }
     720             : 
     721      287157 :     if(a->bytes != b->bytes) {
     722           0 :         return false;
     723             :     }
     724             : 
     725      287157 :     if(a->type == RM_DIGEST_PARANOID) {
     726       12867 :         if(!a->paranoid->buffers) {
     727             :             /* buffers have been freed so we need to rely on shadow hash */
     728         908 :             return rm_digest_equal(a->paranoid->shadow_hash, b->paranoid->shadow_hash);
     729             :         }
     730       11959 :         if(a->paranoid->twin_candidate == b || b->paranoid->twin_candidate == a) {
     731       10807 :             return true;
     732             :         }
     733             : 
     734        1152 :         GList *a_iter = a->paranoid->buffers->head;
     735        1152 :         GList *b_iter = b->paranoid->buffers->head;
     736        1152 :         guint bytes = 0;
     737        3456 :         while(a_iter && b_iter) {
     738        1152 :             if(!rm_buffer_equal(a_iter->data, b_iter->data)) {
     739           0 :                 rm_log_error_line(
     740             :                     "Paranoid digest compare found mismatch - must be hash collision in "
     741             :                     "shadow hash");
     742           0 :                 return false;
     743             :             }
     744        1152 :             bytes += ((RmBuffer *)a_iter->data)->len;
     745        1152 :             a_iter = a_iter->next;
     746        1152 :             b_iter = b_iter->next;
     747             :         }
     748             : 
     749        1152 :         return (!a_iter && !b_iter && bytes == a->bytes);
     750             : 
     751      274290 :     } else if(rm_digest_needs_steal(a->type)) {
     752       79843 :         guint8 *buf_a = rm_digest_steal(a);
     753       79843 :         guint8 *buf_b = rm_digest_steal(b);
     754             : 
     755             :         gboolean result;
     756             : 
     757       79844 :         if(a->bytes != b->bytes) {
     758           0 :             result = false;
     759             :         } else {
     760       79844 :             result = !memcmp(buf_a, buf_b, MIN(a->bytes, b->bytes));
     761             :         }
     762             : 
     763       79844 :         g_slice_free1(a->bytes, buf_a);
     764       79843 :         g_slice_free1(b->bytes, buf_b);
     765             : 
     766       79842 :         return result;
     767             :     } else {
     768      194446 :         return !memcmp(a->checksum, b->checksum, MIN(a->bytes, b->bytes));
     769             :     }
     770             : }
     771             : 
     772      444275 : int rm_digest_hexstring(RmDigest *digest, char *buffer) {
     773             :     static const char *hex = "0123456789abcdef";
     774      444275 :     guint8 *input = NULL;
     775      444275 :     gsize bytes = 0;
     776      444275 :     if(digest == NULL) {
     777         700 :         return 0;
     778             :     }
     779             : 
     780      443575 :     if(digest->type == RM_DIGEST_PARANOID) {
     781       23579 :         if(digest->paranoid->shadow_hash) {
     782       23579 :             input = rm_digest_steal(digest->paranoid->shadow_hash);
     783       23579 :             bytes = digest->paranoid->shadow_hash->bytes;
     784             :         }
     785             :     } else {
     786      419996 :         input = rm_digest_steal(digest);
     787      419996 :         bytes = digest->bytes;
     788             :     }
     789             : 
     790    15441639 :     for(gsize i = 0; i < bytes; ++i) {
     791    14998064 :         buffer[0] = hex[input[i] / 16];
     792    14998064 :         buffer[1] = hex[input[i] % 16];
     793             : 
     794    14998064 :         if(i == bytes - 1) {
     795      443575 :             buffer[2] = '\0';
     796             :         }
     797             : 
     798    14998064 :         buffer += 2;
     799             :     }
     800             : 
     801      443575 :     g_slice_free1(bytes, input);
     802      443575 :     return bytes * 2 + 1;
     803             : }
     804             : 
     805      444265 : int rm_digest_get_bytes(RmDigest *self) {
     806      444265 :     if(self == NULL) {
     807         700 :         return 0;
     808             :     }
     809             : 
     810      443565 :     if(self->type != RM_DIGEST_PARANOID) {
     811      419960 :         return self->bytes;
     812       23605 :     } else if(self->paranoid->shadow_hash) {
     813       23605 :         return self->paranoid->shadow_hash->bytes;
     814             :     } else {
     815           0 :         return 0;
     816             :     }
     817             : }
     818             : 
     819       12434 : void rm_digest_send_match_candidate(RmDigest *target, RmDigest *candidate) {
     820       12434 :     if(!target->paranoid->incoming_twin_candidates) {
     821           0 :         target->paranoid->incoming_twin_candidates = g_async_queue_new();
     822             :     }
     823       12434 :     g_async_queue_push(target->paranoid->incoming_twin_candidates, candidate);
     824       12434 : }

Generated by: LCOV version 1.11