LCOV - code coverage report
Current view: top level - lib - preprocess.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 239 266 89.8 %
Date: 2015-09-30 14:09:30 Functions: 23 23 100.0 %

          Line data    Source code
       1             : /**
       2             :  *  This file is part of rmlint.
       3             :  *
       4             :  *  rmlint is free software: you can redistribute it and/or modify
       5             :  *  it under the terms of the GNU General Public License as published by
       6             :  *  the Free Software Foundation, either version 3 of the License, or
       7             :  *  (at your option) any later version.
       8             :  *
       9             :  *  rmlint is distributed in the hope that it will be useful,
      10             :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  *  GNU General Public License for more details.
      13             :  *
      14             :  *  You should have received a copy of the GNU General Public License
      15             :  *  along with rmlint.  If not, see <http://www.gnu.org/licenses/>.
      16             :  *
      17             :  * Authors:
      18             :  *
      19             :  *  - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
      20             :  *  - Daniel <SeeSpotRun> T.   2014-2015 (https://github.com/SeeSpotRun)
      21             :  *
      22             :  * Hosted on http://github.com/sahib/rmlint
      23             :  */
      24             : 
      25             : #include <stdio.h>
      26             : #include <string.h>
      27             : #include <stdlib.h>
      28             : #include <ctype.h>
      29             : 
      30             : #include "preprocess.h"
      31             : #include "utilities.h"
      32             : #include "formats.h"
      33             : #include "cmdline.h"
      34             : #include "shredder.h"
      35             : 
      36      276031 : static guint rm_file_hash(RmFile *file) {
      37      276031 :     RmCfg *cfg = file->session->cfg;
      38      276031 :     if(cfg->match_basename || cfg->match_with_extension) {
      39         448 :         RM_DEFINE_BASENAME(file);
      40         448 :         char *extension = rm_util_path_extension(file_basename);
      41        1540 :         return (guint)(file->file_size ^
      42         448 :                        (cfg->match_basename ? g_str_hash(file_basename) : 0) ^
      43         700 :                        ((cfg->match_with_extension && extension)
      44         196 :                             ? g_str_hash(rm_util_path_extension(file_basename))
      45             :                             : 0));
      46             :     } else {
      47      275583 :         return (guint)(file->file_size);
      48             :     }
      49             : }
      50             : 
      51          28 : static bool rm_file_check_with_extension(const RmFile *file_a, const RmFile *file_b) {
      52          28 :     RM_DEFINE_BASENAME(file_a);
      53          28 :     RM_DEFINE_BASENAME(file_b);
      54             : 
      55          28 :     char *ext_a = rm_util_path_extension(file_a_basename);
      56          28 :     char *ext_b = rm_util_path_extension(file_b_basename);
      57             : 
      58          28 :     if(ext_a && ext_b && g_ascii_strcasecmp(ext_a, ext_b) == 0) {
      59          28 :         return true;
      60             :     } else {
      61           0 :         return false;
      62             :     }
      63             : }
      64             : 
      65         196 : static bool rm_file_check_without_extension(const RmFile *file_a, const RmFile *file_b) {
      66         196 :     RM_DEFINE_BASENAME(file_a);
      67         196 :     RM_DEFINE_BASENAME(file_b);
      68             : 
      69         196 :     char *ext_a = rm_util_path_extension(file_a_basename);
      70         196 :     char *ext_b = rm_util_path_extension(file_b_basename);
      71             : 
      72             :     /* Check length till extension, or full length if none present */
      73         196 :     size_t a_len = (ext_a) ? (ext_a - file_a_basename) : (int)strlen(file_a_basename);
      74         196 :     size_t b_len = (ext_b) ? (ext_b - file_b_basename) : (int)strlen(file_b_basename);
      75             : 
      76         196 :     if(a_len != b_len) {
      77         112 :         return false;
      78             :     }
      79             : 
      80          84 :     if(g_ascii_strncasecmp(file_a_basename, file_b_basename, a_len) == 0) {
      81          28 :         return true;
      82             :     }
      83             : 
      84          56 :     return false;
      85             : }
      86             : 
      87      169145 : gboolean rm_file_equal(const RmFile *file_a, const RmFile *file_b) {
      88      169145 :     const RmCfg *cfg = file_a->session->cfg;
      89             : 
      90      507435 :     return (1 && (file_a->file_size == file_b->file_size) &&
      91      338330 :             (0 || (!cfg->match_basename) || (rm_file_basenames_match(file_a, file_b))) &&
      92      169155 :             (0 || (!cfg->match_with_extension) ||
      93      507259 :              (rm_file_check_with_extension(file_a, file_b))) &&
      94      169323 :             (0 || (!cfg->match_without_extension) ||
      95         196 :              (rm_file_check_without_extension(file_a, file_b))));
      96             : }
      97             : 
      98      456283 : static guint rm_node_hash(const RmFile *file) {
      99      456283 :     return file->inode ^ file->dev;
     100             : }
     101             : 
     102        3452 : static gboolean rm_node_equal(const RmFile *file_a, const RmFile *file_b) {
     103        3452 :     return (1 && (file_a->inode == file_b->inode) && (file_a->dev == file_b->dev));
     104             : }
     105             : 
     106             : /* GHashTable key tuned to recognize duplicate paths.
     107             :  * i.e. RmFiles that are not only hardlinks but
     108             :  * also point to the real path
     109             :  */
     110             : typedef struct RmPathDoubleKey {
     111             :     /* parent_inode and basename are initialized lazily,
     112             :      * since often, they are not needed.
     113             :      */
     114             :     bool parent_inode_set : 1;
     115             :     bool basename_set : 1;
     116             : 
     117             :     /* stat(dirname(file->path)).st_ino */
     118             :     ino_t parent_inode;
     119             :     char *basename;
     120             : 
     121             :     /* File the key points to */
     122             :     RmFile *file;
     123             : 
     124             : } RmPathDoubleKey;
     125             : 
     126        5761 : static guint rm_path_double_hash(const RmPathDoubleKey *key) {
      127             :     /* depend only on the always set components, never change the hash during the run */
     128        5761 :     return rm_node_hash(key->file);
     129             : }
     130             : 
     131        2655 : static bool rm_path_have_same_parent(RmCfg *cfg, RmPathDoubleKey *key_a,
     132             :                                      RmPathDoubleKey *key_b) {
     133        2655 :     RmFile *file_a = key_a->file, *file_b = key_b->file;
     134             : 
     135        2655 :     if(cfg->use_meta_cache) {
     136          94 :         if(key_a->parent_inode_set && key_b->parent_inode_set) {
     137           0 :             RM_DEFINE_PATH(file_a);
     138           0 :             RM_DEFINE_PATH(file_b);
     139             : 
     140           0 :             key_a->parent_inode = rm_util_parent_node(file_a_path);
     141           0 :             key_a->parent_inode_set = TRUE;
     142             : 
     143           0 :             key_b->parent_inode = rm_util_parent_node(file_b_path);
     144           0 :             key_b->parent_inode_set = TRUE;
     145             :         }
     146             : 
     147          94 :         return key_a->parent_inode == key_b->parent_inode;
     148             :     } else {
     149        2561 :         return file_a->folder->parent == file_b->folder->parent;
     150             :     }
     151             : }
     152             : 
     153        2655 : static gboolean rm_path_double_equal(RmPathDoubleKey *key_a, RmPathDoubleKey *key_b) {
     154        2655 :     if(key_a->file->inode != key_b->file->inode) {
     155           0 :         return FALSE;
     156             :     }
     157             : 
     158        2655 :     if(key_a->file->dev != key_b->file->dev) {
     159           0 :         return FALSE;
     160             :     }
     161             : 
     162        2655 :     RmFile *file_a = key_a->file;
     163        2655 :     RmFile *file_b = key_b->file;
     164             : 
     165        2655 :     if(!rm_path_have_same_parent(file_a->session->cfg, key_a, key_b)) {
     166           0 :         return FALSE;
     167             :     }
     168             : 
     169        2655 :     if(!file_a->session->cfg->use_meta_cache) {
     170        2561 :         return g_strcmp0(file_a->folder->basename, file_b->folder->basename) == 0;
     171             :     }
     172             : 
     173             :     /* If using --with-metadata-cache, save the basename for later use
     174             :      * so it doesn't trigger SELECTs very often.  Basenames are
     175             :      * generally much shorter than the path, so that should be
     176             :      * okay.
     177             :      */
     178          94 :     if(key_a->basename == NULL) {
     179          60 :         RM_DEFINE_BASENAME(file_a);
     180          60 :         key_a->basename = g_strdup(file_a_basename);
     181             :     }
     182             : 
     183          94 :     if(key_b->basename == NULL) {
     184          65 :         RM_DEFINE_BASENAME(file_b);
     185          65 :         key_b->basename = g_strdup(file_b_basename);
     186             :     }
     187             : 
     188          94 :     return g_strcmp0(key_a->basename, key_b->basename) == 0;
     189             : }
     190             : 
     191        3526 : static RmPathDoubleKey *rm_path_double_new(RmFile *file) {
     192        3526 :     RmPathDoubleKey *key = g_malloc0(sizeof(RmPathDoubleKey));
     193        3526 :     key->file = file;
     194        3526 :     return key;
     195             : }
     196             : 
     197        2235 : static void rm_path_double_free(RmPathDoubleKey *key) {
     198        2235 :     if(key->basename != NULL) {
     199          79 :         g_free(key->basename);
     200             :     }
     201        2235 :     g_free(key);
     202        2235 : }
     203             : 
     204       54884 : RmFileTables *rm_file_tables_new(_U RmSession *session) {
     205       54884 :     RmFileTables *tables = g_slice_new0(RmFileTables);
     206             : 
     207       54884 :     tables->size_groups = g_hash_table_new_full((GHashFunc)rm_file_hash,
     208             :                                                 (GEqualFunc)rm_file_equal, NULL, NULL);
     209             : 
     210       54884 :     tables->node_table = g_hash_table_new_full((GHashFunc)rm_node_hash,
     211             :                                                (GEqualFunc)rm_node_equal, NULL, NULL);
     212             : 
     213       54884 :     g_rec_mutex_init(&tables->lock);
     214       54884 :     return tables;
     215             : }
     216             : 
     217       52994 : void rm_file_tables_destroy(RmFileTables *tables) {
     218       52994 :     if(tables->size_groups) {
     219       16536 :         g_hash_table_unref(tables->size_groups);
     220             :     }
     221             : 
     222       52994 :     if(tables->node_table) {
     223       16536 :         g_hash_table_unref(tables->node_table);
     224             :     }
     225             : 
     226       52994 :     g_rec_mutex_clear(&tables->lock);
     227       52994 :     g_slice_free(RmFileTables, tables);
     228       52994 : }
     229             : 
     230             : /*  compare two files. return:
      231             :  *    - a negative integer if file 'a' outranks 'b',
     232             :  *    - 0 if they are equal,
     233             :  *    - a positive integer if file 'b' outranks 'a'
     234             :  */
     235      367054 : int rm_pp_cmp_orig_criteria_impl(RmSession *session, time_t mtime_a, time_t mtime_b,
     236             :                                  const char *basename_a, const char *basename_b,
     237             :                                  int path_index_a, int path_index_b, guint8 path_depth_a,
     238             :                                  guint8 path_depth_b) {
     239      367054 :     RmCfg *sets = session->cfg;
     240             : 
     241      367054 :     int sort_criteria_len = strlen(sets->sort_criteria);
     242      617773 :     for(int i = 0; i < sort_criteria_len; i++) {
     243      547518 :         long cmp = 0;
     244      547518 :         switch(tolower(sets->sort_criteria[i])) {
     245             :         case 'm':
     246       87886 :             cmp = (long)(mtime_a) - (long)(mtime_b);
     247       87886 :             break;
     248             :         case 'a':
     249       76258 :             cmp = g_ascii_strcasecmp(basename_a, basename_b);
     250       76258 :             break;
     251             :         case 'l':
     252       51810 :             cmp = strlen(basename_a) - strlen(basename_b);
     253       51810 :             break;
     254             :         case 'd':
     255       51800 :             cmp = (short)path_depth_a - (short)path_depth_b;
     256       51800 :             break;
     257             :         case 'p':
     258      279764 :             cmp = (long)path_index_a - (long)path_index_b;
     259      279764 :             break;
     260             :         }
     261      547518 :         if(cmp) {
      262             :             /* reverse order if uppercase option (M|A|L|D|P) */
     263      296799 :             cmp = cmp * (isupper(sets->sort_criteria[i]) ? -1 : +1);
     264      296799 :             return cmp;
     265             :         }
     266             :     }
     267       70255 :     return 0;
     268             : }
     269             : 
     270             : /* Sort criteria for sorting by preferred path (first) then user-input criteria */
     271             : /* Return:
      272             :  *      a negative integer if file 'a' outranks 'b',
     273             :  *      0 if they are equal,
     274             :  *      a positive integer if file 'b' outranks 'a'
     275             :  */
     276      369010 : int rm_pp_cmp_orig_criteria(RmFile *a, RmFile *b, RmSession *session) {
     277      369010 :     if(a->lint_type != b->lint_type) {
     278             :         /* "other" lint outranks duplicates and has lower ENUM */
     279           0 :         return a->lint_type - b->lint_type;
     280      369010 :     } else if(a->is_symlink != b->is_symlink) {
     281         112 :         return a->is_symlink - b->is_symlink;
     282      368898 :     } else if(a->is_prefd != b->is_prefd) {
     283        2516 :         return (b->is_prefd - a->is_prefd);
     284             :     } else {
     285      366382 :         RM_DEFINE_BASENAME(a);
     286      366382 :         RM_DEFINE_BASENAME(b);
     287     1465528 :         return rm_pp_cmp_orig_criteria_impl(session, a->mtime, b->mtime, a_basename,
     288      732764 :                                             b_basename, a->path_index, b->path_index,
     289      732764 :                                             a->path_depth, b->path_depth);
     290             :     }
     291             : }
     292             : 
     293             : /* initial list build, including kicking out path doubles and grouping of hardlinks */
     294      225352 : bool rm_file_tables_insert(RmSession *session, RmFile *file) {
     295      225352 :     RmFileTables *tables = session->tables;
     296      225352 :     GHashTable *node_table = tables->node_table;
     297      225352 :     bool is_hardlink = true;
     298             : 
     299      225352 :     if(rm_session_was_aborted(session)) {
     300           0 :         return false;
     301             :     }
     302             : 
     303      225340 :     g_rec_mutex_lock(&tables->lock);
     304             :     {
     305      225368 :         RmFile *inode_match = g_hash_table_lookup(node_table, file);
     306      225368 :         if(inode_match == NULL) {
     307      223535 :             g_hash_table_insert(node_table, file, file);
     308             :         } else {
     309             :             /* file(s) with matching dev, inode(, basename) already in table...
     310             :              * fails if the hardlinked file has been written to during traversal; so
     311             :              * instead we just print a warning
     312             :              * */
     313        1833 :             if(inode_match->file_size != file->file_size) {
     314           0 :                 RM_DEFINE_PATH(file);
     315           0 :                 rm_log_warning_line(_("Hardlink file size changed during traversal: %s"),
     316             :                                     file_path);
     317             :             }
     318             : 
     319             :             /* if this is the first time, set up the hardlinks.files queue */
     320        1833 :             if(!inode_match->hardlinks.files) {
     321        1693 :                 inode_match->hardlinks.files = g_queue_new();
     322             : 
     323             :                 /* NOTE: during list build, the hardlinks.files queue includes the file
     324             :                  * itself, as well as its hardlinks.  This makes operations
     325             :                  * in rm_file_tables_insert much simpler but complicates things later on,
     326             :                  * so the head file gets removed from the hardlinks.files queue
     327             :                  * in rm_pp_handle_hardlinks() during preprocessing */
     328        1693 :                 g_queue_push_head(inode_match->hardlinks.files, inode_match);
     329             :             }
     330             : 
     331             :             /* make sure the highest-ranked hardlink is "boss" */
     332        1833 :             if(rm_pp_cmp_orig_criteria(file, inode_match, session) < 0) {
      333             :                 /* this file outranks the existing boss; swap.
      334             :                  * NOTE: it's important that rm_file_tables_insert selects a
     335             :                  * RM_LINT_TYPE_DUPE_CANDIDATE as head file, unless all the
     336             :                  * files are "other lint".  This is achieved via
     337             :                  * rm_pp_cmp_orig_criteria */
     338        1619 :                 file->hardlinks.files = inode_match->hardlinks.files;
     339        1619 :                 inode_match->hardlinks.files = NULL;
     340        1619 :                 g_hash_table_add(node_table, file); /* replaces key and data*/
     341        1619 :                 g_queue_push_head(file->hardlinks.files, file);
     342             :             } else {
     343             :                 /* Find the right place to insert sorted */
     344         214 :                 GList *iter = inode_match->hardlinks.files->head;
     345         642 :                 while(iter && rm_pp_cmp_orig_criteria(iter->data, file, session) <= 0) {
     346             :                     /* iter outranks file - keep moving down the queue */
     347         214 :                     iter = iter->next;
     348             :                 }
     349             : 
     350             :                 /* Store the iter to this file, so we can swap it if needed */
     351         214 :                 if(iter) {
     352             :                     /* file outranks iter (or is equal), so should be inserted before iter
     353             :                      */
     354           0 :                     g_queue_insert_before(inode_match->hardlinks.files, iter, file);
     355             :                 } else {
     356         214 :                     g_queue_push_tail(inode_match->hardlinks.files, file);
     357             :                 }
     358             :             }
     359             :         }
     360             :     }
     361             : 
     362      225368 :     g_rec_mutex_unlock(&tables->lock);
     363      225367 :     return is_hardlink;
     364             : }
     365             : 
     366         196 : void rm_file_tables_clear(RmSession *session) {
     367             :     GHashTableIter iter;
     368             :     gpointer key;
     369             : 
     370         196 :     g_hash_table_iter_init(&iter, session->tables->node_table);
     371         560 :     while(g_hash_table_iter_next(&iter, &key, NULL)) {
     372         168 :         RmFile *file = key;
     373         168 :         if(file) {
     374         168 :             rm_file_destroy(file);
     375             :         }
     376             :     }
     377         196 : }
     378             : 
     379             : /* if file is not DUPE_CANDIDATE then send it to session->tables->other_lint
     380             :  * and return true; else return false */
     381      223787 : static bool rm_pp_handle_other_lint(RmSession *session, RmFile *file) {
     382      223787 :     if(file->lint_type != RM_LINT_TYPE_DUPE_CANDIDATE) {
     383         588 :         if(session->cfg->filter_mtime && file->mtime < session->cfg->min_mtime) {
     384           0 :             rm_file_destroy(file);
     385           0 :             return true;
     386             :         }
     387             : 
     388             :         /* Also protect other lint by --keep-all-{un,}tagged */
     389        1176 :         if((session->cfg->keep_all_tagged && file->is_prefd) ||
     390         588 :            (session->cfg->keep_all_untagged && !file->is_prefd)) {
     391           0 :             rm_file_destroy(file);
     392           0 :             return true;
     393             :         }
     394             : 
     395        1176 :         session->tables->other_lint[file->lint_type] =
     396         588 :             g_list_prepend(session->tables->other_lint[file->lint_type], file);
     397         588 :         return true;
     398             :     } else {
     399      223199 :         return false;
     400             :     }
     401             : }
     402             : 
     403      224077 : static bool rm_pp_handle_own_files(RmSession *session, RmFile *file) {
     404      224077 :     RM_DEFINE_PATH(file);
     405      224077 :     return rm_fmt_is_a_output(session->formats, file_path);
     406             : }
     407             : 
     408             : /* Preprocess files, including embedded hardlinks.  Any embedded hardlinks
     409             :  * that are "other lint" types are sent to rm_pp_handle_other_lint.  If the
     410             :  * file itself is "other lint" types it is likewise sent to rm_pp_handle_other_lint.
     411             :  * If there are no files left after this then return TRUE so that the
     412             :  * cluster can be deleted from the node_table hash table.
      413             :  * NOTE: we rely on rm_file_tables_insert to select a RM_LINT_TYPE_DUPE_CANDIDATE as head
     414             :  * file (unless ALL the files are "other lint"). */
     415      224077 : static gboolean rm_pp_handle_inode_clusters(_U gpointer key, RmFile *file,
     416             :                                             RmSession *session) {
     417      224077 :     g_assert(file);
     418      224077 :     RmCfg *cfg = session->cfg;
     419             : 
     420      224077 :     if(file->hardlinks.files && file->hardlinks.files->head) {
     421             :         /* there is a cluster of inode matches - unpack them and check for path doubles */
     422             : 
     423        1693 :         GHashTable *unique_paths_table =
     424             :             g_hash_table_new_full((GHashFunc)rm_path_double_hash,
     425             :                                   (GEqualFunc)rm_path_double_equal,
     426             :                                   (GDestroyNotify)rm_path_double_free,
     427             :                                   NULL);
     428             : 
     429        1693 :         GList *next = NULL;
     430             : 
     431        5219 :         for(GList *iter = file->hardlinks.files->head; iter; iter = next) {
     432        3526 :             next = iter->next;
     433        3526 :             RmFile *iter_file = iter->data;
     434             : 
     435        3526 :             RmPathDoubleKey *key = rm_path_double_new(iter_file);
     436             : 
     437             :             /* Lookup if there is a file with the same path */
     438        3526 :             RmPathDoubleKey *match_double_key =
     439             :                 g_hash_table_lookup(unique_paths_table, key);
     440             : 
     441        3526 :             if(match_double_key == NULL) {
     442        2235 :                 g_hash_table_add(unique_paths_table, key);
     443             :             } else {
     444        1291 :                 g_assert(match_double_key->file != iter_file);
     445        1291 :                 RmFile *match_double = match_double_key->file;
     446             : 
     447        1291 :                 g_assert(rm_pp_cmp_orig_criteria(iter_file, match_double, session) >= 0);
     448             : 
     449        1291 :                 rm_log_debug_line("Ignoring path double %p, keeping %p", iter_file,
     450             :                              match_double);
     451             : 
     452        1291 :                 g_queue_delete_link(file->hardlinks.files, iter);
     453        1291 :                 rm_file_destroy(iter_file);
     454             :             }
     455             :         }
     456             : 
     457        1693 :         g_hash_table_unref(unique_paths_table);
     458             : 
     459             :         /* remove self from hardlink queue */
     460        1693 :         g_queue_remove(file->hardlinks.files, file);
     461             : 
     462        2235 :         for(GList *iter = file->hardlinks.files->head, *next = NULL; iter; iter = next) {
     463             :             /* Remember next element early */
     464         542 :             next = iter->next;
     465             : 
     466             :             /* call self to handle each embedded hardlink */
     467         542 :             RmFile *embedded = iter->data;
     468         542 :             g_assert(embedded != file);
     469         542 :             if(embedded->hardlinks.files != NULL) {
     470           0 :                 rm_log_error("Warning: embedded file %p has hardlinks", embedded);
     471           0 :                 GQueue *hardlinks = embedded->hardlinks.files;
     472           0 :                 g_assert(hardlinks->length < 2);
     473           0 :                 if(hardlinks->head) {
     474           0 :                     g_assert(hardlinks->head->data == embedded);
     475             :                 }
     476             : 
     477           0 :                 g_queue_free(hardlinks);
     478           0 :                 embedded->hardlinks.files = NULL;
     479             :             }
     480             : 
     481         542 :             if(rm_pp_handle_inode_clusters(NULL, embedded, session)) {
     482           0 :                 g_queue_delete_link(file->hardlinks.files, iter);
     483         542 :             } else if(!cfg->find_hardlinked_dupes) {
     484         112 :                 rm_file_destroy(embedded);
     485         112 :                 g_queue_delete_link(file->hardlinks.files, iter);
     486             :             } else {
     487         430 :                 embedded->hardlinks.hardlink_head = file;
     488         430 :                 g_assert(!embedded->hardlinks.is_head);
     489             :             }
     490             :         }
     491             : 
     492        1693 :         if(g_queue_is_empty(file->hardlinks.files)) {
     493        1347 :             g_queue_free(file->hardlinks.files);
     494        1347 :             file->hardlinks.files = NULL;
     495             :         } else {
     496         346 :             file->hardlinks.is_head = TRUE;
     497             :         }
     498             :     }
     499             : 
     500      224077 :     if(file->hardlinks.is_head && file->hardlinks.files) {
     501             :         /* Hardlinks are processed on the fly by shredder later,
     502             :          * so we do not really need to process them.
     503             :          */
     504         346 :         session->total_filtered_files -= file->hardlinks.files->length;
     505             :     }
     506             : 
     507             :     /*
      508             :     * Check if the file is an output of rmlint itself, which we definitely
      509             :     * do not want to handle. Creating a script that deletes itself is fun but useless.
     510             :     * */
     511      224077 :     bool remove = rm_pp_handle_own_files(session, file);
     512             : 
     513             :     /* handle the head file; if it's "other lint" then process it via
     514             :      * rm_pp_handle_other_lint
     515             :      * and return TRUE, else keep it
     516             :      */
     517      224077 :     if(remove == false && rm_pp_handle_other_lint(session, file)) {
     518         588 :         remove = true;
     519      223489 :     } else if(remove == true) {
     520         290 :         rm_file_destroy(file);
     521             :     }
     522             : 
     523      224077 :     session->total_filtered_files -= remove;
     524      224077 :     rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
     525             : 
     526      224077 :     return remove;
     527             : }
     528             : 
     529         308 : static int rm_pp_cmp_reverse_alphabetical(const RmFile *a, const RmFile *b) {
     530         308 :     RM_DEFINE_PATH(a);
     531         308 :     RM_DEFINE_PATH(b);
     532         308 :     return g_strcmp0(b_path, a_path);
     533             : }
     534             : 
     535       36654 : static RmOff rm_pp_handler_other_lint(RmSession *session) {
     536       36654 :     RmOff num_handled = 0;
     537       36654 :     RmFileTables *tables = session->tables;
     538             : 
     539      329886 :     for(RmOff type = 0; type < RM_LINT_TYPE_DUPE_CANDIDATE; ++type) {
     540      293232 :         if(type == RM_LINT_TYPE_EMPTY_DIR) {
     541       36654 :             tables->other_lint[type] = g_list_sort(
     542             :                 tables->other_lint[type], (GCompareFunc)rm_pp_cmp_reverse_alphabetical);
     543             :         }
     544             : 
     545      293232 :         GList *list = tables->other_lint[type];
     546      293820 :         for(GList *iter = list; iter; iter = iter->next) {
     547         588 :             RmFile *file = iter->data;
     548             : 
     549         588 :             g_assert(file);
     550         588 :             g_assert(type == file->lint_type);
     551             : 
     552         588 :             num_handled++;
     553             : 
     554         588 :             rm_fmt_write(file, session->formats, -1);
     555             :         }
     556             : 
     557      293232 :         if(!session->cfg->cache_file_structs) {
     558      155968 :             g_list_free_full(list, (GDestroyNotify)rm_file_destroy);
     559             :         } else {
     560      137264 :             g_list_free(list);
     561             :         }
     562             :     }
     563             : 
     564       36654 :     return num_handled;
     565             : }
     566             : 
     567             : /* This does preprocessing including handling of "other lint" (non-dupes) */
     568       36654 : void rm_preprocess(RmSession *session) {
     569       36654 :     RmFileTables *tables = session->tables;
     570       36654 :     g_assert(tables->node_table);
     571             : 
     572       36654 :     session->total_filtered_files = session->total_files;
     573             : 
     574             :     /* process hardlink groups, and move other_lint into tables- */
     575       36654 :     guint removed = g_hash_table_foreach_remove(
     576             :         tables->node_table, (GHRFunc)rm_pp_handle_inode_clusters, session);
     577             : 
     578       36654 :     rm_log_debug_line("process hardlink groups finished at time %.3f; removed %u of %d",
     579             :                  g_timer_elapsed(session->timer, NULL), removed, session->total_files);
     580             : 
     581       36654 :     session->other_lint_cnt += rm_pp_handler_other_lint(session);
     582       36654 :     rm_log_debug_line("Other lint handling finished at time %.3f",
     583             :                  g_timer_elapsed(session->timer, NULL));
     584             : 
     585       36654 :     rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_PREPROCESS);
     586       36654 : }

Generated by: LCOV version 1.11