Line data Source code
1 : /**
2 : * This file is part of rmlint.
3 : *
4 : * rmlint is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * rmlint is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 : *
17 : * Authors:
18 : *
19 : * - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
20 : * - Daniel <SeeSpotRun> T. 2014-2015 (https://github.com/SeeSpotRun)
21 : *
22 : * Hosted on http://github.com/sahib/rmlint
23 : *
24 : */
25 :
26 : #include <stdlib.h>
27 : #include <string.h>
28 :
29 : #include <sys/types.h>
30 : #include <sys/stat.h>
31 : #include <fts.h>
32 : #include <errno.h>
33 :
34 : #include <glib.h>
35 :
36 : #include "preprocess.h"
37 : #include "formats.h"
38 : #include "utilities.h"
39 : #include "file.h"
40 : #include "xattr.h"
41 :
42 : ///////////////////////////////////////////
43 : // BUFFER FOR STARTING TRAVERSAL THREADS //
44 : ///////////////////////////////////////////
45 :
46 : /* Declares two locals in the invoking scope: `<buff>_buf`, a PATH_MAX sized
 * scratch array, and `<buff>_path`, the path to use for `buff`.
 *
 * - If session->cfg->use_meta_cache is set, rm_swap_table_lookup() is called
 *   with GPOINTER_TO_UINT(buff->path) and `<buff>_buf` as destination, and
 *   `<buff>_path` points at that scratch array (presumably buff->path then
 *   carries an id rather than a real string -- confirm against the swap table
 *   code).
 * - Otherwise `<buff>_path` simply aliases buff->path.
 *
 * The expansion is two declarations followed by an if/else statement, so it
 * must be used at block level (not as the body of an unbraced if/for), and
 * `<buff>_path` must not be used after the enclosing scope ends, since it may
 * point into `<buff>_buf`.
 */
47 : #define RM_BUFFER_DEFINE_PATH(session, buff) \
48 : char *buff##_path = NULL; \
49 : char buff##_buf[PATH_MAX]; \
50 : if(session->cfg->use_meta_cache) { \
51 : rm_swap_table_lookup(session->meta_cache, session->meta_cache_dir_id, \
52 : GPOINTER_TO_UINT(buff->path), buff##_buf, PATH_MAX); \
53 : buff##_path = buff##_buf; \
54 : } else { \
55 : buff##_path = buff->path; \
56 : }
57 :
/* Per-path work item: one is created by rm_trav_buffer_new() for every entry
 * of cfg->paths and released with rm_trav_buffer_free(). */
58 : typedef struct RmTravBuffer {
59 : RmStat stat_buf; /* rm_sys_stat(2)/rm_sys_lstat(2) information about the path */
60 : char *path; /* The path, as passed on the command line (not owned/freed here). */
61 : bool is_prefd; /* Was this path given as a preferred path? */
62 : RmOff path_index; /* Index of the path, as passed on the command line */
63 : } RmTravBuffer;
64 :
65 101353 : static RmTravBuffer *rm_trav_buffer_new(RmSession *session, char *path, bool is_prefd,
66 : unsigned long path_index) {
67 101353 : RmTravBuffer *self = g_new0(RmTravBuffer, 1);
68 101353 : self->path = path;
69 101353 : self->is_prefd = is_prefd;
70 101353 : self->path_index = path_index;
71 :
72 101353 : RM_BUFFER_DEFINE_PATH(session, self);
73 :
74 : int stat_state;
75 101353 : if(session->cfg->follow_symlinks) {
76 84 : stat_state = rm_sys_stat(self_path, &self->stat_buf);
77 : } else {
78 101269 : stat_state = rm_sys_lstat(self_path, &self->stat_buf);
79 : }
80 :
81 101353 : if(stat_state == -1) {
82 0 : rm_log_perror("Unable to stat file");
83 : }
84 101353 : return self;
85 : }
86 :
87 101348 : static void rm_trav_buffer_free(RmTravBuffer *self) {
88 101348 : g_free(self);
89 101353 : }
90 :
91 : //////////////////////
92 : // TRAVERSE SESSION //
93 : //////////////////////
94 :
/* State shared by all traversal calls of one rm_traverse_tree() run. */
95 : typedef struct RmTravSession {
96 : RmUserList *userlist; /* from rm_userlist_new(); handed to rm_util_uid_gid_check() */
97 : RmSession *session; /* the session being traversed (not owned) */
98 : } RmTravSession;
99 :
100 36878 : static RmTravSession *rm_traverse_session_new(RmSession *session) {
101 36878 : RmTravSession *self = g_new0(RmTravSession, 1);
102 36878 : self->session = session;
103 36878 : self->userlist = rm_userlist_new();
104 36878 : return self;
105 : }
106 :
107 36878 : static void rm_traverse_session_free(RmTravSession *trav_session) {
108 36878 : rm_log_debug_line("Found %d files, ignored %d hidden files and %d hidden folders",
109 : trav_session->session->total_files,
110 : trav_session->session->ignored_files,
111 : trav_session->session->ignored_folders);
112 :
113 36878 : rm_userlist_destroy(trav_session->userlist);
114 :
115 36878 : g_free(trav_session);
116 36878 : }
117 :
118 : //////////////////////
119 : // ACTUAL WORK HERE //
120 : //////////////////////
121 :
122 226530 : static void rm_traverse_file(RmTravSession *trav_session, RmStat *statp,
123 : GQueue *file_queue, char *path, size_t path_len,
124 : bool is_prefd, unsigned long path_index,
125 : RmLintType file_type, bool is_symlink, bool is_hidden,
126 : bool is_on_subvol_fs, short depth) {
127 226530 : RmSession *session = trav_session->session;
128 226530 : RmCfg *cfg = session->cfg;
129 :
130 : /* Try to autodetect the type of the lint */
131 226530 : if(file_type == RM_LINT_TYPE_UNKNOWN) {
132 : RmLintType gid_check;
133 : /* see if we can find a lint type */
134 226144 : if(statp->st_size == 0) {
135 430 : if(!cfg->find_emptyfiles) {
136 56 : return;
137 : } else {
138 374 : file_type = RM_LINT_TYPE_EMPTY_FILE;
139 : }
140 225714 : } else if(cfg->permissions && access(path, cfg->permissions) == -1) {
141 : /* bad permissions; ignore file */
142 0 : trav_session->session->ignored_files++;
143 0 : return;
144 451240 : } else if(cfg->find_badids &&
145 225518 : (gid_check = rm_util_uid_gid_check(statp, trav_session->userlist))) {
146 84 : file_type = gid_check;
147 225638 : } else if(cfg->find_nonstripped && rm_util_is_nonstripped(path, statp)) {
148 28 : file_type = RM_LINT_TYPE_NONSTRIPPED;
149 : } else {
150 225610 : RmOff file_size = statp->st_size;
151 227626 : if(!cfg->limits_specified ||
152 5124 : ((cfg->minsize == (RmOff)-1 || cfg->minsize <= file_size) &&
153 1680 : (cfg->maxsize == (RmOff)-1 || file_size <= cfg->maxsize))) {
154 449204 : if(rm_mounts_is_evil(trav_session->session->mounts, statp->st_dev) ==
155 : false) {
156 224602 : file_type = RM_LINT_TYPE_DUPE_CANDIDATE;
157 : } else {
158 : /* A file in a evil fs. Ignore. */
159 0 : trav_session->session->ignored_files++;
160 0 : return;
161 : }
162 : } else {
163 1008 : return;
164 : }
165 : }
166 : }
167 :
168 225474 : RmFile *file = rm_file_new(session, path, path_len, statp, file_type, is_prefd,
169 : path_index, depth);
170 :
171 225478 : if(file != NULL) {
172 225367 : file->is_symlink = is_symlink;
173 225367 : file->is_hidden = is_hidden;
174 225367 : file->is_on_subvol_fs = is_on_subvol_fs;
175 :
176 225367 : int added = 0;
177 225367 : if(file_queue != NULL) {
178 225280 : g_queue_push_tail(file_queue, file);
179 225280 : added = 1;
180 : } else {
181 87 : added = rm_file_tables_insert(session, file);
182 : }
183 :
184 225367 : g_atomic_int_add(&trav_session->session->total_files, added);
185 225367 : rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_TRAVERSE);
186 :
187 225759 : if(trav_session->session->cfg->clear_xattr_fields &&
188 392 : file->lint_type == RM_LINT_TYPE_DUPE_CANDIDATE) {
189 392 : rm_xattr_clear_hash(session, file);
190 : }
191 : }
192 : }
193 :
194 226445 : static bool rm_traverse_is_hidden(RmCfg *cfg, const char *basename, char *hierarchy,
195 : size_t hierarchy_len) {
196 226445 : if(cfg->partial_hidden == false) {
197 220761 : return false;
198 5684 : } else if(*basename == '.') {
199 112 : return true;
200 : } else {
201 5572 : return !!memchr(hierarchy, 1, hierarchy_len);
202 : }
203 : }
204 :
205 : /* Helper macro for rm_traverse_directory(): forwards the current fts entry
 * to rm_traverse_file().
 *
 * It is not hygienic: besides its arguments it reads the locals
 * trav_session, file_queue, p, is_prefd, path_index, cfg, is_hidden and
 * is_on_subvol_fs of the expanding function. The expansion already ends
 * with a semicolon.
 */
206 : #define _ADD_FILE(lint_type, is_symlink, stat_buf) \
207 : rm_traverse_file( \
208 : trav_session, (RmStat *)stat_buf, &file_queue, p->fts_path, p->fts_pathlen, \
209 : is_prefd, path_index, lint_type, is_symlink, \
210 : rm_traverse_is_hidden(cfg, p->fts_name, is_hidden, p->fts_level + 1), \
211 : is_on_subvol_fs, p->fts_level);
212 :
213 : #if RM_PLATFORM_32 && HAVE_STAT64
214 :
215 : static void rm_traverse_convert_small_stat_buf(struct stat *fts_statp, RmStat *buf) {
216 : /* Break a leg for supporting large files on 32 bit,
217 : * and convert the needed fields to the large version.
218 : *
219 : * We can't use memcpy here, since the layout might be (fatally) different.
220 : * Yes, this is stupid. *Sigh*
221 : * */
222 : memset(buf, 0, sizeof(RmStat));
223 : buf->st_dev = fts_statp->st_dev;
224 : buf->st_ino = fts_statp->st_ino;
225 : buf->st_mode = fts_statp->st_mode;
226 : buf->st_nlink = fts_statp->st_nlink;
227 : buf->st_uid = fts_statp->st_uid;
228 : buf->st_gid = fts_statp->st_gid;
229 : buf->st_rdev = fts_statp->st_rdev;
230 : buf->st_size = fts_statp->st_size;
231 : buf->st_blksize = fts_statp->st_blksize;
232 : buf->st_blocks = fts_statp->st_blocks;
233 : buf->st_atim = fts_statp->st_atim;
234 : buf->st_mtim = fts_statp->st_mtim;
235 : buf->st_ctim = fts_statp->st_ctim;
236 : }
237 :
/* 32 bit + stat64 variant: convert the small fts stat buffer into a
 * temporary RmStat first, then add the file via _ADD_FILE. Expands to a
 * braced block; relies on the local `p` of the expanding function. */
238 : #define ADD_FILE(lint_type, is_symlink) \
239 : { \
240 : RmStat buf; \
241 : rm_traverse_convert_small_stat_buf(p->fts_statp, &buf); \
242 : _ADD_FILE(lint_type, is_symlink, &buf) \
243 : }
244 :
245 : #else
246 :
/* Default variant: the stat buffer delivered by fts is handed to _ADD_FILE
 * directly, cast to RmStat *. Relies on the local `p` of the expanding
 * function. */
247 : #define ADD_FILE(lint_type, is_symlink) \
248 : _ADD_FILE(lint_type, is_symlink, (RmStat *)p->fts_statp)
249 :
250 : #endif
251 :
252 101266 : static void rm_traverse_directory(RmTravBuffer *buffer, RmTravSession *trav_session) {
253 101266 : RmSession *session = trav_session->session;
254 101266 : RmCfg *cfg = session->cfg;
255 :
256 101266 : char is_prefd = buffer->is_prefd;
257 101266 : RmOff path_index = buffer->path_index;
258 :
259 : /* Initialize ftsp */
260 101266 : int fts_flags = FTS_PHYSICAL | FTS_COMFOLLOW | FTS_NOCHDIR;
261 :
262 101266 : RM_BUFFER_DEFINE_PATH(trav_session->session, buffer);
263 :
264 101266 : bool is_on_subvol_fs = (buffer_path[0] == '/' && buffer_path[1] == '/');
265 101266 : if(is_on_subvol_fs) {
266 0 : rm_log_debug_line("Treating files under %s as a single volume", buffer_path);
267 : }
268 :
269 101266 : FTS *ftsp = fts_open((char * [2]){buffer_path, NULL}, fts_flags, NULL);
270 :
271 101261 : if(ftsp == NULL) {
272 0 : rm_log_error_line("fts_open() == NULL");
273 0 : return;
274 : }
275 :
276 : FTSENT *p, *chp;
277 101261 : chp = fts_children(ftsp, 0);
278 101259 : if(chp == NULL) {
279 0 : rm_log_warning_line("fts_children() == NULL");
280 0 : return;
281 : }
282 :
283 : /* start main processing */
284 : char is_emptydir[PATH_MAX / 2 + 1];
285 : char is_hidden[PATH_MAX / 2 + 1];
286 101259 : bool have_open_emptydirs = false;
287 101259 : bool clear_emptydir_flags = false;
288 101259 : bool next_is_symlink = false;
289 :
290 101259 : memset(is_emptydir, 0, sizeof(is_emptydir) - 1);
291 101259 : memset(is_hidden, 0, sizeof(is_hidden) - 1);
292 :
293 : /* rm_traverse_file add the finished file (if any) to this queue. They are
294 : * added to the preprocessing module in batch so there isn't many jumping
295 : * between BEGIN; INSERT[...]; COMMIT and SELECT with --with-metadata-cache.
296 : */
297 101259 : GQueue file_queue = G_QUEUE_INIT;
298 :
299 641951 : while(!rm_session_was_aborted(trav_session->session) &&
300 : (p = fts_read(ftsp)) != NULL) {
301 : /* check for hidden file or folder */
302 439422 : if(cfg->ignore_hidden && p->fts_level > 0 && p->fts_name[0] == '.') {
303 : /* ignoring hidden folders*/
304 :
305 864 : if(p->fts_info == FTS_D) {
306 51 : fts_set(ftsp, p, FTS_SKIP); /* do not recurse */
307 56 : g_atomic_int_inc(&trav_session->session->ignored_folders);
308 : } else {
309 813 : g_atomic_int_inc(&trav_session->session->ignored_files);
310 : }
311 :
312 869 : clear_emptydir_flags = true; /* flag current dir as not empty */
313 869 : is_emptydir[p->fts_level] = 0;
314 : } else {
315 438558 : switch(p->fts_info) {
316 : case FTS_D: /* preorder directory */
317 106039 : if(cfg->depth != 0 && p->fts_level >= cfg->depth) {
318 : /* continuing into folder would exceed maxdepth*/
319 0 : fts_set(ftsp, p, FTS_SKIP); /* do not recurse */
320 0 : clear_emptydir_flags = true; /* flag current dir as not empty */
321 0 : rm_log_debug_line("Not descending into %s because max depth reached",
322 : p->fts_path);
323 106039 : } else if(cfg->crossdev && p->fts_dev != chp->fts_dev) {
324 : /* continuing into folder would cross file systems*/
325 0 : fts_set(ftsp, p, FTS_SKIP); /* do not recurse */
326 0 : clear_emptydir_flags = true; /*flag current dir as not empty*/
327 0 : rm_log_info(
328 : "Not descending into %s because it is a different filesystem\n",
329 : p->fts_path);
330 : } else {
331 : /* recurse dir; assume empty until proven otherwise */
332 106039 : is_emptydir[p->fts_level + 1] = 1;
333 212078 : is_hidden[p->fts_level + 1] =
334 106039 : is_hidden[p->fts_level] | (p->fts_name[0] == '.');
335 106039 : have_open_emptydirs = true;
336 : }
337 106039 : break;
338 : case FTS_DC: /* directory that causes cycles */
339 28 : rm_log_warning_line(_("filesystem loop detected at %s (skipping)"),
340 : p->fts_path);
341 28 : clear_emptydir_flags = true; /* current dir not empty */
342 28 : break;
343 : case FTS_DNR: /* unreadable directory */
344 0 : rm_log_warning_line(_("cannot read directory %s: %s"), p->fts_path,
345 : g_strerror(p->fts_errno));
346 0 : clear_emptydir_flags = true; /* current dir not empty */
347 0 : break;
348 : case FTS_DOT: /* dot or dot-dot */
349 0 : break;
350 : case FTS_DP: /* postorder directory */
351 106040 : if(is_emptydir[p->fts_level + 1] && cfg->find_emptydirs) {
352 252 : ADD_FILE(RM_LINT_TYPE_EMPTY_DIR, false);
353 : }
354 106040 : is_hidden[p->fts_level + 1] = 0;
355 106040 : break;
356 : case FTS_ERR: /* error; errno is set */
357 0 : rm_log_warning_line(_("error %d in fts_read for %s (skipping)"), errno,
358 : p->fts_path);
359 0 : clear_emptydir_flags = true; /*current dir not empty*/
360 0 : break;
361 : case FTS_INIT: /* initialized only */
362 0 : break;
363 : case FTS_SLNONE: /* symbolic link without target */
364 28 : if(cfg->find_badlinks) {
365 28 : ADD_FILE(RM_LINT_TYPE_BADLINK, false);
366 : }
367 28 : clear_emptydir_flags = true; /*current dir not empty*/
368 28 : break;
369 : case FTS_W: /* whiteout object */
370 0 : clear_emptydir_flags = true; /*current dir not empty*/
371 0 : break;
372 : case FTS_NS: { /* rm_sys_stat(2) failed */
373 0 : clear_emptydir_flags = true; /*current dir not empty*/
374 : RmStat stat_buf;
375 :
376 : /* See if your stat can do better. */
377 0 : if(rm_sys_stat(p->fts_path, &stat_buf) != -1) {
378 : /* normal stat failed but 64-bit stat worked
379 : * -> must be a big file on 32 bit.
380 : */
381 0 : rm_traverse_file(trav_session, &stat_buf, &file_queue, p->fts_path,
382 0 : p->fts_pathlen, is_prefd, path_index,
383 : RM_LINT_TYPE_UNKNOWN, false,
384 0 : rm_traverse_is_hidden(cfg, p->fts_name, is_hidden,
385 0 : p->fts_level + 1),
386 0 : is_on_subvol_fs, p->fts_level);
387 0 : rm_log_warning_line(_("Added big file %s"), p->fts_path);
388 : } else {
389 0 : rm_log_warning(_("cannot stat file %s (skipping)"), p->fts_path);
390 : }
391 0 : } break;
392 : case FTS_SL: /* symbolic link */
393 504 : clear_emptydir_flags = true; /* current dir not empty */
394 504 : if(!cfg->follow_symlinks) {
395 336 : if(p->fts_level != 0) {
396 336 : rm_log_debug_line("Not following symlink %s because of cfg",
397 : p->fts_path);
398 : }
399 :
400 336 : if(access(p->fts_path, R_OK) == -1 && errno == ENOENT) {
401 : /* Oops, that's a badlink. */
402 224 : if(cfg->find_badlinks) {
403 112 : ADD_FILE(RM_LINT_TYPE_BADLINK, false);
404 : }
405 224 : } else if(cfg->see_symlinks) {
406 140 : ADD_FILE(RM_LINT_TYPE_UNKNOWN, true);
407 : }
408 : } else {
409 168 : rm_log_debug_line("Following symlink %s", p->fts_path);
410 168 : next_is_symlink = true;
411 168 : fts_set(ftsp, p, FTS_FOLLOW); /* do not recurse */
412 : }
413 504 : break;
414 : case FTS_NSOK: /* no rm_sys_stat(2) requested */
415 : case FTS_F: /* regular file */
416 : case FTS_DEFAULT: /* any file type not explicitly described by one of the
417 : above*/
418 225919 : clear_emptydir_flags = true; /* current dir not empty*/
419 225919 : ADD_FILE(RM_LINT_TYPE_UNKNOWN, next_is_symlink);
420 225925 : next_is_symlink = false;
421 225925 : break;
422 : default:
423 : /* unknown case; assume current dir not empty but otherwise do nothing */
424 0 : clear_emptydir_flags = true;
425 0 : rm_log_error_line(_("Unknown fts_info flag %d for file %s"), p->fts_info,
426 : p->fts_path);
427 0 : break;
428 : }
429 :
430 438564 : if(clear_emptydir_flags) {
431 : /* non-empty dir found above; need to clear emptydir flags for all open
432 : * levels */
433 226787 : if(have_open_emptydirs) {
434 103345 : memset(is_emptydir, 0, sizeof(is_emptydir) - 1);
435 103345 : have_open_emptydirs = false;
436 : }
437 226787 : clear_emptydir_flags = false;
438 : }
439 : /* current dir may not be empty; by association, all open dirs are non-empty
440 : */
441 : }
442 : }
443 :
444 101260 : if(errno != 0 && !rm_session_was_aborted(session)) {
445 0 : rm_log_error_line(_("'%s': fts_read failed on %s"), g_strerror(errno),
446 : ftsp->fts_path);
447 : }
448 :
449 : #undef ADD_FILE
450 :
451 101265 : fts_close(ftsp);
452 101261 : rm_trav_buffer_free(buffer);
453 :
454 : /* Pass the files to the preprocessing machinery. We collect the files first
455 : * in order to make -with-metadata-cache work: Without, too many
456 : * insert/selects would crossfire.
457 : */
458 326541 : for(GList *iter = file_queue.head; iter; iter = iter->next) {
459 225275 : RmFile *file = iter->data;
460 225275 : g_atomic_int_add(&trav_session->session->total_files,
461 : -(rm_file_tables_insert(session, file) == 0));
462 225279 : rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_TRAVERSE);
463 : }
464 101266 : g_queue_clear(&file_queue);
465 : }
466 :
467 39170 : static void rm_traverse_directories(GQueue *path_queue, RmTravSession *trav_session) {
468 39170 : g_queue_foreach(path_queue, (GFunc)rm_traverse_directory, trav_session);
469 39169 : }
470 :
471 : ////////////////
472 : // PUBLIC API //
473 : ////////////////
474 :
475 36878 : void rm_traverse_tree(RmSession *session) {
476 36878 : RmCfg *cfg = session->cfg;
477 36878 : RmTravSession *trav_session = rm_traverse_session_new(session);
478 :
479 36878 : GHashTable *paths_per_disk =
480 : g_hash_table_new_full(NULL, NULL, NULL, (GDestroyNotify)g_queue_free);
481 :
482 138231 : for(RmOff idx = 0; cfg->paths[idx] != NULL; ++idx) {
483 101353 : char *path = cfg->paths[idx];
484 101353 : bool is_prefd = cfg->is_prefd[idx];
485 :
486 101353 : RmTravBuffer *buffer = rm_trav_buffer_new(session, path, is_prefd, idx);
487 :
488 101353 : RM_BUFFER_DEFINE_PATH(session, buffer);
489 :
490 101353 : if(S_ISREG(buffer->stat_buf.st_mode)) {
491 : /* Append normal paths directly */
492 87 : bool is_hidden = false;
493 :
494 : /* The is_hidden information is only needed for --partial-hidden */
495 87 : if(cfg->partial_hidden) {
496 84 : is_hidden = rm_util_path_is_hidden(buffer_path);
497 : }
498 :
499 87 : rm_traverse_file(trav_session, &buffer->stat_buf, NULL, buffer_path,
500 : strlen(buffer_path), is_prefd, idx, RM_LINT_TYPE_UNKNOWN,
501 : false, is_hidden, FALSE, 0);
502 :
503 87 : rm_trav_buffer_free(buffer);
504 101266 : } else if(S_ISDIR(buffer->stat_buf.st_mode)) {
505 : /* It's a directory, traverse it. */
506 202532 : dev_t disk = (!cfg->fake_pathindex_as_disk ? rm_mounts_get_disk_id_by_path(
507 97662 : session->mounts, buffer_path)
508 198928 : : (dev_t)idx);
509 :
510 101266 : GQueue *path_queue = rm_hash_table_setdefault(
511 : paths_per_disk, GUINT_TO_POINTER(disk), (RmNewFunc)g_queue_new);
512 101266 : g_queue_push_tail(path_queue, buffer);
513 : } else {
514 : /* Probably a block device, fifo or something weird. */
515 0 : rm_trav_buffer_free(buffer);
516 : }
517 : }
518 :
519 36878 : GThreadPool *traverse_pool = rm_util_thread_pool_new(
520 36878 : (GFunc)rm_traverse_directories, trav_session, session->cfg->threads);
521 :
522 : GHashTableIter iter;
523 36878 : GQueue *path_queue = NULL;
524 :
525 36878 : g_hash_table_iter_init(&iter, paths_per_disk);
526 112926 : while(g_hash_table_iter_next(&iter, NULL, (gpointer *)&path_queue)) {
527 39170 : rm_util_thread_pool_push(traverse_pool, path_queue);
528 : }
529 :
530 36878 : g_thread_pool_free(traverse_pool, false, true);
531 36878 : g_hash_table_unref(paths_per_disk);
532 36878 : rm_traverse_session_free(trav_session);
533 :
534 36878 : session->traverse_finished = TRUE;
535 36878 : rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_TRAVERSE);
536 36878 : }
|