Line data Source code
1 : /*
2 : * This file is part of rmlint.
3 : *
4 : * rmlint is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * rmlint is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 : *
17 : * Authors:
18 : *
19 : * - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
20 : * - Daniel <SeeSpotRun> T. 2014-2015 (https://github.com/SeeSpotRun)
21 : *
22 : * Hosted on http://github.com/sahib/rmlint
23 : *
24 : */
25 :
26 : #include <glib.h>
27 : #include <unistd.h>
28 : #include <stdio.h>
29 : #include <string.h>
30 :
31 : #include <sys/uio.h>
32 :
33 : #include "checksum.h"
34 : #include "hasher.h"
35 :
36 : #include "preprocess.h"
37 : #include "utilities.h"
38 : #include "formats.h"
39 :
40 : #include "shredder.h"
41 : #include "xattr.h"
42 : #include "md-scheduler.h"
43 :
/* Compile-time switch for the extra debug messages guarded by
 * "#if _RM_SHRED_DEBUG" in this file; set to 1 to enable them. */
#define _RM_SHRED_DEBUG 0
46 :
47 : /* This is the engine of rmlint for file duplicate matching.
48 : *
49 : * Files are compared in progressive "generations" to identify matching
50 : * clusters termed "ShredGroup"s:
51 : * Generation 0: Same size files
52 : * Generation 1: Same size and same hash of first ~16kB
53 : * Generation 2: Same size and same hash of first ~50MB
54 : * Generation 3: Same size and same hash of first ~100MB
55 : * Generation 4: Same size and same hash of first ~150MB
56 : * ... and so on until the end of the file is reached.
57 : *
58 : * The default step size can be configured below.
59 : *
60 : *
61 : * The clusters and generations look something like this:
62 : *
63 : *+-------------------------------------------------------------------------+
64 : *| Initial list after filtering and preprocessing |
65 : *+-------------------------------------------------------------------------+
66 : * | same size | same size | same size
67 : * +------------------+ +------------------+ +----------------+
68 : * | ShredGroup 1 | | ShredGroup 2 | | ShredGroup 3 |
69 : * |F1,F2,F3,F4,F5,F6 | |F7,F8,F9,F10,F11 | | F12,F13 |
70 : * +------------------+ +------------------+ +----------------+
71 : * | | | |
72 : * +------------+ +----------+ +------------+ +---------+ +----+ +----+
73 : * | Child 1.1 | |Child 1.2 | | Child 2.1 | |Child 2.2| |3.1 | |3.2 |
74 : * | F1,F3,F6 | |F2,F4,F5 | |F7,F8,F9,F10| | F11 | |F12 | |F13 |
75 : * |(hash=hash1 | |(hash=h2) | |(hash=h3) | |(hash=h4)| |(h5)| |(h6)|
76 : * +------------+ +----------+ +------------+ +---------+ +----+ +----+
77 : * | | | | \ \
78 : * +----------+ +-----------+ +-----------+ +-----------+ free! free!
79 : * |Child1.1.1| |Child 1.2.1| |Child 2.2.1| |Child 2.2.2|
80 : * |F1,F3,F6 | |F2,F4,F5 | |F7,F9,F10 | | F8 |
81 : * +----------+ +-----------+ +-----------+ +-----------+
82 : * \ \ \ \
83 : * rm! rm! rm! free!
84 : *
85 : *
86 : * The basic workflow is:
87 : * 1. One worker thread is established for each physical device
88 : * 2. The device thread picks a file from its queue, reads the next increment of that
89 : * file, and sends it to a hashing thread.
90 : * 3. Depending on some logic ("worth_waiting"), the device thread may wait for the
91 : * file increment to finish hashing, or may move straight on to the next file in
92 : * the queue. The "worth_waiting" logic aims to reduce disk seeks on rotational
93 : * devices.
94 : * 4. The hashed fragment result is "sifted" into a child RmShredGroup of its parent
95 : * group, and the file is unlinked from its parent group.
96 : * 5. (a) If the child RmShredGroup needs hashing (ie >= 2 files and not completely hashed
97 : * yet) then the file is pushed back to the device queue for further hashing;
98 : * (b) If the file is not completely hashed but is the only file in the group (or
99 : * otherwise fails criteria such as --must-match-tagged) then it is retained by the
100 : * child RmShredGroup until a suitable sibling arrives, whereupon it is released to
101 : * the device queue.
102 : * (c) If the file has finished hashing, it is retained by the child RmShredGroup
103 : * until its parent and all ancestors have finished processing, whereupon the file
104 : * is sent to the "result factory" (if >= 2 files in the group) or discarded.
105 : *
106 : * In the above example, the hashing order will depend on the "worth_waiting" logic.
107 : * On a rotational device the hashing order should end up being something like:
108 : * F1.1 F2.1 (F3.1,F3.2), (F4.1,F4.2), (F5.1,F5.2,F5.3)...
109 : * ^ ^ ^ ^
110 : * (^ indicates where hashing could continue on to a second increment (avoiding a
111 : * disk seek) because there was already a matching file after the first
112 : * increment)
113 : *
114 : * On a non-rotational device where there is no seek penalty, the hashing order is:
115 : * F1.1 F2.1 F3.1 F4.1 F5.1...
116 : *
117 : *
118 : * The threading looks somewhat like this for two devices:
119 : *
120 : * +----------+
121 : * | Result |
122 : * | Factory |
123 : * | Pipe |
124 : * +----------+
125 : * ^
126 : * |
127 : * +--------------+
128 : * | Matched |
129 : * | fully-hashed |
130 : * | dupe groups |
131 : * Device #1 +--------------+ Device #2
132 : * ^
133 : * +-------------------+ | +-------------------+
134 : * | RmShredDevice | | | RmShredDevice |
135 : * | Worker | | | Worker |
136 : * | +-------------+ | | | +-------------+ |
137 : * | | File Queue |<--+----+ | +----+>| File Queue | |
138 : * | +-------------+ | | | | | +-------------+ |
139 : * | pop from | | | | | pop from |
140 : * | queue | | | | | queue |
141 : * | | | | | | | | |
142 : * | |<--Continue | | | | | Continue-->| |
143 : * | | ^ | | | | | ^ | |
144 : * | v | | | | | | | v |
145 : * | Read | | | | | | | Read |
146 : * | | | | | | | | | | |
147 : * | | | | | | | | | | |
148 : * | | | | Device | Device | | | |
149 : * | [1] | | Not | Not | | [1] |
150 : * +-----|-----+-------+ Waiting | Waiting +------|-----|------+
151 : * | | | | | | |
152 : * | | | | | | |
153 : * | Device +-------+-----+-----+------+ Device |
154 : * | Waiting | Sifting | Waiting |
155 : * | | | (Identifies which | | |
156 : * | -----+ partially-hashed files +----+ |
157 : * | | qualify for further | |
158 : * | +--->| hashing) |<--+ |
159 : * | | | | | |
160 : * | | +--------------------------+ | |
161 : * | | ^ | | |
162 : * | | | v | |
163 : * | | +----------+ +----------+ | |
164 : * | | |Initial | | Rejects | | |
165 : * | | |File List | | | | |
166 : * | | +----------+ +----------+ | |
167 : * | | | |
168 : * +----+-----+-----------------------------------+------+----+
169 : * | v | Hashing Pool | v |
170 : * | +----------+ +----------+ |
171 : * | |Hash Pipe | |Hash Pipe | |
172 : * | +----------+ +----------+ |
173 : * +----------------------------------------------------------+
174 : *
175 : * Note [1] - at this point the read results are sent to the hashpipe
176 : * and the Device must decide if it is worth waiting for
177 : * the hashing/sifting result; if not then the device thread
178 : * will immediately pop the next file from its queue.
179 : *
180 : *
181 : *
182 : * Each sub-box on the left and right is a task that is performed.
183 : *
184 : * The Device Workers, Hash Pipes and Finisher Pipe run as separate threads
185 : * managed by GThreadPool. Note that while they are implemented as
186 : * GThreadPools, the hashers and finisher are limited to 1 thread each
187 : * hence the term "pipe" is more appropriate than "pool". This is
188 : * particularly important for hashing because hash functions are generally
189 : * order-dependent, ie hash(ab) != hash(ba); the only way to ensure hashing
190 : * tasks are complete in correct sequence is to use a single pipe.
191 : *
192 : * The Device Workers work sequentially through the queue of hashing
193 : * jobs; if the device is rotational then the files are sorted in order of
194 : * disk offset in order to reduce seek times.
195 : *
196 : * The Devlist Manager calls the hasher library (see hasher.c) to read one
197 : * file at a time. The hasher library takes care of read buffers, hash
198 : * pipe allocation, etc. Once the hasher is done, the result is sent back
199 : * via callback to rm_shred_hash_callback.
200 : *
201 : * If "worth_waiting" has been flagged then the callback sends the file
202 : * back to the Device Worker thread via a GAsyncQueue, whereupon the Device
203 : * Manager does a quick check to see if it can continue with the same file;
204 : * if not then a new file is taken from the device queue.
205 : *
206 : * The RmShredGroups don't have a thread managing them, instead the individual
207 : * Device Workers and/or hash pipe callbacks write to the RmShredGroups
208 : * under mutex protection.
209 : *
210 : *
211 : * The main ("foreground") thread waits for the Devlist Managers to
212 : * finish their sequential walk through the files. If there are still
213 : * files to process on the device, the initial thread sends them back to
214 : * the GThreadPool for another pass through the files.
215 : *
216 : *
217 : *
218 : * Additional notes regarding "paranoid" hashing:
219 : * The default file matching method uses the SHA1 cryptographic hash; there are
220 : * several other hash functions available as well. The data hashing is somewhat
221 : * cpu-intensive but this is handled by separate threads (the hash pipes) so generally
222 : * doesn't bottleneck rmlint (as long as CPU exceeds disk reading speed). The subsequent
223 : * hash matching is very fast because we only need to compare 20 bytes (in the case of
224 : * SHA1) to find matching files.
225 : * The "paranoid" method uses byte-by-byte comparison. In the implementation, this is
226 : * masqueraded as a hash function, but there is no hashing involved. Instead, the whole
227 : * data increment is kept in memory. This introduces 2 new challenges:
228 : * (1) Memory management. In order to avoid overflowing mem availability, we limit the
229 : * number of concurrent active RmShredGroups and also limit the size of each file
230 : * increment.
231 : * (2) Matching time. Unlike the conventional hashing strategy (CPU-intensive hashing
232 : * followed by simple matching), the paranoid method requires almost no CPU during
233 : * reading/hashing, but requires a large memcmp() at the end to find matching
234 : *files/groups.
235 : * That would not be a bottleneck as long as the reader thread still has other files
236 : * that it can go and read while the hasher/sorter does the memcmp in parallel... but
237 : * unfortunately the memory management issue means that's not always an option and so
238 : * reading gets delayed while waiting for the memcmp() to catch up.
239 : * Two strategies are used to speed this up:
240 : * (a) Pre-matching of candidate digests. During reading/hashing, as each buffer (4096
241 : * bytes) is read in, it can be checked against a "twin candidate". We can send twin
242 : * candidates to the hash pipe at any time via rm_digest_send_match_candidate(). If the
243 : * correct twin candidate has been sent, then when the increment is finished the
244 : * matching has already been done, and rm_digest_equal() is almost instantaneous.
245 : * (b) Shadow hash. A lightweight hash (Murmur) is calculated and used for hashtable
246 : * lookup to quickly identify potential matches. This saves time in the case of
247 : * RmShredGroups with large number of child groups and where the pre-matching strategy
248 : * failed.
249 : * */
250 :
251 : /*
252 : * The tuning controls listed below may impact performance.
253 : * They are sorted by subjective importance.
254 : */
255 :
256 : ////////////////////////////////////////////
257 : // OPTIMISATION PARAMETERS FOR DECIDING //
258 : // HOW MANY BYTES TO READ BEFORE STOPPING //
259 : // TO COMPARE PROGRESSIVE HASHES //
260 : ////////////////////////////////////////////
261 :
/* Number of pages used for the initial read: roughly how many pages can be
 * read in (seek_time)/(CHEAP). */
#define SHRED_BALANCED_PAGES (4)

/* Size of a single memory page as reported by sysconf()
 * (typically 4096 bytes, but not always). */
#define SHRED_PAGE_SIZE (sysconf(_SC_PAGESIZE))

/* Upper bound for RmShredGroup->offset_factor; with this cap a single
 * increment (SHRED_BALANCED_PAGES * SHRED_PAGE_SIZE * factor) stays at or
 * below 256MB. */
#define SHRED_MAX_READ_FACTOR \
    ((256 * 1024 * 1024) / SHRED_BALANCED_PAGES / SHRED_PAGE_SIZE)

/* Largest increment for paranoid digests. Kept smaller than for the other
 * digest types because paranoid increments are held in memory.
 * 16MB should be large enough to make seek time fairly insignificant
 * compared to the sequential read time: 16MB at a typical 100 MB/s takes
 * 160ms, versus a typical seek time of 10ms. */
#define SHRED_PARANOID_BYTES (16 * 1024 * 1024)

/* Select buffered fread() (1) or direct preadv() (0).
 * preadv() is preferred since it is slightly faster on linux; other
 * platforms may behave differently or may not have preadv() at all. */
#define SHRED_USE_BUFFERED_READ (0)

/* For paranoid hashing: when a file increment is larger than
 * SHRED_PREMATCH_THRESHOLD we guess the likely matching file and memcmp()
 * each buffer progressively, instead of waiting for the whole increment
 * to be read. */
#define SHRED_PREMATCH_THRESHOLD (0)

/* Minimum number of files / bytes that must have accumulated before an
 * update is forwarded to the statistics counters. */
#define SHRED_MIN_FILE_STATS_PACK_FILES (16)
#define SHRED_MIN_FILE_STATS_PACK_BYTES (1024 * 1024 * 16)

/* Empirical estimate of memory use per file (not counting read buffers
 * and paranoid digests). */
#define RM_AVERAGE_MEM_PER_FILE (100)
300 :
301 : ////////////////////////
302 : // MATHS SHORTCUTS //
303 : ////////////////////////
304 :
/* Three-way comparison yielding -1, 0 or +1 for X < Y, X == Y, X > Y.
 * Handy for comparing uint64's, where subtracting would wrap around. */
#define SIGN_DIFF(X, Y) (((X) > (Y)) - ((X) < (Y)))
306 :
307 : ///////////////////////////////////////////////////////////////////////
308 : // INTERNAL STRUCTURES, WITH THEIR INITIALISERS AND DESTROYERS //
309 : ///////////////////////////////////////////////////////////////////////
310 :
311 : /////////* The main extra data for the duplicate finder *///////////
312 :
313 : typedef struct RmShredTag {
314 : RmSession *session;
315 : GAsyncQueue *device_return;
316 : GMutex hash_mem_mtx;
317 : gint64 paranoid_mem_alloc; /* how much memory to allocate for paranoid checks */
318 : gint32 active_groups; /* how many shred groups active (only used with paranoid) */
319 : RmHasher *hasher;
320 : GThreadPool *result_pool;
321 : gint32 page_size;
322 : bool mem_refusing;
323 :
324 : GMutex lock;
325 :
326 : gint32 remaining_files;
327 : gint64 remaining_bytes;
328 :
329 : bool after_preprocess : 1;
330 :
331 : /* cached counters to avoid blocking delays in rm_shred_adjust_counters */
332 : gint cache_file_count;
333 : gint cache_filtered_count;
334 : gint64 cache_byte_count;
335 :
336 : } RmShredTag;
337 :
/* Life cycle of an RmShredGroup. The numeric order matters: code compares
 * with ">= RM_SHRED_GROUP_HASHING" to test whether a group is committed. */
typedef enum RmShredGroupStatus {
    /* initial state; arriving files are parked in held_files */
    RM_SHRED_GROUP_DORMANT = 0,
    RM_SHRED_GROUP_START_HASHING,
    RM_SHRED_GROUP_HASHING,
    /* on finalise the group is handed to the result pool */
    RM_SHRED_GROUP_FINISHING,
    RM_SHRED_GROUP_FINISHED
} RmShredGroupStatus;
345 :
/* Configuration predicates. Every macro argument is parenthesised so that
 * arbitrary pointer expressions (e.g. "&grp" or "a ? b : c") expand
 * correctly. */

/* true if the group must contain a file from a "preferred" path */
#define NEEDS_PREF(group) \
    ((group)->session->cfg->must_match_tagged || (group)->session->cfg->keep_all_untagged)
/* true if the group must contain a file from a "non-preferred" path */
#define NEEDS_NPREF(group) \
    ((group)->session->cfg->must_match_untagged || (group)->session->cfg->keep_all_tagged)
/* non-zero if a minimum mtime is configured */
#define NEEDS_NEW(group) ((group)->session->cfg->min_mtime)

/* true if checksums may come from xattrs or from a cache list */
#define HAS_CACHE(session) \
    ((session)->cfg->read_cksum_from_xattr || (session)->cache_list.length)

/* Currently always true (note the leading TRUE).
 * @sahib - performance is faster with shadow hash, probably due to hash
 * collisions in large RmShredGroups */
#define NEEDS_SHADOW_HASH(cfg) \
    (TRUE || (cfg)->merge_directories || (cfg)->read_cksum_from_xattr)
359 :
360 : typedef struct RmShredGroup {
361 : /* holding queue for files; they are held here until the group first meets
362 : * criteria for further hashing (normally just 2 or more files, but sometimes
363 : * related to preferred path counts)
364 : * */
365 : GQueue *held_files;
366 :
367 : /* link(s) to next generation of RmShredGroups(s) which have this RmShredGroup as
368 : * parent*/
369 : GHashTable *children;
370 :
371 : /* RmShredGroup of the same size files but with lower RmFile->hash_offset;
372 : * getsset to null when parent dies
373 : * */
374 : struct RmShredGroup *parent;
375 :
376 : /* total number of files that have passed through this group*/
377 : gulong num_files;
378 :
379 : /* number of pending digests */
380 : gulong num_pending;
381 :
382 : /* list of in-progress paranoid digests, used for pre-matching */
383 : GList *in_progress_digests;
384 :
385 : /* set if group has 1 or more files from "preferred" paths */
386 : bool has_pref : 1;
387 :
388 : /* set if group has 1 or more files from "non-preferred" paths */
389 : bool has_npref : 1;
390 :
391 : /* set if group has 1 or more files newer than cfg->min_mtime */
392 : bool has_new : 1;
393 :
394 : /* set if group has been greenlighted by paranoid mem manager */
395 : bool is_active : 1;
396 :
397 : /* true if all files in the group have an external checksum */
398 : bool has_only_ext_cksums : 1;
399 :
400 : /* incremented for each file in the group that obtained it's checksum from ext.
401 : * If all files came from there we do not even need to hash the group.
402 : */
403 : gulong num_ext_cksums;
404 :
405 : /* if whole group has same basename, pointer to first file, else null */
406 : RmFile *unique_basename;
407 :
408 : /* initially RM_SHRED_GROUP_DORMANT; triggered as soon as we have >= 2 files
409 : * and meet preferred path and will go to either RM_SHRED_GROUP_HASHING or
410 : * RM_SHRED_GROUP_FINISHING. When switching from dormant to hashing, all
411 : * held_files are released and future arrivals go straight to hashing
412 : * */
413 : RmShredGroupStatus status;
414 :
415 : /* file size of files in this group */
416 : RmOff file_size;
417 :
418 : /* file hash_offset when files arrived in this group */
419 : RmOff hash_offset;
420 :
421 : /* file hash_offset for next increment */
422 : RmOff next_offset;
423 :
424 : /* Factor of SHRED_BALANCED_PAGES to read next time */
425 : gint64 offset_factor;
426 :
427 : /* allocated memory for paranoid hashing */
428 : RmOff mem_allocation;
429 :
430 : /* checksum structure taken from first file to enter the group. This allows
431 : * digests to be released from RmFiles and memory freed up until they
432 : * are required again for further hashing.*/
433 : RmDigestType digest_type;
434 : RmDigest *digest;
435 :
436 : /* lock for access to this RmShredGroup */
437 : GMutex lock;
438 :
439 : /* Reference to main */
440 : const RmSession *session;
441 : } RmShredGroup;
442 :
443 : typedef struct RmSignal {
444 : GMutex lock;
445 : GCond cond;
446 : gboolean done;
447 : } RmSignal;
448 :
449 6 : static RmSignal *rm_signal_new(void) {
450 6 : RmSignal *self = g_slice_new(RmSignal);
451 6 : g_mutex_init(&self->lock);
452 6 : g_cond_init(&self->cond);
453 6 : self->done = FALSE;
454 6 : return self;
455 : }
456 :
457 6 : static void rm_signal_wait(RmSignal *signal) {
458 6 : g_mutex_lock(&signal->lock);
459 : {
460 18 : while(!signal->done) {
461 6 : g_cond_wait(&signal->cond, &signal->lock);
462 : }
463 : }
464 6 : g_mutex_unlock(&signal->lock);
465 6 : g_mutex_clear(&signal->lock);
466 6 : g_cond_clear(&signal->cond);
467 6 : g_slice_free(RmSignal, signal);
468 6 : }
469 :
470 6 : static void rm_signal_done(RmSignal *signal) {
471 6 : g_mutex_lock(&signal->lock);
472 : {
473 6 : signal->done = TRUE;
474 6 : g_cond_signal(&signal->cond);
475 : }
476 6 : g_mutex_unlock(&signal->lock);
477 6 : }
478 :
479 : /////////// RmShredGroup ////////////////
480 :
481 : /* allocate and initialise new RmShredGroup */
482 109490 : static RmShredGroup *rm_shred_group_new(RmFile *file) {
483 109490 : RmShredGroup *self = g_slice_new0(RmShredGroup);
484 :
485 109490 : if(file->digest) {
486 55948 : self->digest_type = file->digest->type;
487 55948 : self->digest = file->digest;
488 55948 : file->digest = NULL;
489 : } else {
490 : /* initial groups have no checksum */
491 53542 : g_assert(!file->shred_group);
492 : }
493 :
494 109490 : self->parent = file->shred_group;
495 109490 : self->session = file->session;
496 :
497 109490 : if(self->parent) {
498 55948 : self->offset_factor = MIN(self->parent->offset_factor * 8, SHRED_MAX_READ_FACTOR);
499 : } else {
500 53542 : self->offset_factor = 1;
501 : }
502 :
503 109490 : self->held_files = g_queue_new();
504 109490 : self->file_size = file->file_size;
505 109490 : self->hash_offset = file->hash_offset;
506 :
507 109490 : self->session = file->session;
508 :
509 109490 : g_mutex_init(&self->lock);
510 :
511 109490 : return self;
512 : }
513 :
514 : //////////////////////////////////
515 : // OPTIMISATION AND MEMORY //
516 : // MANAGEMENT ALGORITHMS //
517 : //////////////////////////////////
518 :
519 : /* Compute optimal size for next hash increment
520 : * call this with group locked
521 : * */
522 225665 : static gint32 rm_shred_get_read_size(RmFile *file, RmShredTag *tag) {
523 225665 : RmShredGroup *group = file->shred_group;
524 225665 : g_assert(group);
525 :
526 225665 : gint32 result = 0;
527 :
528 : /* calculate next_offset property of the RmShredGroup */
529 225665 : RmOff balanced_bytes = tag->page_size * SHRED_BALANCED_PAGES;
530 225665 : RmOff target_bytes = balanced_bytes * group->offset_factor;
531 225665 : if(group->next_offset == 2) {
532 0 : file->fadvise_requested = 1;
533 : }
534 :
535 : /* round to even number of pages, round up to MIN_READ_PAGES */
536 225665 : RmOff target_pages = MAX(target_bytes / tag->page_size, 1);
537 225665 : target_bytes = target_pages * tag->page_size;
538 :
539 : /* test if cost-effective to read the whole file */
540 225665 : if(group->hash_offset + target_bytes + (balanced_bytes) >= group->file_size) {
541 225309 : group->next_offset = group->file_size;
542 225309 : file->fadvise_requested = 1;
543 : } else {
544 356 : group->next_offset = group->hash_offset + target_bytes;
545 : }
546 :
547 : /* for paranoid digests, make sure next read is not > max size of paranoid buffer */
548 225665 : if(group->digest_type == RM_DIGEST_PARANOID) {
549 19862 : group->next_offset =
550 19862 : MIN(group->next_offset, group->hash_offset + SHRED_PARANOID_BYTES);
551 : }
552 :
553 225665 : file->status = RM_FILE_STATE_NORMAL;
554 225665 : result = (group->next_offset - file->hash_offset);
555 :
556 225665 : return result;
557 : }
558 :
559 : /* Memory manager (only used for RM_DIGEST_PARANOID at the moment
560 : * but could also be adapted for other digests if very large
561 : * filesystems are contemplated)
562 : */
563 :
564 108526 : static void rm_shred_mem_return(RmShredGroup *group) {
565 108526 : if(group->is_active) {
566 3826 : RmShredTag *tag = group->session->shredder;
567 3826 : g_mutex_lock(&tag->hash_mem_mtx);
568 : {
569 3826 : tag->paranoid_mem_alloc += group->mem_allocation;
570 3826 : tag->active_groups--;
571 3826 : group->is_active = FALSE;
572 : #if _RM_SHRED_DEBUG
573 : rm_log_debug_line("Mem avail %" LLI ", active groups %d. " YELLOW "Returned %" LLU " bytes for paranoid hashing.",
574 : tag->paranoid_mem_alloc,
575 : tag->active_groups,
576 : group->mem_allocation);
577 : #endif
578 3826 : tag->mem_refusing = FALSE;
579 3826 : if(group->digest) {
580 8 : g_assert(group->digest->type == RM_DIGEST_PARANOID);
581 8 : rm_digest_free(group->digest);
582 8 : group->digest = NULL;
583 : }
584 : }
585 3826 : g_mutex_unlock(&tag->hash_mem_mtx);
586 3826 : group->mem_allocation = 0;
587 : }
588 108526 : }
589 :
590 : /* what is the maximum number of files that a group may end up with (including
591 : * parent, grandparent etc group files that haven't been hashed yet)?
592 : */
593 103638 : static gulong rm_shred_group_potential_file_count(RmShredGroup *group) {
594 103638 : if(group) {
595 51823 : return group->num_pending + rm_shred_group_potential_file_count(group->parent);
596 : } else {
597 51815 : return 0;
598 : }
599 : }
600 :
601 : /* Governer to limit memory usage by limiting how many RmShredGroups can be
602 : * active at any one time
603 : * NOTE: group_lock must be held before calling rm_shred_check_paranoid_mem_alloc
604 : */
605 64033 : static bool rm_shred_check_paranoid_mem_alloc(RmShredGroup *group,
606 : int active_group_threshold) {
607 64033 : if(group->status >= RM_SHRED_GROUP_HASHING) {
608 : /* group already committed */
609 12218 : return true;
610 : }
611 :
612 51815 : gint64 mem_required =
613 103630 : (rm_shred_group_potential_file_count(group) / 2 + 1) *
614 51815 : MIN(group->file_size - group->hash_offset, SHRED_PARANOID_BYTES);
615 :
616 51815 : bool result = FALSE;
617 51815 : RmShredTag *tag = group->session->shredder;
618 51815 : g_mutex_lock(&tag->hash_mem_mtx);
619 : {
620 51815 : gint64 inherited = group->parent ? group->parent->mem_allocation : 0;
621 :
622 101677 : if(0 || mem_required <= tag->paranoid_mem_alloc + inherited ||
623 53688 : (tag->active_groups <= active_group_threshold)) {
624 : /* ok to proceed */
625 : /* only take what we need from parent */
626 3826 : inherited = MIN(inherited, mem_required);
627 3826 : if(inherited > 0) {
628 4 : group->parent->mem_allocation -= inherited;
629 4 : group->mem_allocation += inherited;
630 : }
631 :
632 : /* take the rest from bank */
633 3826 : gint64 borrowed =
634 3826 : MIN(mem_required - inherited, (gint64)tag->paranoid_mem_alloc);
635 3826 : tag->paranoid_mem_alloc -= borrowed;
636 3826 : group->mem_allocation += borrowed;
637 :
638 3826 : if(tag->mem_refusing) {
639 0 : rm_log_debug_line(
640 : "Mem avail %"LLI", active groups %d. Borrowed %"LLI". Inherited: %"LLI" bytes for paranoid hashing",
641 : tag->paranoid_mem_alloc,
642 : tag->active_groups, borrowed,
643 : inherited
644 : );
645 :
646 0 : if(mem_required > borrowed + inherited) {
647 0 : rm_log_debug_line("...due to %i active group limit", active_group_threshold);
648 : }
649 :
650 0 : tag->mem_refusing = FALSE;
651 : }
652 :
653 3826 : tag->active_groups++;
654 3826 : group->is_active = TRUE;
655 3826 : group->status = RM_SHRED_GROUP_HASHING;
656 3826 : result = TRUE;
657 : } else {
658 47989 : if(!tag->mem_refusing) {
659 581 : rm_log_debug_line("Mem avail %"LLI", active groups %d. " RED
660 : "Refused request for %" LLU
661 : " bytes for paranoid hashing.",
662 : tag->paranoid_mem_alloc, tag->active_groups, mem_required);
663 581 : tag->mem_refusing = TRUE;
664 : }
665 47989 : result = FALSE;
666 : }
667 : }
668 51815 : g_mutex_unlock(&tag->hash_mem_mtx);
669 :
670 51815 : return result;
671 : }
672 :
673 : ///////////////////////////////////
674 : // RmShredDevice UTILITIES //
675 : ///////////////////////////////////
676 :
677 666733 : static void rm_shred_adjust_counters(RmShredTag *tag, int files, gint64 bytes) {
678 666733 : g_mutex_lock(&tag->lock);
679 : {
680 666807 : RmSession *session = tag->session;
681 666807 : tag->cache_byte_count += bytes;
682 666807 : tag->cache_file_count += files;
683 666807 : if(files < 0) {
684 222489 : tag->cache_filtered_count += files;
685 : }
686 :
687 1333558 : if(abs(tag->cache_byte_count) >= SHRED_MIN_FILE_STATS_PACK_BYTES ||
688 666751 : abs(tag->cache_file_count) >= SHRED_MIN_FILE_STATS_PACK_FILES) {
689 112 : rm_fmt_lock_state(session->formats);
690 : {
691 : #if RM_SHRED_DEBUG
692 : gint64 bytes_remaining =
693 : session->shred_bytes_remaining + tag->cache_byte_count;
694 : gint64 files_remaining =
695 : session->shred_files_remaining + tag->cache_file_count;
696 : g_assert(check_bytes >= 0);
697 : g_assert(check_files >= 0);
698 : #endif
699 112 : session->shred_files_remaining += tag->cache_file_count;
700 112 : session->total_filtered_files += tag->cache_filtered_count;
701 112 : session->shred_bytes_remaining += tag->cache_byte_count;
702 :
703 112 : rm_fmt_set_state(session->formats, (tag->after_preprocess)
704 : ? RM_PROGRESS_STATE_SHREDDER
705 : : RM_PROGRESS_STATE_PREPROCESS);
706 112 : tag->cache_file_count = 0;
707 112 : tag->cache_filtered_count = 0;
708 112 : tag->cache_byte_count = 0;
709 : }
710 112 : rm_fmt_unlock_state(session->formats);
711 : }
712 : }
713 666807 : g_mutex_unlock(&tag->lock);
714 666800 : }
715 :
716 221889 : static void rm_shred_write_cksum_to_xattr(const RmSession *session, RmFile *file) {
717 221889 : if(session->cfg->write_cksum_to_xattr) {
718 392 : if(file->has_ext_cksum == false) {
719 392 : rm_xattr_write_hash((RmSession *)session, file);
720 : }
721 : }
722 221889 : }
723 :
724 : /* Unlink RmFile from device queue
725 : */
726 222919 : static void rm_shred_discard_file(RmFile *file, bool free_file) {
727 222919 : const RmSession *session = file->session;
728 222919 : RmShredTag *tag = session->shredder;
729 : /* update device counters (unless this file was a bundled hardlink) */
730 222919 : if(!file->hardlinks.hardlink_head) {
731 222489 : rm_mds_ref_dev(session->mds, file->disk, -1);
732 222489 : rm_shred_adjust_counters(tag, -1, -(gint64)(file->file_size - file->hash_offset));
733 :
734 : /* ShredGroup that was going nowhere */
735 : g_assert(session->cfg->write_unfinished || TRUE);
736 227885 : if(file->shred_group && file->shred_group->num_files <= 1 &&
737 5396 : session->cfg->write_unfinished) {
738 168 : file->lint_type = RM_LINT_TYPE_UNFINISHED_CKSUM;
739 168 : file->digest = (file->digest) ? file->digest : file->shred_group->digest;
740 :
741 168 : if(file->digest) {
742 84 : rm_fmt_write(file, session->formats, -1);
743 84 : rm_shred_write_cksum_to_xattr(session, file);
744 84 : file->digest = NULL;
745 : }
746 : }
747 : }
748 :
749 222919 : if(free_file) {
750 : /* toss the file (and any embedded hardlinks)*/
751 105473 : rm_file_destroy(file);
752 : }
753 222919 : }
754 :
755 : /* Push file to scheduler queue.
756 : * */
757 221829 : static void rm_shred_push_queue(RmFile *file) {
758 221829 : const RmSession *session = file->session;
759 221829 : if(file->hash_offset == 0) {
760 : /* first-timer; lookup disk offset */
761 434335 : if(file->session->cfg->build_fiemap &&
762 221101 : !rm_mounts_is_nonrotational(file->session->mounts, file->dev)) {
763 7867 : RM_DEFINE_PATH(file);
764 7867 : file->disk_offset = rm_offset_get_from_path(file_path, 0, NULL);
765 : } else {
766 : /* use inode number instead of disk offset */
767 213234 : file->disk_offset = file->inode;
768 : }
769 : }
770 443658 : rm_mds_push_task_by_dev(
771 443658 : session->mds, file->disk, file->disk_offset, NULL, file);
772 221829 : }
773 :
774 : //////////////////////////////////
775 : // RMSHREDGROUP UTILITIES //
776 : // AND SIFTING ALGORITHM //
777 : //////////////////////////////////
778 :
779 : /* Free RmShredGroup and any dormant files still in its queue
780 : */
781 109490 : static void rm_shred_group_free(RmShredGroup *self, bool force_free) {
782 109490 : g_assert(self->parent == NULL); /* children should outlive their parents! */
783 :
784 109490 : RmCfg *cfg = self->session->cfg;
785 :
786 109490 : bool needs_free = !(cfg->cache_file_structs) || force_free;
787 :
788 : /* May not free though when unfinished checksums are written.
789 : * Those are freed by the output module.
790 : */
791 109490 : if(cfg->write_unfinished) {
792 420 : needs_free = false;
793 : }
794 :
795 109490 : if(self->held_files) {
796 56800 : g_queue_foreach(self->held_files, (GFunc)rm_shred_discard_file,
797 56800 : GUINT_TO_POINTER(needs_free));
798 56800 : g_queue_free(self->held_files);
799 56800 : self->held_files = NULL;
800 : }
801 :
802 109490 : if(self->digest && needs_free) {
803 21708 : rm_digest_free(self->digest);
804 21708 : self->digest = NULL;
805 : }
806 :
807 109490 : if(self->children) {
808 52690 : g_hash_table_unref(self->children);
809 : }
810 :
811 109488 : g_assert(!self->in_progress_digests);
812 :
813 109488 : g_mutex_clear(&self->lock);
814 :
815 109490 : g_slice_free(RmShredGroup, self);
816 109490 : }
817 :
818 : /* call unlocked; should be no contention issues since group is finished */
819 108525 : static void rm_shred_group_finalise(RmShredGroup *self) {
820 : /* return any paranoid mem allocation */
821 108525 : rm_shred_mem_return(self);
822 :
823 108526 : switch(self->status) {
824 : case RM_SHRED_GROUP_DORMANT:
825 : /* Dead-ended files; don't force free since we may want to write the partial
826 : * checksums */
827 4686 : rm_shred_group_free(self, FALSE);
828 4686 : break;
829 : case RM_SHRED_GROUP_START_HASHING:
830 : case RM_SHRED_GROUP_HASHING:
831 : /* intermediate increment group no longer required; force free */
832 52690 : rm_shred_group_free(self, TRUE);
833 52690 : break;
834 : case RM_SHRED_GROUP_FINISHING:
835 : /* free any paranoid buffers held in group->digest (should not be needed for
836 : * results processing */
837 51150 : if(self->digest_type == RM_DIGEST_PARANOID) {
838 3707 : rm_digest_release_buffers(self->digest);
839 : }
840 : /* send it to finisher (which takes responsibility for calling
841 : * rm_shred_group_free())*/
842 51150 : rm_util_thread_pool_push(self->session->shredder->result_pool, self);
843 :
844 51150 : break;
845 : case RM_SHRED_GROUP_FINISHED:
846 : default:
847 0 : g_assert_not_reached();
848 : }
849 108526 : }
850 :
851 : /* Checks whether group qualifies as duplicate candidate (ie more than
852 : * two members and meets has_pref and NEEDS_PREF criteria).
853 : * Assume group already protected by group_lock.
854 : * */
855 444314 : static void rm_shred_group_update_status(RmShredGroup *group) {
856 444314 : if(group->status == RM_SHRED_GROUP_DORMANT) {
857 213403 : if(1 && group->num_files >= 2 /* it takes 2 to tango */
858 104338 : &&
859 208028 : (group->has_pref || !NEEDS_PREF(group))
860 : /* we have at least one file from preferred path, or we don't care */
861 104011 : &&
862 104255 : (group->has_npref || !NEEDS_NPREF(group))
863 : /* we have at least one file from non-pref path, or we don't care */
864 103874 : &&
865 103880 : (group->has_new || !NEEDS_NEW(group))
866 : /* we have at least one file newer than cfg->min_mtime, or we don't care */
867 103868 : &&
868 103896 : (!group->unique_basename || !group->session->cfg->unmatched_basenames)
869 : /* we have more than one unique basename, or we don't care */
870 : ) {
871 156530 : if(group->hash_offset < group->file_size &&
872 52690 : group->has_only_ext_cksums == false) {
873 : /* group can go active */
874 52690 : group->status = RM_SHRED_GROUP_START_HASHING;
875 : } else {
876 51150 : group->status = RM_SHRED_GROUP_FINISHING;
877 : }
878 : }
879 : }
880 444314 : }
881 :
882 : /* Only called by rm_shred_group_free (via GDestroyNotify of group->children).
883 : * Call with group->lock unlocked.
884 : */
885 55948 : static void rm_shred_group_make_orphan(RmShredGroup *self) {
886 55948 : gboolean group_finished = FALSE;
887 55948 : g_mutex_lock(&self->lock);
888 : {
889 55948 : self->parent = NULL;
890 55948 : group_finished = (self->num_pending == 0);
891 : }
892 55948 : g_mutex_unlock(&self->lock);
893 :
894 55948 : if(group_finished) {
895 55836 : rm_shred_group_finalise(self);
896 : }
897 55948 : }
898 :
899 : /* Call with shred_group->lock unlocked.
900 : * */
901 444314 : static RmFile *rm_shred_group_push_file(RmShredGroup *shred_group, RmFile *file,
902 : gboolean initial) {
903 444314 : RmFile *result = NULL;
904 444314 : file->shred_group = shred_group;
905 :
906 444314 : if(file->digest) {
907 165881 : rm_digest_free(file->digest);
908 165880 : file->digest = NULL;
909 : }
910 :
911 444313 : g_mutex_lock(&shred_group->lock);
912 : {
913 444316 : shred_group->has_pref |= file->is_prefd | file->hardlinks.has_prefd;
914 444316 : shred_group->has_npref |= (!file->is_prefd) | file->hardlinks.has_non_prefd;
915 444316 : shred_group->has_new |= file->is_new_or_has_new;
916 :
917 444316 : if (shred_group->num_files == 0 && shred_group->session->cfg->unmatched_basenames) {
918 84 : shred_group->unique_basename = file;
919 444316 : } else if (shred_group->unique_basename &&
920 84 : !rm_file_basenames_match(file, shred_group->unique_basename)) {
921 56 : shred_group->unique_basename = NULL;
922 : }
923 :
924 444316 : shred_group->num_files++;
925 444316 : if(file->hardlinks.is_head) {
926 692 : g_assert(file->hardlinks.files);
927 692 : shred_group->num_files += file->hardlinks.files->length;
928 692 : if (shred_group->unique_basename && shred_group->session->cfg->unmatched_basenames) {
929 0 : for(GList *iter = file->hardlinks.files->head; iter; iter = iter->next) {
930 0 : if (!rm_file_basenames_match(iter->data, shred_group->unique_basename)) {
931 0 : shred_group->unique_basename = NULL;
932 0 : break;
933 : }
934 : }
935 : }
936 : }
937 :
938 444316 : g_assert(file->hash_offset == shred_group->hash_offset);
939 :
940 444316 : rm_shred_group_update_status(shred_group);
941 444316 : switch(shred_group->status) {
942 : case RM_SHRED_GROUP_START_HASHING:
943 : /* clear the queue and push all its rmfiles to the appropriate device queue */
944 169309 : if(shred_group->held_files) {
945 52690 : shred_group->num_pending += g_queue_get_length(shred_group->held_files);
946 52690 : g_queue_free_full(shred_group->held_files,
947 : (GDestroyNotify)rm_shred_push_queue);
948 52690 : shred_group->held_files = NULL; /* won't need shred_group queue any more,
949 : since new arrivals will bypass */
950 : }
951 169309 : if(shred_group->digest_type == RM_DIGEST_PARANOID && !initial) {
952 8 : rm_shred_check_paranoid_mem_alloc(shred_group, 1);
953 : }
954 : /* FALLTHROUGH */
955 : case RM_SHRED_GROUP_HASHING:
956 169317 : shred_group->num_pending++;
957 169317 : if(initial || !file->devlist_waiting) {
958 : /* add file to device queue */
959 169317 : rm_shred_push_queue(file);
960 : } else {
961 : /* calling routine will handle the file */
962 0 : result = file;
963 : }
964 169317 : break;
965 : case RM_SHRED_GROUP_DORMANT:
966 : case RM_SHRED_GROUP_FINISHING:
967 : /* add file to held_files */
968 274998 : g_queue_push_head(shred_group->held_files, file);
969 274998 : break;
970 : case RM_SHRED_GROUP_FINISHED:
971 : default:
972 0 : g_assert_not_reached();
973 : }
974 : }
975 444315 : g_mutex_unlock(&shred_group->lock);
976 :
977 444318 : return result;
978 : }
979 :
980 : /* After partial hashing of RmFile, add it back into the sieve for further
981 : * hashing if required. If waiting option is set, then try to return the
982 : * RmFile to the calling routine so it can continue with the next hashing
983 : * increment (this bypasses the normal device queue and so avoids an unnecessary
984 : * file seek operation ) returns true if the file can be immediately be hashed
985 : * some more.
986 : * */
987 221801 : static RmFile *rm_shred_sift(RmFile *file) {
988 221801 : RmFile *result = NULL;
989 221801 : gboolean current_group_finished = FALSE;
990 :
991 221801 : g_assert(file);
992 221801 : RmShredGroup *current_group = file->shred_group;
993 221801 : g_assert(current_group);
994 :
995 221801 : g_mutex_lock(¤t_group->lock);
996 : {
997 221826 : current_group->num_pending--;
998 221826 : if(current_group->in_progress_digests) {
999 : /* remove this file from current_group's pending digests list */
1000 16008 : current_group->in_progress_digests =
1001 16007 : g_list_remove(current_group->in_progress_digests, file->digest);
1002 : }
1003 :
1004 221827 : if(file->status == RM_FILE_STATE_IGNORE) {
1005 : /* reading/hashing failed somewhere */
1006 0 : if(file->digest) {
1007 0 : rm_digest_free(file->digest);
1008 : }
1009 0 : rm_shred_discard_file(file, true);
1010 :
1011 : } else {
1012 221827 : g_assert(file->digest);
1013 :
1014 : /* check is child group hashtable has been created yet */
1015 221827 : if(current_group->children == NULL) {
1016 52689 : current_group->children =
1017 52690 : g_hash_table_new_full((GHashFunc)rm_digest_hash,
1018 : (GEqualFunc)rm_digest_equal,
1019 : NULL,
1020 : (GDestroyNotify)rm_shred_group_make_orphan);
1021 : }
1022 :
1023 : /* check if there is already a descendent of current_group which
1024 : * matches snap... if yes then move this file into it; if not then
1025 : * create a new group ... */
1026 221826 : RmShredGroup *child_group =
1027 221826 : g_hash_table_lookup(current_group->children, file->digest);
1028 221826 : if(!child_group) {
1029 55948 : child_group = rm_shred_group_new(file);
1030 55948 : g_hash_table_insert(current_group->children, child_group->digest,
1031 : child_group);
1032 55948 : child_group->has_only_ext_cksums = current_group->has_only_ext_cksums;
1033 :
1034 : /* signal any pending (paranoid) digests that there is a new match
1035 : * candidate digest */
1036 55948 : g_list_foreach(current_group->in_progress_digests,
1037 : (GFunc)rm_digest_send_match_candidate,
1038 55948 : child_group->digest);
1039 : }
1040 221826 : result =
1041 : rm_shred_group_push_file(child_group, file, FALSE);
1042 : }
1043 :
1044 : /* is current shred group needed any longer? */
1045 221828 : current_group_finished =
1046 221828 : !current_group->parent && current_group->num_pending == 0;
1047 : }
1048 221828 : g_mutex_unlock(¤t_group->lock);
1049 :
1050 221823 : if(current_group_finished) {
1051 52689 : rm_shred_group_finalise(current_group);
1052 : }
1053 :
1054 221820 : return result;
1055 : }
1056 :
1057 : /* Hasher callback file. Runs as threadpool in parallel / tandem with
1058 : * rm_shred_read_factory above
1059 : * */
1060 221820 : static void rm_shred_hash_callback(_U RmHasher *hasher, RmDigest *digest, RmShredTag *tag,
1061 : RmFile *file) {
1062 : /* Report the progress to rm_shred_devlist_factory */
1063 221820 : g_assert(file->digest == digest);
1064 :
1065 221820 : if(file->hash_offset == file->shred_group->next_offset ||
1066 0 : file->status == RM_FILE_STATE_IGNORE) {
1067 221820 : if(file->status != RM_FILE_STATE_IGNORE) {
1068 : /* remember that checksum */
1069 221819 : rm_shred_write_cksum_to_xattr(tag->session, file);
1070 : }
1071 :
1072 443626 : if(file->signal) {
1073 : /* MDS scheduler is waiting for result */
1074 6 : rm_signal_done(file->signal);
1075 : } else {
1076 : /* handle the file ourselves; MDS scheduler has moved on to the next file */
1077 221799 : rm_shred_sift(file);
1078 : }
1079 : } else {
1080 0 : RM_DEFINE_PATH(file);
1081 0 : rm_log_error_line("Unexpected hash offset for %s, got %" LLU ", expected %" LLU,
1082 : file_path, file->hash_offset, file->shred_group->next_offset);
1083 0 : g_assert_not_reached();
1084 : }
1085 221821 : }
1086 :
1087 : ////////////////////////////////////
1088 : // SHRED-SPECIFIC PREPROCESSING //
1089 : ////////////////////////////////////
1090 :
1091 : /* Basically this unloads files from the initial list build (which has
1092 : * hardlinks already grouped).
1093 : * Outline:
1094 : * 1. Use g_hash_table_foreach_remove to send RmFiles from node_table
1095 : * to size_groups via rm_shred_file_preprocess.
1096 : * 2. Use g_hash_table_foreach_remove to delete all singleton and other
1097 : * non-qualifying groups from size_groups via rm_shred_group_preprocess.
1098 : * 3. Use g_hash_table_foreach to do the FIEMAP lookup for all remaining
1099 : * files via rm_shred_device_preprocess.
1100 : * */
1101 222489 : static void rm_shred_file_preprocess(_U gpointer key, RmFile *file, RmShredTag *main) {
1102 : /* initial population of RmShredDevice's and first level RmShredGroup's */
1103 222489 : RmSession *session = main->session;
1104 :
1105 222489 : g_assert(file);
1106 222489 : g_assert(file->lint_type == RM_LINT_TYPE_DUPE_CANDIDATE);
1107 222489 : g_assert(file->file_size > 0);
1108 :
1109 222489 : file->is_new_or_has_new = (file->mtime >= session->cfg->min_mtime);
1110 :
1111 : /* if file has hardlinks then set file->hardlinks.has_[non_]prefd*/
1112 222489 : if(file->hardlinks.is_head) {
1113 776 : for(GList *iter = file->hardlinks.files->head; iter; iter = iter->next) {
1114 430 : RmFile *link = iter->data;
1115 430 : file->hardlinks.has_non_prefd |= !(link->is_prefd);
1116 430 : file->hardlinks.has_prefd |= link->is_prefd;
1117 430 : file->is_new_or_has_new |= (link->mtime >= session->cfg->min_mtime);
1118 : }
1119 : }
1120 :
1121 : /* cfg->fake_pathindex_as_disk is for debugging/testing... */
1122 222489 : file->disk = (session->cfg->fake_pathindex_as_disk) ? file->path_index : file->dev;
1123 :
1124 : /* add reference for this file to the MDS scheduler */
1125 222489 : rm_mds_ref_dev(session->mds, file->disk, 1);
1126 222489 : rm_shred_adjust_counters(main, 1, (gint64)file->file_size - file->hash_offset);
1127 :
1128 222489 : RmShredGroup *group = g_hash_table_lookup(session->tables->size_groups, file);
1129 :
1130 222489 : if(group == NULL) {
1131 53542 : group = rm_shred_group_new(file);
1132 53542 : group->digest_type = session->cfg->checksum_type;
1133 53542 : g_hash_table_insert(session->tables->size_groups, file, group);
1134 : }
1135 :
1136 222489 : rm_shred_group_push_file(group, file, true);
1137 :
1138 222489 : if(main->session->cfg->read_cksum_from_xattr) {
1139 392 : char *ext_cksum = rm_xattr_read_hash(main->session, file);
1140 392 : if(ext_cksum != NULL) {
1141 0 : file->folder->data = ext_cksum;
1142 : }
1143 : }
1144 :
1145 222489 : if(HAS_CACHE(session)) {
1146 1064 : RM_DEFINE_PATH(file);
1147 1064 : if(rm_trie_search(&session->cfg->file_trie, file_path)) {
1148 135 : group->num_ext_cksums += 1;
1149 135 : file->has_ext_cksum = 1;
1150 : }
1151 : }
1152 222489 : }
1153 :
1154 53542 : static gboolean rm_shred_group_preprocess(_U gpointer key, RmShredGroup *group,
1155 : _U RmShredTag *tag) {
1156 53542 : g_assert(group);
1157 53542 : if(group->status == RM_SHRED_GROUP_DORMANT) {
1158 964 : rm_shred_group_free(group, true);
1159 964 : return true;
1160 : } else {
1161 52578 : return false;
1162 : }
1163 : }
1164 :
1165 36458 : static void rm_shred_preprocess_input(RmShredTag *main) {
1166 36458 : RmSession *session = main->session;
1167 36458 : guint removed = 0;
1168 :
1169 : /* move remaining files to RmShredGroups */
1170 36458 : g_assert(session->tables->node_table);
1171 :
1172 : /* Read any cache files */
1173 36542 : for(GList *iter = main->session->cache_list.head; iter; iter = iter->next) {
1174 84 : char *cache_path = iter->data;
1175 84 : rm_json_cache_read(&session->cfg->file_trie, cache_path);
1176 : }
1177 :
1178 36458 : rm_log_debug_line("Moving files into size groups...");
1179 :
1180 : /* move files from node tables into initial RmShredGroups */
1181 36458 : g_hash_table_foreach_remove(session->tables->node_table,
1182 : (GHRFunc)rm_shred_file_preprocess, main);
1183 36458 : g_hash_table_unref(session->tables->node_table);
1184 36458 : session->tables->node_table = NULL;
1185 :
1186 : GHashTableIter iter;
1187 : gpointer size, p_group;
1188 :
1189 36458 : if(HAS_CACHE(main->session)) {
1190 140 : g_assert(session->tables->size_groups);
1191 140 : g_hash_table_iter_init(&iter, session->tables->size_groups);
1192 644 : while(g_hash_table_iter_next(&iter, &size, &p_group)) {
1193 364 : RmShredGroup *group = p_group;
1194 364 : if(group->num_files == group->num_ext_cksums) {
1195 0 : group->has_only_ext_cksums = true;
1196 : }
1197 : }
1198 : }
1199 :
1200 36458 : rm_log_debug_line("move remaining files to size_groups finished at time %.3f",
1201 : g_timer_elapsed(session->timer, NULL));
1202 :
1203 36458 : rm_log_debug_line("Discarding unique sizes and read fiemap data for others...");
1204 36458 : g_assert(session->tables->size_groups);
1205 36458 : removed = g_hash_table_foreach_remove(session->tables->size_groups,
1206 : (GHRFunc)rm_shred_group_preprocess, main);
1207 36458 : g_hash_table_unref(session->tables->size_groups);
1208 36458 : session->tables->size_groups = NULL;
1209 :
1210 36458 : rm_log_debug_line("...done at time %.3f; removed %u of %" LLU,
1211 : g_timer_elapsed(session->timer, NULL), removed,
1212 : session->total_filtered_files);
1213 36458 : }
1214 :
1215 : /////////////////////////////////
1216 : // POST PROCESSING //
1217 : /////////////////////////////////
1218 :
1219 : /* post-processing sorting of files by criteria (-S and -[kmKM])
1220 : * this is slightly different to rm_shred_cmp_orig_criteria in the case of
1221 : * either -K or -M options
1222 : */
1223 366047 : int rm_shred_cmp_orig_criteria(RmFile *a, RmFile *b, RmSession *session) {
1224 366047 : RmCfg *cfg = session->cfg;
1225 :
1226 : /* Make sure to *never* make a symlink to be the original */
1227 366047 : if(a->is_symlink != b->is_symlink) {
1228 140 : return a->is_symlink - b->is_symlink;
1229 366459 : } else if((a->is_prefd != b->is_prefd) &&
1230 928 : (cfg->keep_all_untagged || cfg->must_match_untagged)) {
1231 235 : return (a->is_prefd - b->is_prefd);
1232 : } else {
1233 365672 : int comparasion = rm_pp_cmp_orig_criteria(a, b, session);
1234 365672 : if(comparasion == 0) {
1235 69658 : return b->is_original - a->is_original;
1236 : }
1237 :
1238 296014 : return comparasion;
1239 : }
1240 : }
1241 :
1242 : /* iterate over group to find highest ranked; return it and tag it as original */
1243 : /* also in special cases (eg keep_all_tagged) there may be more than one original,
1244 : * in which case tag them as well
1245 : */
1246 52018 : void rm_shred_group_find_original(RmSession *session, GQueue *group) {
1247 : /* iterate over group, unbundling hardlinks and identifying "tagged" originals */
1248 271103 : for(GList *iter = group->head; iter; iter = iter->next) {
1249 219085 : RmFile *file = iter->data;
1250 219085 : file->is_original = false;
1251 :
1252 219085 : if(file->hardlinks.is_head && file->hardlinks.files) {
1253 : /* if group member has a hardlink cluster attached to it then
1254 : * unbundle the cluster and append it to the queue
1255 : */
1256 346 : GQueue *hardlinks = file->hardlinks.files;
1257 776 : for(GList *link = hardlinks->head; link; link = link->next) {
1258 430 : g_queue_push_tail(group, link->data);
1259 : }
1260 346 : g_queue_free(hardlinks);
1261 346 : file->hardlinks.files = NULL;
1262 : }
1263 : /* identify "tagged" originals: */
1264 438002 : if(((file->is_prefd) && (session->cfg->keep_all_tagged)) ||
1265 437330 : ((!file->is_prefd) && (session->cfg->keep_all_untagged))) {
1266 336 : file->is_original = true;
1267 :
1268 : #if _RM_SHRED_DEBUG
1269 : RM_DEFINE_PATH(file);
1270 : rm_log_debug_line("tagging %s as original because %s",
1271 : file_path,
1272 : ((file->is_prefd) && (session->cfg->keep_all_tagged))
1273 : ? "tagged"
1274 : : "untagged");
1275 : #endif
1276 : }
1277 : }
1278 :
1279 : /* sort the unbundled group */
1280 52018 : g_queue_sort(group, (GCompareDataFunc)rm_shred_cmp_orig_criteria, session);
1281 :
1282 52018 : RmFile *headfile = group->head->data;
1283 52018 : if(!headfile->is_original) {
1284 51850 : headfile->is_original = true;
1285 : #if _RM_SHRED_DEBUG
1286 : RM_DEFINE_PATH(headfile);
1287 : rm_log_debug_line("tagging %s as original because it is highest ranked",
1288 : headfile_path);
1289 : #endif
1290 : }
1291 52018 : if (session->cfg->unmatched_basenames) {
1292 : /* remove files which match headfile's basename */
1293 28 : GList *iter = group->head->next;
1294 84 : while(iter) {
1295 28 : RmFile *iter_file = iter->data;
1296 28 : GList *temp = iter;
1297 28 : iter = iter->next;
1298 28 : if (rm_file_basenames_match(iter_file, headfile)) {
1299 0 : rm_shred_discard_file(iter_file, TRUE);
1300 0 : g_queue_delete_link(group, temp);
1301 : }
1302 : }
1303 : }
1304 52018 : }
1305 :
1306 51374 : void rm_shred_forward_to_output(RmSession *session, GQueue *group) {
1307 51374 : g_assert(group);
1308 51374 : g_assert(group->head);
1309 :
1310 : #if _RM_SHRED_DEBUG
1311 : RmFile *head = group->head->data;
1312 : RM_DEFINE_PATH(head);
1313 : rm_log_debug_line("Forwarding %s's group", head_path);
1314 : #endif
1315 :
1316 : /* Hand it over to the printing module */
1317 267855 : for(GList *iter = group->head; iter; iter = iter->next) {
1318 216481 : RmFile *file = iter->data;
1319 216481 : rm_fmt_write(file, session->formats, group->length);
1320 : }
1321 51374 : }
1322 :
1323 216901 : static void rm_shred_dupe_totals(RmFile *file, RmSession *session) {
1324 216901 : if(!file->is_original) {
1325 165583 : session->dup_counter++;
1326 :
1327 : /* Only check file size if it's not a hardlink.
1328 : * Since deleting hardlinks does not free any space
1329 : * they should not be counted unless all of them would
1330 : * be removed.
1331 : */
1332 165583 : if(file->hardlinks.is_head || file->hardlinks.hardlink_head == NULL) {
1333 165153 : session->total_lint_size += file->file_size;
1334 : }
1335 : }
1336 216901 : }
1337 :
1338 51150 : static void rm_shred_result_factory(RmShredGroup *group, RmShredTag *tag) {
1339 51150 : RmCfg *cfg = tag->session->cfg;
1340 :
1341 51150 : if(g_queue_get_length(group->held_files) > 0) {
1342 : /* find the original(s)
1343 : * (note this also unbundles hardlinks and sorts the group from
1344 : * highest ranked to lowest ranked
1345 : */
1346 51150 : rm_shred_group_find_original(tag->session, group->held_files);
1347 :
1348 : /* Update statistics */
1349 51150 : rm_fmt_lock_state(tag->session->formats);
1350 : {
1351 51150 : tag->session->dup_group_counter++;
1352 51150 : g_queue_foreach(group->held_files, (GFunc)rm_shred_dupe_totals, tag->session);
1353 : }
1354 51150 : rm_fmt_unlock_state(tag->session->formats);
1355 :
1356 : /* Cache the files for merging them into directories */
1357 268051 : for(GList *iter = group->held_files->head; iter; iter = iter->next) {
1358 216901 : RmFile *file = iter->data;
1359 216901 : file->digest = group->digest;
1360 :
1361 216901 : if(cfg->merge_directories) {
1362 4060 : rm_tm_feed(tag->session->dir_merger, file);
1363 : }
1364 : }
1365 :
1366 51150 : if(cfg->merge_directories == false) {
1367 : /* Output them directly, do not merge them first. */
1368 49778 : rm_shred_forward_to_output(tag->session, group->held_files);
1369 : }
1370 : }
1371 :
1372 51150 : group->status = RM_SHRED_GROUP_FINISHED;
1373 : #if _RM_SHRED_DEBUG
1374 : rm_log_debug_line("Free from rm_shred_result_factory");
1375 : #endif
1376 :
1377 : /* Do not force free files here, output module might need do that itself. */
1378 51150 : rm_shred_group_free(group, false);
1379 51150 : }
1380 :
1381 : /////////////////////////////////
1382 : // ACTUAL IMPLEMENTATION //
1383 : /////////////////////////////////
1384 :
1385 269817 : static bool rm_shred_reassign_checksum(RmShredTag *main, RmFile *file) {
1386 269817 : bool can_process = true;
1387 269817 : RmCfg *cfg = main->session->cfg;
1388 269817 : RmShredGroup *group = file->shred_group;
1389 :
1390 269817 : if(group->has_only_ext_cksums) {
1391 : /* Cool, we were able to read the checksum from disk */
1392 0 : file->digest = rm_digest_new(RM_DIGEST_EXT, 0, 0, 0, NEEDS_SHADOW_HASH(cfg));
1393 :
1394 0 : RM_DEFINE_PATH(file);
1395 :
1396 0 : char *hexstring = file->folder->data;
1397 :
1398 0 : if(hexstring != NULL) {
1399 0 : rm_digest_update(file->digest, (unsigned char *)hexstring, strlen(hexstring));
1400 0 : rm_log_debug_line("%s=%s was read from cache.", hexstring, file_path);
1401 : } else {
1402 0 : rm_log_warning_line(
1403 : "Unable to read external checksum from interal cache for %s", file_path);
1404 0 : file->has_ext_cksum = 0;
1405 0 : group->has_only_ext_cksums = 0;
1406 : }
1407 269817 : } else if(group->digest_type == RM_DIGEST_PARANOID) {
1408 : /* check if memory allocation is ok */
1409 64025 : if(!rm_shred_check_paranoid_mem_alloc(group, 0)) {
1410 47989 : can_process = false;
1411 : } else {
1412 : /* get the required target offset into group->next_offset, so
1413 : * that we can make the paranoid RmDigest the right size*/
1414 16036 : if(group->next_offset == 0) {
1415 3826 : (void)rm_shred_get_read_size(file, main);
1416 : }
1417 16036 : g_assert(group->hash_offset == file->hash_offset);
1418 :
1419 16036 : if(file->is_symlink && cfg->see_symlinks) {
1420 28 : file->digest =
1421 28 : rm_digest_new(RM_DIGEST_PARANOID, 0, 0,
1422 : PATH_MAX + 1 /* max size of a symlink file */,
1423 : NEEDS_SHADOW_HASH(cfg));
1424 : } else {
1425 16008 : file->digest = rm_digest_new(RM_DIGEST_PARANOID, 0, 0,
1426 16008 : group->next_offset - file->hash_offset,
1427 : NEEDS_SHADOW_HASH(cfg));
1428 16008 : if(group->next_offset > file->hash_offset + SHRED_PREMATCH_THRESHOLD) {
1429 : /* send candidate twin(s) */
1430 16008 : if(group->children) {
1431 7166 : GList *children = g_hash_table_get_values(group->children);
1432 21647 : while(children) {
1433 7315 : RmShredGroup *child = children->data;
1434 7315 : rm_digest_send_match_candidate(file->digest, child->digest);
1435 7315 : children = g_list_delete_link(children, children);
1436 : }
1437 : }
1438 : /* store a reference so the shred group knows where to send any future
1439 : * twin candidate digests */
1440 16008 : group->in_progress_digests =
1441 16008 : g_list_prepend(group->in_progress_digests, file->digest);
1442 : }
1443 : }
1444 : }
1445 205792 : } else if(group->digest) {
1446 : /* pick up the digest-so-far from the RmShredGroup */
1447 312 : file->digest = rm_digest_copy(group->digest);
1448 : } else {
1449 : /* this is first generation of RMGroups, so there is no progressive hash yet */
1450 410960 : file->digest = rm_digest_new(cfg->checksum_type,
1451 205480 : main->session->hash_seed1,
1452 205480 : main->session->hash_seed2,
1453 : 0,
1454 : NEEDS_SHADOW_HASH(cfg));
1455 : }
1456 :
1457 269818 : return can_process;
1458 : }
1459 :
1460 : #define RM_SHRED_TOO_MANY_BYTES_TO_WAIT (64 * 1024 * 1024)
1461 :
1462 : /* call with device unlocked */
1463 491629 : static bool rm_shred_can_process(RmFile *file, RmShredTag *main) {
1464 : /* initialise hash (or recover progressive hash so far) */
1465 491629 : if(!file->shred_group) {
1466 0 : return FALSE;
1467 : }
1468 :
1469 491629 : bool result = TRUE;
1470 491629 : g_mutex_lock(&file->shred_group->lock);
1471 : {
1472 491647 : if(file->digest == NULL) {
1473 269818 : g_assert(file->shred_group);
1474 269818 : result = rm_shred_reassign_checksum(main, file);
1475 : }
1476 : }
1477 491647 : g_mutex_unlock(&file->shred_group->lock);
1478 491635 : return result;
1479 : }
1480 :
1481 : /* Callback for RmMDS */
1482 269797 : static gint rm_shred_process_file(RmFile *file, RmSession *session) {
1483 269797 : RmShredTag *tag = session->shredder;
1484 :
1485 269797 : if(session->aborted || file->shred_group->has_only_ext_cksums) {
1486 4 : if (session->aborted) {
1487 0 : file->status = RM_FILE_STATE_IGNORE;
1488 : }
1489 4 : rm_shred_sift(file);
1490 0 : return 1;
1491 : }
1492 :
1493 269793 : if(!rm_shred_can_process(file, tag)) {
1494 47989 : return 0;
1495 : }
1496 :
1497 221829 : RM_DEFINE_PATH(file);
1498 :
1499 665485 : while(file && rm_shred_can_process(file, tag)) {
1500 : /* hash the next increment of the file */
1501 221829 : bool worth_waiting = FALSE;
1502 221829 : RmCfg *cfg = session->cfg;
1503 221829 : RmOff bytes_to_read = rm_shred_get_read_size(file, tag);
1504 :
1505 221829 : g_mutex_lock(&file->shred_group->lock);
1506 : {
1507 221829 : worth_waiting =
1508 222189 : (file->shred_group->next_offset != file->file_size) &&
1509 660 : (cfg->shred_always_wait ||
1510 : (
1511 336 : !rm_mounts_is_nonrotational(session->mounts, file->dev) &&
1512 12 : rm_shred_get_read_size(file, tag) <
1513 12 : RM_SHRED_TOO_MANY_BYTES_TO_WAIT &&
1514 24 : (file->status == RM_FILE_STATE_NORMAL) && !cfg->shred_never_wait));
1515 : }
1516 221829 : g_mutex_unlock(&file->shred_group->lock);
1517 :
1518 221828 : RmHasherTask *task = rm_hasher_task_new(tag->hasher, file->digest, file);
1519 221828 : if(!rm_hasher_task_hash(task, file_path, file->hash_offset, bytes_to_read,
1520 221828 : file->is_symlink)) {
1521 : /* rm_hasher_start_increment failed somewhere */
1522 0 : file->status = RM_FILE_STATE_IGNORE;
1523 0 : worth_waiting = FALSE;
1524 : }
1525 :
1526 : /* Update totals for file, device and session*/
1527 221829 : file->hash_offset += bytes_to_read;
1528 221829 : if(file->is_symlink) {
1529 140 : rm_shred_adjust_counters(tag, 0, -(gint64)file->file_size);
1530 : } else {
1531 221689 : rm_shred_adjust_counters(tag, 0, -(gint64)bytes_to_read);
1532 : }
1533 :
1534 221829 : if(worth_waiting) {
1535 : /* some final checks if it's still worth waiting for the hash result */
1536 24 : g_mutex_lock(&file->shred_group->lock);
1537 : {
1538 24 : worth_waiting = worth_waiting && (file->shred_group->children);
1539 24 : if(file->digest->type == RM_DIGEST_PARANOID) {
1540 0 : worth_waiting =
1541 0 : worth_waiting && file->digest->paranoid->twin_candidate;
1542 : }
1543 : }
1544 24 : g_mutex_unlock(&file->shred_group->lock);
1545 : }
1546 :
1547 221829 : file->signal = worth_waiting ? rm_signal_new() : NULL;
1548 :
1549 : /* tell the hasher we have finished */
1550 221829 : rm_hasher_task_finish(task);
1551 :
1552 221827 : if(worth_waiting) {
1553 : /* wait until the increment has finished hashing; assert that we get the
1554 : * expected file back */
1555 6 : rm_signal_wait(file->signal);
1556 6 : file->signal = NULL;
1557 : /* sift file; if returned then continue processing it */
1558 6 : file = rm_shred_sift(file);
1559 : } else {
1560 221821 : file = NULL;
1561 : }
1562 : }
1563 221827 : return 1;
1564 : }
1565 :
1566 36458 : void rm_shred_run(RmSession *session) {
1567 36458 : g_assert(session);
1568 36458 : g_assert(session->tables);
1569 36458 : g_assert(session->mounts);
1570 :
1571 : RmShredTag tag;
1572 36458 : tag.active_groups = 0;
1573 36458 : tag.session = session;
1574 36458 : tag.mem_refusing = false;
1575 36458 : session->shredder = &tag;
1576 :
1577 36458 : tag.device_return = g_async_queue_new();
1578 36458 : tag.page_size = SHRED_PAGE_SIZE;
1579 :
1580 36458 : tag.cache_file_count = 0;
1581 36458 : tag.cache_byte_count = 0;
1582 36458 : tag.cache_filtered_count = 0;
1583 36458 : tag.after_preprocess = FALSE;
1584 :
1585 : /* would use g_atomic, but helgrind does not like that */
1586 36458 : g_mutex_init(&tag.hash_mem_mtx);
1587 :
1588 36458 : g_mutex_init(&tag.lock);
1589 36458 : gint threads = g_hash_table_size(session->mounts->disk_table);
1590 36458 : session->mds =
1591 36458 : rm_mds_new(threads, session->mounts, session->cfg->fake_pathindex_as_disk);
1592 36458 : rm_mds_configure(session->mds,
1593 : (RmMDSFunc)rm_shred_process_file,
1594 : session,
1595 36458 : session->cfg->sweep_count,
1596 : (RmMDSSortFunc)rm_mds_elevator_cmp);
1597 :
1598 36458 : rm_shred_preprocess_input(&tag);
1599 36458 : rm_log_debug_line("Done shred preprocessing");
1600 36458 : tag.after_preprocess = TRUE;
1601 36458 : session->shred_bytes_after_preprocess = session->shred_bytes_remaining;
1602 :
1603 : /* estimate mem used for RmFiles and allocate any leftovers to read buffer and/or
1604 : * paranoid mem */
1605 36458 : RmOff mem_used = RM_AVERAGE_MEM_PER_FILE * session->shred_files_remaining;
1606 :
1607 36458 : if(session->cfg->checksum_type == RM_DIGEST_PARANOID) {
1608 : /* allocate any spare mem for paranoid hashing */
1609 2665 : tag.paranoid_mem_alloc = MIN((gint64)session->cfg->paranoid_mem,
1610 : (gint64)session->cfg->total_mem - (gint64)mem_used -
1611 : (gint64)session->cfg->read_buffer_mem);
1612 2665 : tag.paranoid_mem_alloc = MAX(0, tag.paranoid_mem_alloc);
1613 2665 : rm_log_debug_line("Paranoid Mem: %" LLU, tag.paranoid_mem_alloc);
1614 : } else {
1615 67586 : session->cfg->read_buffer_mem =
1616 33793 : MAX((gint64)session->cfg->read_buffer_mem,
1617 : (gint64)session->cfg->total_mem - (gint64)mem_used);
1618 33793 : tag.paranoid_mem_alloc = 0;
1619 : }
1620 36458 : rm_log_debug_line("Read buffer Mem: %" LLU, session->cfg->read_buffer_mem);
1621 :
1622 : /* Initialise hasher */
1623 : /* Optimum buffer size based on /usr without dropping caches:
1624 : * SHRED_PAGE_SIZE * 1 => 5.29 seconds
1625 : * SHRED_PAGE_SIZE * 2 => 5.11 seconds
1626 : * SHRED_PAGE_SIZE * 4 => 5.04 seconds
1627 : * SHRED_PAGE_SIZE * 8 => 5.08 seconds
1628 : * With dropped caches:
1629 : * SHRED_PAGE_SIZE * 1 => 45.2 seconds
1630 : * SHRED_PAGE_SIZE * 4 => 45.0 seconds*/
1631 182290 : tag.hasher = rm_hasher_new(session->cfg->checksum_type,
1632 36458 : session->cfg->threads,
1633 36458 : session->cfg->use_buffered_read,
1634 36458 : SHRED_PAGE_SIZE * 4,
1635 36458 : session->cfg->read_buffer_mem,
1636 36458 : session->cfg->paranoid_mem,
1637 : (RmHasherCallback)rm_shred_hash_callback,
1638 : &tag);
1639 :
1640 : /* Create a pool for results processing */
1641 36458 : tag.result_pool = rm_util_thread_pool_new((GFunc)rm_shred_result_factory, &tag, 1);
1642 :
1643 36458 : rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_SHREDDER);
1644 36458 : rm_mds_start(session->mds);
1645 :
1646 :
1647 : /* should complete shred session and then free: */
1648 36458 : rm_mds_free(session->mds, FALSE);
1649 36458 : rm_hasher_free(tag.hasher, TRUE);
1650 :
1651 36458 : session->shredder_finished = TRUE;
1652 36458 : session->shred_files_remaining += tag.cache_file_count;
1653 36458 : session->total_filtered_files += tag.cache_filtered_count;
1654 36458 : session->shred_bytes_remaining += tag.cache_byte_count;
1655 36458 : rm_fmt_set_state(session->formats, RM_PROGRESS_STATE_SHREDDER);
1656 :
1657 : /* This should not block, or at least only very short. */
1658 36458 : g_thread_pool_free(tag.result_pool, FALSE, TRUE);
1659 :
1660 36458 : g_async_queue_unref(tag.device_return);
1661 :
1662 36458 : g_mutex_clear(&tag.hash_mem_mtx);
1663 36458 : rm_log_debug_line("Remaining %"LLU" bytes in %"LLU" files, cached %i",
1664 : session->shred_bytes_remaining, session->shred_files_remaining,
1665 : tag.cache_filtered_count);
1666 36458 : }
|