Line data Source code
1 : /*
2 : * This file is part of rmlint.
3 : *
4 : * rmlint is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * rmlint is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 : *
17 : * Authors:
18 : *
19 : * - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
20 : * - Daniel <SeeSpotRun> T. 2014-2015 (https://github.com/SeeSpotRun)
21 : *
22 : * Hosted on http://github.com/sahib/rmlint
23 : *
24 : */
25 :
26 : #include <ctype.h>
27 : #include <stdlib.h>
28 : #include <string.h>
29 :
30 : #include "file.h"
31 : #include "formats.h"
32 :
/* A group of output files.
 * These are only created when caching to the end of the run is requested.
 * Otherwise, files are output directly and not stored in groups.
 */
37 : typedef struct RmFmtGroup {
38 : GQueue files;
39 : int index;
40 : } RmFmtGroup;
41 :
42 66292 : static RmFmtGroup *rm_fmt_group_new(void) {
43 66292 : RmFmtGroup *self = g_slice_new(RmFmtGroup);
44 66292 : g_queue_init(&self->files);
45 66292 : return self;
46 : }
47 :
48 66292 : static void rm_fmt_group_destroy(RmFmtGroup *self) {
49 295532 : for(GList *iter = self->files.head; iter; iter = iter->next) {
50 229240 : RmFile *file = iter->data;
51 229240 : rm_file_destroy(file);
52 : }
53 :
54 66292 : g_queue_clear(&self->files);
55 66292 : g_slice_free(RmFmtGroup, self);
56 66292 : }
57 :
58 69822 : static void rm_fmt_handler_free(RmFmtHandler *handler) {
59 69822 : g_assert(handler);
60 :
61 69822 : g_free(handler->path);
62 69822 : g_free(handler);
63 69822 : }
64 :
65 54884 : RmFmtTable *rm_fmt_open(RmSession *session) {
66 54884 : RmFmtTable *self = g_slice_new0(RmFmtTable);
67 :
68 54884 : self->name_to_handler = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
69 :
70 54884 : self->path_to_handler = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
71 :
72 54884 : self->handler_to_file =
73 54884 : g_hash_table_new_full(NULL, NULL, (GDestroyNotify)rm_fmt_handler_free, NULL);
74 :
75 54884 : self->config = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
76 : (GDestroyNotify)g_hash_table_unref);
77 :
78 54884 : self->session = session;
79 54884 : g_queue_init(&self->groups);
80 54884 : g_rec_mutex_init(&self->state_mtx);
81 :
82 : extern RmFmtHandler *PROGRESS_HANDLER;
83 54884 : rm_fmt_register(self, PROGRESS_HANDLER);
84 :
85 : extern RmFmtHandler *CSV_HANDLER;
86 54884 : rm_fmt_register(self, CSV_HANDLER);
87 :
88 : extern RmFmtHandler *PRETTY_HANDLER;
89 54884 : rm_fmt_register(self, PRETTY_HANDLER);
90 :
91 : extern RmFmtHandler *SH_SCRIPT_HANDLER;
92 54884 : rm_fmt_register(self, SH_SCRIPT_HANDLER);
93 :
94 : extern RmFmtHandler *SUMMARY_HANDLER;
95 54884 : rm_fmt_register(self, SUMMARY_HANDLER);
96 :
97 : extern RmFmtHandler *TIMESTAMP_HANDLER;
98 54884 : rm_fmt_register(self, TIMESTAMP_HANDLER);
99 :
100 : extern RmFmtHandler *JSON_HANDLER;
101 54884 : rm_fmt_register(self, JSON_HANDLER);
102 :
103 : extern RmFmtHandler *PY_HANDLER;
104 54884 : rm_fmt_register(self, PY_HANDLER);
105 :
106 : extern RmFmtHandler *FDUPES_HANDLER;
107 54884 : rm_fmt_register(self, FDUPES_HANDLER);
108 :
109 : extern RmFmtHandler *NULL_HANDLER;
110 54884 : rm_fmt_register(self, NULL_HANDLER);
111 :
112 54884 : return self;
113 : }
114 :
115 18 : int rm_fmt_len(RmFmtTable *self) {
116 18 : if(self == NULL) {
117 0 : return -1;
118 : } else {
119 18 : return g_hash_table_size(self->handler_to_file);
120 : }
121 : }
122 :
123 52986 : bool rm_fmt_is_valid_key(RmFmtTable *self, const char *formatter, const char *key) {
124 52986 : RmFmtHandler *handler = g_hash_table_lookup(self->name_to_handler, formatter);
125 52986 : if(handler == NULL) {
126 0 : return false;
127 : }
128 :
129 211935 : for(int i = 0; handler->valid_keys[i]; ++i) {
130 211935 : if(g_strcmp0(handler->valid_keys[i], key) == 0) {
131 52986 : return true;
132 : }
133 : }
134 :
135 0 : return false;
136 : }
137 :
138 8 : void rm_fmt_clear(RmFmtTable *self) {
139 8 : if(rm_fmt_len(self) <= 0) {
140 8 : return;
141 : }
142 :
143 0 : g_hash_table_remove_all(self->handler_to_file);
144 0 : g_hash_table_remove_all(self->path_to_handler);
145 0 : g_hash_table_remove_all(self->config);
146 : }
147 :
148 548840 : void rm_fmt_register(RmFmtTable *self, RmFmtHandler *handler) {
149 548840 : g_hash_table_insert(self->name_to_handler, (char *)handler->name, handler);
150 548840 : g_mutex_init(&handler->print_mtx);
151 548840 : }
152 :
/* Iterate over all (handler, file) pairs in self->handler_to_file.
 * Declares the locals `handler`, `file` and `iter` in the enclosing scope;
 * the statement following the macro is the loop body.
 */
#define RM_FMT_FOR_EACH_HANDLER(self)                     \
    RmFmtHandler *handler = NULL;                         \
    FILE *file = NULL;                                    \
                                                          \
    GHashTableIter iter;                                  \
    g_hash_table_iter_init(&iter, self->handler_to_file); \
    while(g_hash_table_iter_next(&iter, (gpointer *)&handler, (gpointer *)&file))
160 :
/* Invoke the handler callback `func` (if set) while holding the handler's
 * print mutex. Expects `self`, `handler` and `file` to be in scope.
 * If the handler has a `head` callback that has not run yet, it is called
 * first and the handler is marked as initialized.
 */
#define RM_FMT_CALLBACK(func, ...)                             \
    if(func) {                                                 \
        g_mutex_lock(&handler->print_mtx);                     \
        if(handler->head && !handler->was_initialized) {       \
            handler->head(self->session, handler, file);       \
            handler->was_initialized = true;                   \
        }                                                      \
        func(self->session, handler, file, ##__VA_ARGS__);     \
        g_mutex_unlock(&handler->print_mtx);                   \
    }
175 :
176 69822 : bool rm_fmt_add(RmFmtTable *self, const char *handler_name, const char *path) {
177 69822 : RmFmtHandler *new_handler = g_hash_table_lookup(self->name_to_handler, handler_name);
178 69822 : if(new_handler == NULL) {
179 0 : rm_log_warning_line(_("No such new_handler with this name: %s"), handler_name);
180 0 : return false;
181 : }
182 :
183 69822 : g_return_val_if_fail(path, false);
184 :
185 69822 : FILE *file_handle = NULL;
186 69822 : bool needs_full_path = false;
187 :
188 69822 : if(g_strcmp0(path, "stdout") == 0) {
189 27 : file_handle = stdout;
190 69795 : } else if(g_strcmp0(path, "stderr") == 0) {
191 0 : file_handle = stderr;
192 69795 : } else if(g_strcmp0(path, "stdin") == 0) {
193 : /* I bet someone finds a use for this :-) */
194 0 : file_handle = stdin;
195 : } else {
196 69795 : needs_full_path = true;
197 69795 : file_handle = fopen(path, "w");
198 : }
199 :
200 69822 : if(file_handle == NULL) {
201 0 : rm_log_warning_line(_("Unable to open file for writing: %s"), path);
202 0 : return false;
203 : }
204 :
205 : /* Make a copy of the handler so we can more than one per handler type.
206 : * Plus we have to set the handler specific path.
207 : */
208 69822 : RmFmtHandler *new_handler_copy = g_malloc0(new_handler->size);
209 69822 : memcpy(new_handler_copy, new_handler, new_handler->size);
210 69822 : g_mutex_init(&new_handler->print_mtx);
211 :
212 69822 : if(needs_full_path == false) {
213 27 : new_handler_copy->path = g_strdup(path);
214 : } else {
215 69795 : new_handler_copy->path = realpath(path, NULL);
216 : }
217 :
218 69822 : g_hash_table_insert(self->handler_to_file, new_handler_copy, file_handle);
219 69822 : g_hash_table_insert(self->path_to_handler, new_handler_copy->path, new_handler);
220 :
221 69822 : return true;
222 : }
223 :
224 329871 : static void rm_fmt_write_impl(RmFile *result, RmFmtTable *self) {
225 1104089 : RM_FMT_FOR_EACH_HANDLER(self) {
226 444347 : RM_FMT_CALLBACK(handler->elem, result);
227 : }
228 329871 : }
229 :
230 10654 : static gint rm_fmt_rank_size(const RmFmtGroup *ga, const RmFmtGroup *gb) {
231 10654 : RmFile *fa = ga->files.head->data;
232 10654 : RmFile *fb = gb->files.head->data;
233 :
234 10654 : RmOff sa = fa->file_size * (ga->files.length - 1);
235 10654 : RmOff sb = fb->file_size * (gb->files.length - 1);
236 :
237 : /* Better do not compare big unsigneds via a - b... */
238 10654 : if(sa < sb) {
239 7661 : return -1;
240 : }
241 :
242 2993 : if(sa > sb) {
243 2993 : return +1;
244 : }
245 :
246 0 : return 0;
247 : }
248 :
249 32314 : static gint rm_fmt_rank(const RmFmtGroup *ga, const RmFmtGroup *gb, RmFmtTable *self) {
250 32314 : const char *rank_order = self->session->cfg->rank_criteria;
251 :
252 32314 : RmFile *fa = ga->files.head->data;
253 32314 : RmFile *fb = gb->files.head->data;
254 :
255 32384 : if(fa->lint_type != RM_LINT_TYPE_DUPE_CANDIDATE &&
256 70 : fa->lint_type != RM_LINT_TYPE_DUPE_DIR_CANDIDATE) {
257 0 : return -1;
258 : }
259 :
260 32314 : if(fb->lint_type != RM_LINT_TYPE_DUPE_CANDIDATE &&
261 0 : fb->lint_type != RM_LINT_TYPE_DUPE_DIR_CANDIDATE) {
262 0 : return +1;
263 : }
264 :
265 48306 : for(int i = 0; rank_order[i]; ++i) {
266 47610 : gint64 r = 0;
267 47610 : switch(tolower(rank_order[i])) {
268 : case 's':
269 10654 : r = rm_fmt_rank_size(ga, gb);
270 10654 : break;
271 : case 'a': {
272 8024 : RM_DEFINE_BASENAME(fa)
273 8024 : RM_DEFINE_BASENAME(fb)
274 8024 : r = strcasecmp(fa_basename, fb_basename);
275 8024 : } break;
276 : case 'm':
277 10468 : r = ((gint64)fa->mtime) - ((gint64)fb->mtime);
278 10468 : break;
279 : case 'p':
280 7996 : r = ((gint64)fa->path_index) - ((gint64)fb->path_index);
281 7996 : break;
282 : case 'n':
283 10468 : r = ((gint64)ga->files.length) - ((gint64)gb->files.length);
284 10468 : break;
285 : case 'o':
286 0 : r = ga->index - gb->index;
287 0 : break;
288 : }
289 :
290 47610 : if(r != 0) {
291 31618 : r = CLAMP(r, -1, +1);
292 31618 : return isupper(rank_order[i]) ? -r : r;
293 : }
294 : }
295 :
296 696 : return 0;
297 : }
298 :
299 52985 : void rm_fmt_flush(RmFmtTable *self) {
300 52985 : RmCfg *cfg = self->session->cfg;
301 52985 : if(!cfg->cache_file_structs) {
302 19636 : return;
303 : }
304 :
305 33349 : if(*(cfg->rank_criteria)) {
306 32216 : g_queue_sort(&self->groups, (GCompareDataFunc)rm_fmt_rank, self);
307 : }
308 :
309 99641 : for(GList *iter = self->groups.head; iter; iter = iter->next) {
310 66292 : RmFmtGroup *group = iter->data;
311 66292 : g_queue_foreach(&group->files, (GFunc)rm_fmt_write_impl, self);
312 : }
313 : }
314 :
315 52994 : void rm_fmt_close(RmFmtTable *self) {
316 119286 : for(GList *iter = self->groups.head; iter; iter = iter->next) {
317 66292 : RmFmtGroup *group = iter->data;
318 66292 : rm_fmt_group_destroy(group);
319 : }
320 :
321 52994 : g_queue_clear(&self->groups);
322 :
323 175810 : RM_FMT_FOR_EACH_HANDLER(self) {
324 69822 : RM_FMT_CALLBACK(handler->foot);
325 69822 : fclose(file);
326 69822 : g_mutex_clear(&handler->print_mtx);
327 : }
328 :
329 52994 : g_hash_table_unref(self->name_to_handler);
330 52994 : g_hash_table_unref(self->handler_to_file);
331 52994 : g_hash_table_unref(self->path_to_handler);
332 52994 : g_hash_table_unref(self->config);
333 52994 : g_rec_mutex_clear(&self->state_mtx);
334 52994 : g_slice_free(RmFmtTable, self);
335 52994 : }
336 :
337 329871 : void rm_fmt_write(RmFile *result, RmFmtTable *self, gint64 twin_count) {
338 329871 : bool direct = !(self->session->cfg->cache_file_structs);
339 :
340 329871 : result->twin_count = twin_count;
341 :
342 329871 : if(direct) {
343 100631 : rm_fmt_write_impl(result, self);
344 : } else {
345 229240 : if(result->is_original || self->groups.length == 0) {
346 66292 : g_queue_push_tail(&self->groups, rm_fmt_group_new());
347 : }
348 :
349 229240 : RmFmtGroup *group = self->groups.tail->data;
350 229240 : group->index = self->groups.length - 1;
351 :
352 229240 : g_queue_push_tail(&group->files, result);
353 : }
354 329871 : }
355 :
356 1106139 : void rm_fmt_lock_state(RmFmtTable *self) {
357 1106139 : g_rec_mutex_lock(&self->state_mtx);
358 1106155 : }
359 :
360 1106155 : void rm_fmt_unlock_state(RmFmtTable *self) {
361 1106155 : g_rec_mutex_unlock(&self->state_mtx);
362 1106151 : }
363 :
364 1054881 : void rm_fmt_set_state(RmFmtTable *self, RmFmtProgressState state) {
365 1054881 : rm_fmt_lock_state(self);
366 : {
367 3661118 : RM_FMT_FOR_EACH_HANDLER(self) {
368 1551332 : RM_FMT_CALLBACK(handler->prog, state);
369 : }
370 : }
371 1054893 : rm_fmt_unlock_state(self);
372 1054889 : }
373 :
374 52987 : void rm_fmt_set_config_value(RmFmtTable *self, const char *formatter, const char *key,
375 : const char *value) {
376 52987 : GHashTable *key_to_vals = g_hash_table_lookup(self->config, formatter);
377 :
378 52987 : if(key_to_vals == NULL) {
379 52987 : key_to_vals = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
380 52987 : g_hash_table_insert(self->config, (char *)g_strdup(formatter), key_to_vals);
381 : }
382 52987 : g_hash_table_insert(key_to_vals, (char *)key, (char *)value);
383 52987 : }
384 :
385 652428 : const char *rm_fmt_get_config_value(RmFmtTable *self, const char *formatter,
386 : const char *key) {
387 652428 : GHashTable *key_to_vals = g_hash_table_lookup(self->config, formatter);
388 :
389 652428 : if(key_to_vals == NULL) {
390 1263 : return NULL;
391 : }
392 :
393 651165 : return g_hash_table_lookup(key_to_vals, key);
394 : }
395 :
396 224077 : bool rm_fmt_is_a_output(RmFmtTable *self, const char *path) {
397 224077 : return g_hash_table_contains(self->path_to_handler, path);
398 : }
399 :
400 122 : void rm_fmt_get_pair_iter(RmFmtTable *self, GHashTableIter *iter) {
401 122 : g_hash_table_iter_init(iter, self->path_to_handler);
402 122 : }
403 :
404 16 : bool rm_fmt_has_formatter(RmFmtTable *self, const char *name) {
405 : GHashTableIter iter;
406 16 : RmFmtHandler *handler = NULL;
407 :
408 16 : g_hash_table_iter_init(&iter, self->path_to_handler);
409 :
410 16 : while(g_hash_table_iter_next(&iter, NULL, (gpointer *)&handler)) {
411 24 : if(!strcmp(handler->name, name)) {
412 0 : return true;
413 : }
414 : }
415 :
416 16 : return false;
417 : }
418 :
419 124 : bool rm_fmt_is_stream(_U RmFmtTable *self, RmFmtHandler *handler) {
420 248 : if(0 || handler->path == NULL || strcmp(handler->path, "stdout") == 0 ||
421 248 : strcmp(handler->path, "stderr") == 0 || strcmp(handler->path, "stdin") == 0) {
422 0 : return true;
423 : }
424 :
425 124 : return (access(handler->path, W_OK) == -1);
426 : }
|