Line data Source code
1 : /**
2 : * This file is part of rmlint.
3 : *
4 : * rmlint is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * rmlint is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 : *
17 : ** Authors:
18 : *
19 : * - Christopher <sahib> Pahl 2010-2014 (https://github.com/sahib)
20 : * - Daniel <SeeSpotRun> T. 2014-2014 (https://github.com/SeeSpotRun)
21 : *
22 : ** Hosted on http://github.com/sahib/rmlint
23 : *
24 : **/
25 :
26 : #include <stdlib.h>
27 : #include <stdio.h>
28 : #include <string.h>
29 : #include <locale.h>
30 :
31 : #include "../lib/config.h"
32 : #include "../lib/hasher.h"
33 : #include "../lib/utilities.h"
34 :
35 : typedef struct RmHasherSession {
36 : /* Internal */
37 : char **paths;
38 : gint path_index;
39 : GMutex lock;
40 : RmDigest **completed_digests_buffer;
41 :
42 : /* Options */
43 : RmDigestType digest_type;
44 : gboolean print_in_order;
45 : gboolean print_multihash;
46 : } RmHasherSession;
47 :
48 0 : static gboolean rm_hasher_parse_type(_U const char *option_name,
49 : const gchar *value,
50 : RmHasherSession *session,
51 : GError **error) {
52 0 : session->digest_type = rm_string_to_digest_type(value);
53 :
54 0 : if(session->digest_type == RM_DIGEST_UNKNOWN) {
55 0 : g_set_error(error, RM_ERROR_QUARK, 0, _("Unknown hash algorithm: '%s'"), value);
56 0 : return FALSE;
57 : }
58 0 : return TRUE;
59 : }
60 :
61 0 : static void rm_hasher_print(RmDigest *digest, char *path, bool print_multihash) {
62 0 : gsize size = rm_digest_get_bytes(digest) * 2 + 1;
63 :
64 0 : char checksum_str[size];
65 0 : memset(checksum_str, '0', size);
66 0 : checksum_str[size - 1] = 0;
67 :
68 0 : rm_digest_hexstring(digest, checksum_str);
69 :
70 0 : if(print_multihash) {
71 0 : g_print("%02x%02x@", rm_digest_type_to_multihash_id(digest->type),
72 : rm_digest_get_bytes(digest));
73 : }
74 :
75 0 : g_print("%s %s\n", checksum_str, path);
76 0 : }
77 :
78 0 : static int rm_hasher_callback(_U RmHasher *hasher,
79 : RmDigest *digest,
80 : RmHasherSession *session,
81 : gpointer index_ptr) {
82 0 : gint index = GPOINTER_TO_INT(index_ptr);
83 :
84 0 : g_mutex_lock(&session->lock);
85 : {
86 0 : if(session->print_in_order && digest) {
87 : /* add digest in buffer array */
88 0 : session->completed_digests_buffer[index] = digest;
89 : /* check if the next due digest has been completed; if yes then print
90 : * it (and possibly any following digests) */
91 0 : while(session->completed_digests_buffer[session->path_index]) {
92 0 : if(session->paths[session->path_index]) {
93 0 : rm_hasher_print(
94 0 : session->completed_digests_buffer[session->path_index],
95 0 : session->paths[session->path_index],
96 0 : session->print_multihash);
97 0 : rm_digest_free(
98 0 : session->completed_digests_buffer[session->path_index]);
99 : }
100 0 : session->completed_digests_buffer[session->path_index] = NULL;
101 0 : session->path_index++;
102 : }
103 0 : } else if(digest) {
104 0 : rm_hasher_print(digest, session->paths[index], session->print_multihash);
105 : }
106 : }
107 0 : g_mutex_unlock(&session->lock);
108 0 : return 0;
109 : }
110 :
111 0 : int rm_hasher_main(int argc, const char **argv) {
112 : RmHasherSession tag;
113 :
114 : /* List of paths we got passed (or NULL) */
115 0 : tag.paths = NULL;
116 :
117 : /* Print hashes in the same order as files in command line args */
118 0 : tag.print_in_order = TRUE;
119 :
120 : /* Print a hash with builtin identifier */
121 0 : tag.print_multihash = FALSE;
122 :
123 : /* Digest type (user option, default SHA1) */
124 0 : tag.digest_type = RM_DIGEST_SHA1;
125 0 : gint threads = 8;
126 0 : gint64 buffer_mbytes = 256;
127 :
128 : ////////////// Option Parsing ///////////////
129 :
130 : /* clang-format off */
131 :
132 0 : const GOptionEntry entries[] = {
133 0 : {"digest-type" , 'd' , 0 , G_OPTION_ARG_CALLBACK , (GOptionArgFunc)rm_hasher_parse_type , _("Digest type [SHA1]") , "[TYPE]"} ,
134 0 : {"num-threads" , 't' , 0 , G_OPTION_ARG_INT , &threads , _("Number of hashing threads [8]") , "N"} ,
135 0 : {"multihash" , 'm' , 0 , G_OPTION_ARG_NONE , &tag.print_multihash , _("Print hash as self identifying multihash") , NULL} ,
136 0 : {"buffer-mbytes" , 'b' , 0 , G_OPTION_ARG_INT64 , &buffer_mbytes , _("Megabytes read buffer [256 MB]") , "MB"} ,
137 0 : {"ignore-order" , 'i' , G_OPTION_FLAG_REVERSE , G_OPTION_ARG_NONE , &tag.print_in_order , _("Print hashes in order completed, not in order entered (reduces memory usage)") , NULL} ,
138 0 : {"" , 0 , 0 , G_OPTION_ARG_FILENAME_ARRAY , &tag.paths , _("Space-separated list of files") , "[FILEā¦]"} ,
139 : {NULL , 0 , 0 , 0 , NULL , NULL , NULL}};
140 :
141 : /* clang-format on */
142 :
143 0 : GError *error = NULL;
144 0 : GOptionContext *context = g_option_context_new(_("Hash a list of files"));
145 0 : GOptionGroup *main_group =
146 0 : g_option_group_new(argv[0], _("Hash a list of files"), "", &tag, NULL);
147 :
148 : char summary[4096];
149 0 : memset(summary, 0, sizeof(summary));
150 :
151 0 : g_snprintf(summary, sizeof(summary),
152 0 : _("Multi-threaded file digest (hash) calculator.\n"
153 : "\n Available digest types:"
154 : "\n %s\n"
155 : "\n Versions with different bit numbers:"
156 : "\n %s\n"
157 : "\n Supported, but not useful:"
158 : "\n %s\n"),
159 : "spooky, city, xxhash, sha{1,256,512}, md5, murmur",
160 : "spooky{32,64,128}, city{128,256,512}, murmur{512}",
161 : "cumulative, paranoid, ext, bastard");
162 :
163 0 : g_option_group_add_entries(main_group, entries);
164 0 : g_option_context_set_main_group(context, main_group);
165 0 : g_option_context_set_summary(context, summary);
166 :
167 0 : if(!g_option_context_parse(context, &argc, (char ***)&argv, &error)) {
168 : /* print g_option error message */
169 0 : rm_log_error_line("%s", error->message);
170 0 : exit(EXIT_FAILURE);
171 : }
172 :
173 0 : if(tag.paths == NULL) {
174 : /* read paths from stdin */
175 : char path_buf[PATH_MAX];
176 0 : GPtrArray *paths = g_ptr_array_new();
177 :
178 0 : while(fgets(path_buf, PATH_MAX, stdin)) {
179 0 : char *abs_path = realpath(strtok(path_buf, "\n"), NULL);
180 0 : g_ptr_array_add(paths, abs_path);
181 : }
182 :
183 0 : tag.paths = (char **)g_ptr_array_free(paths, FALSE);
184 : }
185 :
186 0 : if(tag.paths == NULL || tag.paths[0] == NULL) {
187 0 : rm_log_error_line(_("No valid paths given."));
188 0 : exit(EXIT_FAILURE);
189 : }
190 :
191 0 : g_option_context_free(context);
192 :
193 : ////////// Implementation //////
194 :
195 0 : if(tag.print_in_order) {
196 : /* allocate buffer to collect results */
197 0 : tag.completed_digests_buffer =
198 0 : g_slice_alloc0((g_strv_length(tag.paths) + 1) * sizeof(RmDigest *));
199 0 : tag.path_index = 0;
200 : }
201 :
202 : /* initialise structures */
203 0 : g_mutex_init(&tag.lock);
204 0 : RmHasher *hasher = rm_hasher_new(tag.digest_type,
205 : threads,
206 : FALSE,
207 : 4096,
208 0 : 1024 * 1024 * buffer_mbytes,
209 : 0,
210 : (RmHasherCallback)rm_hasher_callback,
211 : &tag);
212 :
213 : /* Iterate over paths, pushing to hasher threads */
214 0 : for(int i = 0; tag.paths && tag.paths[i]; ++i) {
215 : /* check it is a regular file */
216 :
217 : RmStat stat_buf;
218 0 : if(rm_sys_stat(tag.paths[i], &stat_buf) == -1) {
219 0 : rm_log_warning_line(_("Can't open directory or file \"%s\": %s"),
220 : tag.paths[i], strerror(errno));
221 0 : } else if(S_ISDIR(stat_buf.st_mode)) {
222 0 : rm_log_warning_line(_("Directories are not supported: %s"), tag.paths[i]);
223 0 : } else if(S_ISREG(stat_buf.st_mode)) {
224 0 : RmHasherTask *task = rm_hasher_task_new(hasher, NULL, GINT_TO_POINTER(i));
225 0 : rm_hasher_task_hash(task, tag.paths[i], 0, 0, FALSE);
226 0 : rm_hasher_task_finish(task);
227 0 : continue;
228 : } else {
229 0 : rm_log_warning_line(_("%s: Unknown file type"), tag.paths[i]);
230 : }
231 :
232 : /* dummy callback for failed paths */
233 0 : g_free(tag.paths[i]);
234 0 : tag.paths[i] = NULL;
235 0 : rm_hasher_callback(hasher, NULL, &tag, GINT_TO_POINTER(i));
236 : }
237 :
238 : /* wait for all hasher threads to finish... */
239 0 : rm_hasher_free(hasher, TRUE);
240 :
241 : /* tidy up */
242 0 : if(tag.print_in_order) {
243 0 : g_slice_free1((g_strv_length(tag.paths) + 1) * sizeof(RmDigest *),
244 0 : tag.completed_digests_buffer);
245 : }
246 :
247 0 : g_strfreev(tag.paths);
248 :
249 0 : return EXIT_SUCCESS;
250 : }
|