Line data Source code
1 : /**
2 : * This file is part of rmlint.
3 : *
4 : * rmlint is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * rmlint is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 : *
17 : * Authors:
18 : *
19 : * - Christopher <sahib> Pahl 2010-2015 (https://github.com/sahib)
20 : * - Daniel <SeeSpotRun> T. 2014-2015 (https://github.com/SeeSpotRun)
21 : *
22 : * Hosted on http://github.com/sahib/rmlint
23 : *
24 : **/
25 :
26 : #include <stdlib.h>
27 : #include <stdbool.h>
28 : #include <string.h>
29 : #include <glib.h>
30 :
31 : #include "pathtricia.h"
32 : #include "config.h"
33 :
34 : //////////////////////////
35 : // RmPathNode Methods //
36 : //////////////////////////
37 :
38 655418 : static RmNode *rm_node_new(RmTrie *trie, const char *elem) {
39 655418 : RmNode *self = g_slice_alloc0(sizeof(RmNode));
40 :
41 655418 : if(elem != NULL) {
42 : /* Note: We could use g_string_chunk_insert_const here.
43 : * That would keep a hash table with all strings internally though,
44 : * but would sort out storing duplicate path elements. In normal
45 : * setups this will not happen that much though I guess.
46 : */
47 597174 : self->basename = g_string_chunk_insert(trie->chunks, elem);
48 : }
49 655418 : return self;
50 : }
51 :
52 653528 : static void rm_node_free(RmNode *node) {
53 653528 : if(node->children) {
54 312821 : g_hash_table_unref(node->children);
55 : }
56 653528 : memset(node, 0, sizeof(RmNode));
57 653528 : g_slice_free(RmNode, node);
58 653528 : }
59 :
60 1398128 : static RmNode *rm_node_insert(RmTrie *trie, RmNode *parent, const char *elem) {
61 1398128 : if(parent->children == NULL) {
62 312821 : parent->children = g_hash_table_new(g_str_hash, g_str_equal);
63 : }
64 :
65 1398128 : RmNode *exists = g_hash_table_lookup(parent->children, elem);
66 1398128 : if(exists == NULL) {
67 597174 : RmNode *node = rm_node_new(trie, elem);
68 597174 : node->parent = parent;
69 597174 : g_hash_table_insert(parent->children, node->basename, node);
70 597174 : return node;
71 : }
72 :
73 800954 : return exists;
74 : }
75 :
76 : ///////////////////////////
77 : // RmTrie Methods //
78 : ///////////////////////////
79 :
80 58244 : void rm_trie_init(RmTrie *self) {
81 58244 : g_assert(self);
82 58244 : self->root = rm_node_new(self, NULL);
83 :
84 : /* Average path len is 93.633236.
85 : * I did ze science! :-)
86 : */
87 58244 : self->chunks = g_string_chunk_new(100);
88 :
89 58244 : g_mutex_init(&self->lock);
90 58244 : }
91 :
92 : /* Path iterator that works with absolute paths.
93 : * Absolute paths are required to start with a /
94 : */
95 : typedef struct RmPathIter {
96 : char *curr_elem;
97 : char path_buf[PATH_MAX];
98 : } RmPathIter;
99 :
100 515658 : void rm_path_iter_init(RmPathIter *iter, const char *path) {
101 515658 : if(*path == '/') {
102 515658 : path++;
103 : }
104 :
105 515658 : memset(iter->path_buf, 0, PATH_MAX);
106 515658 : strncpy(iter->path_buf, path, PATH_MAX);
107 :
108 515658 : iter->curr_elem = iter->path_buf;
109 515658 : }
110 :
111 2247069 : char *rm_path_iter_next(RmPathIter *iter) {
112 2247069 : char *elem_begin = iter->curr_elem;
113 :
114 2247069 : if(elem_begin && (iter->curr_elem = strchr(elem_begin, '/'))) {
115 1385266 : *(iter->curr_elem) = 0;
116 1385266 : iter->curr_elem += 1;
117 : }
118 :
119 2247069 : return elem_begin;
120 : }
121 :
122 365782 : RmNode *rm_trie_insert(RmTrie *self, const char *path, void *value) {
123 365782 : g_assert(self);
124 365782 : g_assert(path);
125 :
126 : RmPathIter iter;
127 365782 : rm_path_iter_init(&iter, path);
128 :
129 365798 : g_mutex_lock(&self->lock);
130 :
131 365802 : char *path_elem = NULL;
132 365802 : RmNode *curr_node = self->root;
133 :
134 2129732 : while((path_elem = rm_path_iter_next(&iter))) {
135 1398128 : curr_node = rm_node_insert(self, curr_node, path_elem);
136 : }
137 :
138 365802 : if(curr_node != NULL) {
139 365802 : curr_node->has_value = true;
140 365802 : curr_node->data = value;
141 365802 : self->size++;
142 : }
143 :
144 365802 : g_mutex_unlock(&self->lock);
145 :
146 365801 : return curr_node;
147 : }
148 :
149 149882 : RmNode *rm_trie_search_node(RmTrie *self, const char *path) {
150 149882 : g_assert(self);
151 149882 : g_assert(path);
152 :
153 : RmPathIter iter;
154 149882 : rm_path_iter_init(&iter, path);
155 :
156 149882 : g_mutex_lock(&self->lock);
157 :
158 149882 : char *path_elem = NULL;
159 149882 : RmNode *curr_node = self->root;
160 :
161 732468 : while(curr_node && (path_elem = rm_path_iter_next(&iter))) {
162 451899 : if(curr_node->children == NULL) {
163 : /* Can't go any further */
164 19195 : g_mutex_unlock(&self->lock);
165 19195 : return NULL;
166 : }
167 :
168 432704 : curr_node = g_hash_table_lookup(curr_node->children, path_elem);
169 : }
170 :
171 130687 : g_mutex_unlock(&self->lock);
172 130687 : return curr_node;
173 : }
174 :
175 35840 : void *rm_trie_search(RmTrie *self, const char *path) {
176 35840 : RmNode *find = rm_trie_search_node(self, path);
177 35840 : return (find) ? find->data : NULL;
178 : }
179 :
180 140 : bool rm_trie_set_value(RmTrie *self, const char *path, void *data) {
181 140 : RmNode *find = rm_trie_search_node(self, path);
182 140 : if(find == NULL) {
183 5 : return false;
184 : } else {
185 135 : find->data = data;
186 135 : return true;
187 : }
188 : }
189 :
190 998935 : char *rm_trie_build_path_unlocked(RmNode *node, char *buf, size_t buf_len) {
191 998935 : if(node == NULL) {
192 0 : return NULL;
193 : }
194 :
195 998935 : size_t n_elements = 1;
196 998935 : char *elements[PATH_MAX / 2 + 1] = {node->basename, NULL};
197 :
198 : /* walk up the folder tree, collecting path elements into a list */
199 4969018 : for(RmNode *folder = node->parent; folder && folder->parent;
200 2971148 : folder = folder->parent) {
201 2971148 : elements[n_elements++] = folder->basename;
202 2971148 : if(n_elements >= sizeof(elements)) {
203 0 : break;
204 : }
205 : }
206 :
207 : /* copy collected elements into *buf */
208 998935 : char *buf_ptr = buf;
209 5967953 : while(n_elements && (size_t)(buf_ptr - buf) < buf_len) {
210 3970083 : *buf_ptr = '/';
211 3970083 : buf_ptr = g_stpcpy(buf_ptr + 1, (char *)elements[--n_elements]);
212 : }
213 :
214 998935 : return buf;
215 : }
216 :
217 992156 : char *rm_trie_build_path(RmTrie *self, RmNode *node, char *buf, size_t buf_len) {
218 992156 : if(node == NULL) {
219 0 : return NULL;
220 : }
221 992156 : char *result = NULL;
222 992156 : g_mutex_lock(&self->lock);
223 992159 : { result = rm_trie_build_path_unlocked(node, buf, buf_len); }
224 992159 : g_mutex_unlock(&self->lock);
225 992159 : return result;
226 : }
227 :
228 112 : size_t rm_trie_size(RmTrie *self) {
229 112 : return self->size;
230 : }
231 :
232 676152 : static void _rm_trie_iter(RmTrie *self, RmNode *root, bool pre_order, bool all_nodes,
233 : RmTrieIterCallback callback, void *user_data, int level) {
234 : GHashTableIter iter;
235 : gpointer key, value;
236 :
237 676152 : if(root == NULL) {
238 59714 : root = self->root;
239 : }
240 :
241 676152 : if(pre_order && (all_nodes || root->has_value)) {
242 14000 : if(callback(self, root, level, user_data) != 0) {
243 0 : return;
244 : }
245 : }
246 :
247 676152 : if(root->children != NULL) {
248 325925 : g_hash_table_iter_init(&iter, root->children);
249 1268288 : while(g_hash_table_iter_next(&iter, &key, &value)) {
250 616438 : _rm_trie_iter(self, value, pre_order, all_nodes, callback, user_data,
251 : level + 1);
252 : }
253 : }
254 :
255 676152 : if(!pre_order && (all_nodes || root->has_value)) {
256 653528 : if(callback(self, root, level, user_data) != 0) {
257 0 : return;
258 : }
259 : }
260 : }
261 :
262 59714 : void rm_trie_iter(RmTrie *self, RmNode *root, bool pre_order, bool all_nodes,
263 : RmTrieIterCallback callback, void *user_data) {
264 59714 : g_mutex_lock(&self->lock);
265 59714 : _rm_trie_iter(self, root, pre_order, all_nodes, callback, user_data, 0);
266 59714 : g_mutex_unlock(&self->lock);
267 59714 : }
268 :
269 653528 : static int rm_trie_destroy_callback(_U RmTrie *self,
270 : RmNode *node,
271 : _U int level,
272 : _U void *user_data) {
273 653528 : rm_node_free(node);
274 653528 : return 0;
275 : }
276 :
277 56354 : void rm_trie_destroy(RmTrie *self) {
278 56354 : rm_trie_iter(self, NULL, false, true, rm_trie_destroy_callback, NULL);
279 56354 : g_string_chunk_free(self->chunks);
280 56354 : g_mutex_clear(&self->lock);
281 56354 : }
282 :
283 : #ifdef _RM_PATHTRICIA_BUILD_MAIN
284 :
285 : #include <stdio.h>
286 :
287 : static int rm_trie_print_callback(_U RmTrie *self,
288 : RmNode *node,
289 : int level,
290 : _U void *user_data) {
291 : for(int i = 0; i < level; ++i) {
292 : g_printerr(" ");
293 : }
294 :
295 : g_printerr("%s %s\n",
296 : (char *)((node->basename) ? node->basename : "[root]"),
297 : (node->data) ? "[leaf]" : "");
298 :
299 : return 0;
300 : }
301 :
302 : void rm_trie_print(RmTrie *self) {
303 : rm_trie_iter(self, NULL, true, true, rm_trie_print_callback, NULL);
304 : }
305 :
306 : int main(void) {
307 : RmTrie trie;
308 : rm_trie_init(&trie);
309 : GTimer *timer = g_timer_new();
310 :
311 : g_timer_start(timer);
312 : char buf[1024];
313 : int i = 0;
314 :
315 : while(fgets(buf, sizeof(buf), stdin)) {
316 : buf[strlen(buf) - 1] = 0;
317 : rm_trie_insert(&trie, buf, GUINT_TO_POINTER(++i));
318 : memset(buf, 0, sizeof(buf));
319 : }
320 :
321 : g_printerr("Took %2.5f to insert %d items\n", g_timer_elapsed(timer, NULL), i);
322 : rm_trie_print(&trie);
323 : memset(buf, 0, sizeof(buf));
324 : rm_trie_build_path(&trie, rm_trie_search_node(&trie, "/usr/bin/rmlint"), buf,
325 : sizeof(buf));
326 : g_printerr("=> %s\n", buf);
327 :
328 : g_timer_start(timer);
329 : const int N = 10000000;
330 : for(int x = 0; x < N; x++) {
331 : rm_trie_search(&trie, "/usr/bin/rmlint");
332 : }
333 : g_printerr("Took %2.5f to search\n", g_timer_elapsed(timer, NULL));
334 :
335 : g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/usr/bin/rmlint")));
336 : g_printerr("%u\n", GPOINTER_TO_UINT(rm_trie_search(&trie, "/a/b/c")));
337 : rm_trie_destroy(&trie);
338 : g_timer_destroy(timer);
339 : return 0;
340 : }
341 :
342 : #endif
|