2 This file is part of GNUnet
3 Copyright (C) 2005-2012 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
17 * @file fs/fs_sharetree.c
18 * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
20 * @author Christian Grothoff
23 #include "gnunet_fs_service.h"
24 #include "gnunet_scheduler_lib.h"
29 * Entry for each unique keyword to track how often
30 * it occured. Contains the keyword and the counter.
36 * This is a doubly-linked list
38 struct KeywordCounter *prev;
41 * This is a doubly-linked list
43 struct KeywordCounter *next;
46 * Keyword that was found.
51 * How many files have this keyword?
59 * Aggregate information we keep for meta data in each directory.
65 * This is a doubly-linked list
67 struct MetaCounter *prev;
70 * This is a doubly-linked list
72 struct MetaCounter *next;
75 * Name of the plugin that provided that piece of metadata
77 const char *plugin_name;
80 * MIME-type of the metadata itself
82 const char *data_mime_type;
85 * The actual meta data.
90 * Number of bytes in 'data'.
97 enum EXTRACTOR_MetaType type;
102 enum EXTRACTOR_MetaFormat format;
105 * How many files have meta entries matching this value?
106 * (type and format do not have to match).
114 * A structure that forms a singly-linked list that serves as a stack
115 * for metadata-processing function.
121 * Map from the hash over the keyword to an 'struct KeywordCounter *'
122 * counter that says how often this keyword was
123 * encountered in the current directory.
125 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
128 * Map from the hash over the metadata to an 'struct MetaCounter *'
129 * counter that says how often this metadata was
130 * encountered in the current directory.
132 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
135 * Position we are currently manipulating.
137 struct GNUNET_FS_ShareTreeItem *pos;
140 * Number of times an item has to be found to be moved to the parent.
142 unsigned int move_threshold;
148 * Add the given keyword to the keyword statistics tracker.
150 * @param cls the multihashmap we store the keyword counters in
151 * @param keyword the keyword to count
152 * @param is_mandatory ignored
153 * @return always GNUNET_OK
156 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
158 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
159 struct KeywordCounter *cnt;
160 struct GNUNET_HashCode hc;
163 klen = strlen (keyword) + 1;
164 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
165 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
168 cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
169 cnt->value = (const char *) &cnt[1];
170 GNUNET_memcpy (&cnt[1], keyword, klen);
171 GNUNET_assert (GNUNET_OK ==
172 GNUNET_CONTAINER_multihashmap_put (mcm,
174 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
182 * Function called on each meta data item. Increments the
183 * respective counter.
185 * @param cls the container multihashmap to update
186 * @param plugin_name name of the plugin that produced this value;
187 * special values can be used (i.e. '<zlib>' for zlib being
188 * used in the main libextractor library and yielding
190 * @param type libextractor-type describing the meta data
191 * @param format basic format information about data
192 * @param data_mime_type mime-type of data (not of the original file);
193 * can be NULL (if mime-type is not known)
194 * @param data actual meta-data found
195 * @param data_len number of bytes in data
196 * @return 0 to continue extracting / iterating
199 add_to_meta_counter (void *cls, const char *plugin_name,
200 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
201 const char *data_mime_type, const char *data, size_t data_len)
203 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
204 struct GNUNET_HashCode key;
205 struct MetaCounter *cnt;
207 GNUNET_CRYPTO_hash (data, data_len, &key);
208 cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
211 cnt = GNUNET_new (struct MetaCounter);
213 cnt->data_size = data_len;
214 cnt->plugin_name = plugin_name;
216 cnt->format = format;
217 cnt->data_mime_type = data_mime_type;
218 GNUNET_assert (GNUNET_OK ==
219 GNUNET_CONTAINER_multihashmap_put (map,
221 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
229 * Remove keywords above the threshold.
231 * @param cls the 'struct TrimContext' with the pos to remove the keywords from
232 * @param keyword the keyword to check
233 * @param is_mandatory ignored
234 * @return always GNUNET_OK
237 remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
239 struct TrimContext *tc = cls;
240 struct KeywordCounter *counter;
241 struct GNUNET_HashCode hc;
244 klen = strlen (keyword) + 1;
245 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
246 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
247 GNUNET_assert (NULL != counter);
248 if (counter->count < tc->move_threshold)
250 GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
257 * Move "frequent" keywords over to the target ksk uri, free the
260 * @param cls the 'struct TrimContext'
261 * @param key key of the entry
262 * @param value the 'struct KeywordCounter'
263 * @return GNUNET_YES (always)
266 migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode * key, void *value)
268 struct TrimContext *tc = cls;
269 struct KeywordCounter *counter = value;
271 if (counter->count >= tc->move_threshold)
273 if (NULL == tc->pos->ksk_uri)
274 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, &counter->value);
276 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, GNUNET_NO);
278 GNUNET_assert (GNUNET_YES ==
279 GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
282 GNUNET_free (counter);
288 * Copy "frequent" metadata items over to the
289 * target metadata container, free the counters.
291 * @param cls the 'struct TrimContext'
292 * @param key key of the entry
293 * @param value the 'struct KeywordCounter'
294 * @return GNUNET_YES (always)
297 migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode * key, void *value)
299 struct TrimContext *tc = cls;
300 struct MetaCounter *counter = value;
302 if (counter->count >= tc->move_threshold)
304 if (NULL == tc->pos->meta)
305 tc->pos->meta = GNUNET_CONTAINER_meta_data_create ();
306 GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
307 counter->plugin_name,
310 counter->data_mime_type, counter->data,
313 GNUNET_assert (GNUNET_YES ==
314 GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
317 GNUNET_free (counter);
323 * Process a share item tree, moving frequent keywords up and
324 * copying frequent metadata up.
326 * @param tc trim context with hash maps to use
327 * @param tree tree to trim
330 share_tree_trim (struct TrimContext *tc,
331 struct GNUNET_FS_ShareTreeItem *tree)
333 struct GNUNET_FS_ShareTreeItem *pos;
334 unsigned int num_children;
336 /* first, trim all children */
338 for (pos = tree->children_head; NULL != pos; pos = pos->next)
340 share_tree_trim (tc, pos);
344 /* consider adding filename to directory meta data */
345 if (tree->is_directory == GNUNET_YES)
347 const char *user = getenv ("USER");
348 if ( (user == NULL) ||
349 (0 != strncasecmp (user, tree->short_filename, strlen(user))))
351 /* only use filename if it doesn't match $USER */
352 if (NULL == tree->meta)
353 tree->meta = GNUNET_CONTAINER_meta_data_create ();
354 GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
355 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
356 EXTRACTOR_METAFORMAT_UTF8,
357 "text/plain", tree->short_filename,
358 strlen (tree->short_filename) + 1);
362 if (1 >= num_children)
363 return; /* nothing to trim */
365 /* now, count keywords and meta data in children */
366 for (pos = tree->children_head; NULL != pos; pos = pos->next)
368 if (NULL != pos->meta)
369 GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, tc->metacounter);
370 if (NULL != pos->ksk_uri)
371 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, tc->keywordcounter);
374 /* calculate threshold for moving keywords / meta data */
375 tc->move_threshold = 1 + (num_children / 2);
377 /* remove high-frequency keywords from children */
378 for (pos = tree->children_head; NULL != pos; pos = pos->next)
381 if (NULL != pos->ksk_uri)
383 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
384 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, &remove_high_frequency_keywords, tc);
385 GNUNET_FS_uri_destroy (ksk_uri_copy);
389 /* add high-frequency meta data and keywords to parent */
391 GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
392 &migrate_and_drop_keywords,
394 GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
395 &migrate_and_drop_metadata,
401 * Process a share item tree, moving frequent keywords up and
402 * copying frequent metadata up.
404 * @param toplevel toplevel directory in the tree, returned by the scanner
407 GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
409 struct TrimContext tc;
411 if (toplevel == NULL)
413 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
414 tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
415 share_tree_trim (&tc, toplevel);
416 GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
417 GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
422 * Release memory of a share item tree.
424 * @param toplevel toplevel of the tree to be freed
427 GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
429 struct GNUNET_FS_ShareTreeItem *pos;
431 while (NULL != (pos = toplevel->children_head))
432 GNUNET_FS_share_tree_free (pos);
433 if (NULL != toplevel->parent)
434 GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
435 toplevel->parent->children_tail,
437 if (NULL != toplevel->meta)
438 GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
439 if (NULL != toplevel->ksk_uri)
440 GNUNET_FS_uri_destroy (toplevel->ksk_uri);
441 GNUNET_free_non_null (toplevel->filename);
442 GNUNET_free_non_null (toplevel->short_filename);
443 GNUNET_free (toplevel);
446 /* end fs_sharetree.c */