2 This file is part of GNUnet
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_sharetree.c
23 * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
25 * @author Christian Grothoff
28 #include "gnunet_fs_service.h"
29 #include "gnunet_scheduler_lib.h"
34 * Entry for each unique keyword to track how often
35 * it occured. Contains the keyword and the counter.
41 * This is a doubly-linked list
43 struct KeywordCounter *prev;
46 * This is a doubly-linked list
48 struct KeywordCounter *next;
51 * Keyword that was found.
56 * How many files have this keyword?
64 * Aggregate information we keep for meta data in each directory.
70 * This is a doubly-linked list
72 struct MetaCounter *prev;
75 * This is a doubly-linked list
77 struct MetaCounter *next;
80 * Name of the plugin that provided that piece of metadata
82 const char *plugin_name;
85 * MIME-type of the metadata itself
87 const char *data_mime_type;
90 * The actual meta data.
95 * Number of bytes in 'data'.
102 enum EXTRACTOR_MetaType type;
107 enum EXTRACTOR_MetaFormat format;
110 * How many files have meta entries matching this value?
111 * (type and format do not have to match).
119 * A structure that forms a singly-linked list that serves as a stack
120 * for metadata-processing function.
126 * Map from the hash over the keyword to an 'struct KeywordCounter *'
127 * counter that says how often this keyword was
128 * encountered in the current directory.
130 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
133 * Map from the hash over the metadata to an 'struct MetaCounter *'
134 * counter that says how often this metadata was
135 * encountered in the current directory.
137 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
140 * Position we are currently manipulating.
142 struct GNUNET_FS_ShareTreeItem *pos;
145 * Number of times an item has to be found to be moved to the parent.
147 unsigned int move_threshold;
153 * Add the given keyword to the keyword statistics tracker.
155 * @param cls the multihashmap we store the keyword counters in
156 * @param keyword the keyword to count
157 * @param is_mandatory ignored
158 * @return always GNUNET_OK
161 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
163 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
164 struct KeywordCounter *cnt;
165 struct GNUNET_HashCode hc;
168 klen = strlen (keyword) + 1;
169 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
170 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
173 cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
174 cnt->value = (const char *) &cnt[1];
175 memcpy (&cnt[1], keyword, klen);
176 GNUNET_assert (GNUNET_OK ==
177 GNUNET_CONTAINER_multihashmap_put (mcm,
179 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
187 * Function called on each meta data item. Increments the
188 * respective counter.
190 * @param cls the container multihashmap to update
191 * @param plugin_name name of the plugin that produced this value;
192 * special values can be used (i.e. '<zlib>' for zlib being
193 * used in the main libextractor library and yielding
195 * @param type libextractor-type describing the meta data
196 * @param format basic format information about data
197 * @param data_mime_type mime-type of data (not of the original file);
198 * can be NULL (if mime-type is not known)
199 * @param data actual meta-data found
200 * @param data_len number of bytes in data
201 * @return GNUNET_OK to continue extracting / iterating
204 add_to_meta_counter (void *cls, const char *plugin_name,
205 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
206 const char *data_mime_type, const char *data, size_t data_len)
208 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
209 struct GNUNET_HashCode key;
210 struct MetaCounter *cnt;
212 GNUNET_CRYPTO_hash (data, data_len, &key);
213 cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
216 cnt = GNUNET_malloc (sizeof (struct MetaCounter));
218 cnt->data_size = data_len;
219 cnt->plugin_name = plugin_name;
221 cnt->format = format;
222 cnt->data_mime_type = data_mime_type;
223 GNUNET_assert (GNUNET_OK ==
224 GNUNET_CONTAINER_multihashmap_put (map,
226 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
234 * Remove keywords above the threshold.
236 * @param cls the 'struct TrimContext' with the pos to remove the keywords from
237 * @param keyword the keyword to check
238 * @param is_mandatory ignored
239 * @return always GNUNET_OK
242 remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
244 struct TrimContext *tc = cls;
245 struct KeywordCounter *counter;
246 struct GNUNET_HashCode hc;
249 klen = strlen (keyword) + 1;
250 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
251 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
252 GNUNET_assert (NULL != counter);
253 if (counter->count < tc->move_threshold)
255 GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
262 * Move "frequent" keywords over to the target ksk uri, free the
265 * @param cls the 'struct TrimContext'
266 * @param key key of the entry
267 * @param value the 'struct KeywordCounter'
268 * @return GNUNET_YES (always)
271 migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode * key, void *value)
273 struct TrimContext *tc = cls;
274 struct KeywordCounter *counter = value;
276 if (counter->count >= tc->move_threshold)
278 if (NULL == tc->pos->ksk_uri)
279 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, &counter->value);
281 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, GNUNET_NO);
283 GNUNET_assert (GNUNET_YES ==
284 GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
287 GNUNET_free (counter);
293 * Copy "frequent" metadata items over to the
294 * target metadata container, free the counters.
296 * @param cls the 'struct TrimContext'
297 * @param key key of the entry
298 * @param value the 'struct KeywordCounter'
299 * @return GNUNET_YES (always)
302 migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode * key, void *value)
304 struct TrimContext *tc = cls;
305 struct MetaCounter *counter = value;
307 if (counter->count >= tc->move_threshold)
309 if (NULL == tc->pos->meta)
310 tc->pos->meta = GNUNET_CONTAINER_meta_data_create ();
311 GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
312 counter->plugin_name,
315 counter->data_mime_type, counter->data,
318 GNUNET_assert (GNUNET_YES ==
319 GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
322 GNUNET_free (counter);
328 * Process a share item tree, moving frequent keywords up and
329 * copying frequent metadata up.
331 * @param tc trim context with hash maps to use
332 * @param tree tree to trim
335 share_tree_trim (struct TrimContext *tc,
336 struct GNUNET_FS_ShareTreeItem *tree)
338 struct GNUNET_FS_ShareTreeItem *pos;
339 unsigned int num_children;
341 /* first, trim all children */
343 for (pos = tree->children_head; NULL != pos; pos = pos->next)
345 share_tree_trim (tc, pos);
349 /* consider adding filename to directory meta data */
350 if (tree->is_directory == GNUNET_YES)
352 const char *user = getenv ("USER");
353 if ( (user == NULL) ||
354 (0 != strncasecmp (user, tree->short_filename, strlen(user))))
356 /* only use filename if it doesn't match $USER */
357 if (NULL == tree->meta)
358 tree->meta = GNUNET_CONTAINER_meta_data_create ();
359 GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
360 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
361 EXTRACTOR_METAFORMAT_UTF8,
362 "text/plain", tree->short_filename,
363 strlen (tree->short_filename) + 1);
367 if (1 >= num_children)
368 return; /* nothing to trim */
370 /* now, count keywords and meta data in children */
371 for (pos = tree->children_head; NULL != pos; pos = pos->next)
373 if (NULL != pos->meta)
374 GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, tc->metacounter);
375 if (NULL != pos->ksk_uri)
376 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, tc->keywordcounter);
379 /* calculate threshold for moving keywords / meta data */
380 tc->move_threshold = 1 + (num_children / 2);
382 /* remove high-frequency keywords from children */
383 for (pos = tree->children_head; NULL != pos; pos = pos->next)
386 if (NULL != pos->ksk_uri)
388 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
389 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, &remove_high_frequency_keywords, tc);
390 GNUNET_FS_uri_destroy (ksk_uri_copy);
394 /* add high-frequency meta data and keywords to parent */
396 GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
397 &migrate_and_drop_keywords,
399 GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
400 &migrate_and_drop_metadata,
406 * Process a share item tree, moving frequent keywords up and
407 * copying frequent metadata up.
409 * @param toplevel toplevel directory in the tree, returned by the scanner
412 GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
414 struct TrimContext tc;
416 if (toplevel == NULL)
418 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
419 tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
420 share_tree_trim (&tc, toplevel);
421 GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
422 GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
427 * Release memory of a share item tree.
429 * @param toplevel toplevel of the tree to be freed
432 GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
434 struct GNUNET_FS_ShareTreeItem *pos;
436 while (NULL != (pos = toplevel->children_head))
437 GNUNET_FS_share_tree_free (pos);
438 if (NULL != toplevel->parent)
439 GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
440 toplevel->parent->children_tail,
442 if (NULL != toplevel->meta)
443 GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
444 if (NULL != toplevel->ksk_uri)
445 GNUNET_FS_uri_destroy (toplevel->ksk_uri);
446 GNUNET_free_non_null (toplevel->filename);
447 GNUNET_free_non_null (toplevel->short_filename);
448 GNUNET_free (toplevel);
451 /* end fs_sharetree.c */