2 This file is part of GNUnet
\r
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
\r
5 GNUnet is free software; you can redistribute it and/or modify
\r
6 it under the terms of the GNU General Public License as published
\r
7 by the Free Software Foundation; either version 2, or (at your
\r
8 option) any later version.
\r
10 GNUnet is distributed in the hope that it will be useful, but
\r
11 WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
13 General Public License for more details.
\r
15 You should have received a copy of the GNU General Public License
\r
16 along with GNUnet; see the file COPYING. If not, write to the
\r
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
\r
18 Boston, MA 02111-1307, USA.
\r
21 #include "platform.h"
\r
22 #include "gnunet_fs_service.h"
\r
23 #include "gnunet_scheduler_lib.h"
\r
26 * Entry for each unique keyword to track how often
\r
27 * it occurred. Contains the keyword and the counter.
\r
29 struct KeywordCounter
\r
33 * Keyword that was found.
\r
38 * How many files have this keyword?
\r
43 * This is a doubly-linked list
\r
45 struct KeywordCounter *prev;
\r
48 * This is a doubly-linked list
\r
50 struct KeywordCounter *next;
\r
54 * Aggregate information we keep for meta data in each directory.
\r
59 * The actual meta data.
\r
64 * Number of bytes in 'data'.
\r
69 * Name of the plugin that provided that piece of metadata
\r
71 const char *plugin_name;
\r
76 enum EXTRACTOR_MetaType type;
\r
79 * Format of the data
\r
81 enum EXTRACTOR_MetaFormat format;
\r
84 * MIME-type of the metadata itself
\r
86 const char *data_mime_type;
\r
89 * How many files have meta entries matching this value?
\r
90 * (type and format do not have to match).
\r
95 * This is a doubly-linked list
\r
97 struct MetaCounter *prev;
\r
100 * This is a doubly-linked list
\r
102 struct MetaCounter *next;
\r
105 struct AddDirContext;
\r
108 * A structure used to hold a pointer to the tree item that is being
\r
110 * Needed to avoid changing the context for every recursive call.
\r
117 struct AddDirContext *adc;
\r
122 struct ShareTreeItem *parent;
\r
126 * Execution context for 'add_dir'
\r
127 * Owned by the initiator thread.
\r
129 struct AddDirContext
\r
132 * After the scan is finished, it will contain a pointer to the
\r
133 * top-level directory entry in the directory tree built by the
\r
136 struct ShareTreeItem *toplevel;
\r
139 * Expanded filename (as given by the scan initiator).
\r
140 * The scanner thread stores a copy here, and frees it when it finishes.
\r
142 char *filename_expanded;
\r
145 * A pipe end to read signals from.
\r
146 * Owned by the initiator thread.
\r
148 const struct GNUNET_DISK_FileHandle *stop_read;
\r
151 * 1 if the scanner should stop, 0 otherwise. Set in response
\r
152 * to communication errors or when the initiator wants the scanning
\r
158 * Handle of the pipe end into which the progress messages are written
\r
159 * The pipe is owned by the initiator thread, and there's no way to
\r
160 * close this end without having access to the pipe, so it won't
\r
161 * be closed by the scanner thread.
\r
162 * The initiator MUST keep it alive until the scanner thread is finished.
\r
164 const struct GNUNET_DISK_FileHandle *progress_write;
\r
168 * List of libextractor plugins to use for extracting.
\r
169 * Initialized when the scan starts, removed when it finishes.
\r
171 struct EXTRACTOR_PluginList *plugins;
\r
175 * An opaque structure a pointer to which is returned to the
\r
176 * caller to be used to control the scanner.
\r
178 struct GNUNET_FS_DirScanner
\r
181 * A pipe end to write stop signals to.
\r
182 * Owned by the initiator thread.
\r
184 const struct GNUNET_DISK_FileHandle *stop_write;
\r
187 * A pipe to transfer signals to the scanner.
\r
188 * Owned by the initiator thread.
\r
190 struct GNUNET_DISK_PipeHandle *stop_pipe;
\r
193 * A thread object for the scanner thread.
\r
194 * Owned by the initiator thread.
\r
203 * A task for reading progress messages from the scanner.
\r
205 GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
\r
208 * The end of the pipe that is used to read progress messages.
\r
210 const struct GNUNET_DISK_FileHandle *progress_read;
\r
213 * The pipe that is used to read progress messages.
\r
214 * Owned (along with both of its ends) by the initiator thread.
\r
215 * Only closed after the scanner thread is finished.
\r
217 struct GNUNET_DISK_PipeHandle *progress_pipe;
\r
220 * The function that will be called every time there's a progress
\r
223 GNUNET_FS_DirScannerProgressCallback progress_callback;
\r
226 * A closure for progress_callback.
\r
231 * A pointer to the context of the scanner.
\r
232 * Owned by the initiator thread.
\r
233 * Initiator thread shouldn't touch it until the scanner thread
\r
236 struct AddDirContext *adc;
\r
240 * A structure that forms a singly-linked list that serves as a stack
\r
241 * for metadata-processing function.
\r
243 struct ProcessMetadataStackItem
\r
246 * A pointer to metadata-processing context.
\r
247 * The same in every stack item.
\r
249 struct ProcessMetadataContext *ctx;
\r
252 * This is a singly-linked list. A pointer to its end is kept, and
\r
253 * this pointer is used to walk it backwards.
\r
255 struct ProcessMetadataStackItem *parent;
\r
258 * Map from the hash over the keyword to an 'struct KeywordCounter *'
\r
259 * counter that says how often this keyword was
\r
260 * encountered in the current directory.
\r
262 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
\r
265 * Map from the hash over the metadata to an 'struct MetaCounter *'
\r
266 * counter that says how often this metadata was
\r
267 * encountered in the current directory.
\r
269 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
\r
272 * Number of files in the current directory.
\r
274 unsigned int dir_entry_count;
\r
277 * Keywords to exclude from using for KSK since they'll be associated
\r
278 * with the parent as well. NULL for nothing blocked.
\r
280 struct GNUNET_FS_Uri *exclude_ksk;
\r
283 * A share tree item that is being processed.
\r
285 struct ShareTreeItem *item;
\r
288 * Set to GNUNET_YES to indicate that the directory pointer by 'item'
\r
289 * was processed, and we should move on to the next.
\r
290 * Otherwise the directory will be recursed into.
\r
297 * The structure to keep the state of metadata processing
\r
299 struct ProcessMetadataContext
\r
302 * The top of the stack.
\r
304 struct ProcessMetadataStackItem *stack;
\r
307 * Callback to invoke when processing is finished
\r
309 GNUNET_SCHEDULER_Task cb;
\r
317 * Toplevel directory item of the tree to process.
\r
319 struct ShareTreeItem *toplevel;
\r
323 * Called every now and then by the scanner.
\r
324 * Checks the synchronization primitive.
\r
325 * Returns 1 if the scanner should stop, 0 otherwise.
\r
328 should_stop (struct AddDirContext *adc)
\r
332 if (GNUNET_DISK_file_read_non_blocking (adc->stop_read, &c, 1) == 1
\r
333 || errno != EAGAIN)
\r
337 return adc->do_stop;
\r
341 * Write progress message.
\r
343 * <reason><filename length><filename><directory flag>
\r
344 * If filename is NULL, filename is not written, and its length
\r
345 * is written as 0, and nothing else is written. It signals the initiator
\r
346 * thread that the scanner is finished, and that it can now join its thread.
\r
348 * Also checks if the initiator thread wants the scanner to stop,
\r
349 * Returns 1 to stop scanning (if the signal was received, or
\r
350 * if the pipe was broken somehow), 0 otherwise.
\r
353 write_progress (struct AddDirContext *adc, const char *filename,
\r
354 char is_directory, enum GNUNET_DirScannerProgressUpdateReason reason)
\r
356 size_t filename_len;
\r
358 size_t total_write;
\r
359 if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_DIR_SCANNER_ASKED_TO_STOP
\r
360 && reason != GNUNET_DIR_SCANNER_FINISHED)
\r
364 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (reason))
\r
366 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
\r
367 &((char *)&reason)[total_write], sizeof (reason) - total_write);
\r
371 if (sizeof (reason) != total_write)
\r
372 return adc->do_stop = 1;
\r
374 filename_len = strlen (filename) + 1;
\r
379 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (size_t))
\r
381 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
\r
382 &((char *)&filename_len)[total_write], sizeof (size_t) - total_write);
\r
386 if (sizeof (size_t) != total_write)
\r
387 return adc->do_stop = 1;
\r
392 while ((wr > 0 || errno == EAGAIN) && total_write < filename_len)
\r
394 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
\r
395 &((char *)filename)[total_write], filename_len - total_write);
\r
399 if (filename_len != total_write)
\r
400 return adc->do_stop = 1;
\r
403 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (char))
\r
405 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
\r
406 &((char *)&is_directory)[total_write], sizeof (char) - total_write);
\r
410 if (sizeof (char) != total_write)
\r
411 return adc->do_stop = 1;
\r
417 * Add the given keyword to the
\r
418 * keyword statistics tracker.
\r
420 * @param cls closure (user-defined)
\r
421 * @param keyword the keyword to count
\r
422 * @param is_mandatory ignored
\r
423 * @return always GNUNET_OK
\r
426 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
\r
428 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
\r
429 struct KeywordCounter *cnt, *first_cnt;
\r
430 GNUNET_HashCode hc;
\r
433 klen = strlen (keyword) + 1;
\r
434 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
\r
435 /* Since the map might contain multiple values per keyword, we only
\r
436 * store one value, and attach all other to it, forming a linked list.
\r
437 * Somewhat easier than retrieving multiple items via callback.
\r
439 first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
\r
440 for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next);
\r
443 cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
\r
444 cnt->value = (const char *) &cnt[1];
\r
445 memcpy (&cnt[1], keyword, klen);
\r
446 if (first_cnt != NULL)
\r
448 if (first_cnt->prev != NULL)
\r
450 first_cnt->prev->next = cnt;
\r
451 cnt->prev = first_cnt->prev;
\r
453 first_cnt->prev = cnt;
\r
454 cnt->next = first_cnt;
\r
457 GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt,
\r
458 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
\r
465 * Type of a function that libextractor calls for each
\r
466 * meta data item found.
\r
468 * @param cls the container multihashmap to update
\r
469 * @param plugin_name name of the plugin that produced this value;
\r
470 * special values can be used (i.e. '<zlib>' for zlib being
\r
471 * used in the main libextractor library and yielding
\r
473 * @param type libextractor-type describing the meta data
\r
474 * @param format basic format information about data
\r
475 * @param data_mime_type mime-type of data (not of the original file);
\r
476 * can be NULL (if mime-type is not known)
\r
477 * @param data actual meta-data found
\r
478 * @param data_len number of bytes in data
\r
479 * @return GNUNET_OK to continue extracting / iterating
\r
482 add_to_meta_counter (void *cls, const char *plugin_name,
\r
483 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
\r
484 const char *data_mime_type, const char *data, size_t data_len)
\r
486 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
\r
487 GNUNET_HashCode key;
\r
488 struct MetaCounter *cnt, *first_cnt;
\r
490 GNUNET_CRYPTO_hash (data, data_len, &key);
\r
491 first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
\r
492 for (cnt = first_cnt; cnt
\r
493 && cnt->data_size != data_len
\r
494 && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next);
\r
497 cnt = GNUNET_malloc (sizeof (struct MetaCounter));
\r
499 cnt->data_size = data_len;
\r
500 cnt->plugin_name = plugin_name;
\r
502 cnt->format = format;
\r
503 cnt->data_mime_type = data_mime_type;
\r
505 if (first_cnt != NULL)
\r
507 if (first_cnt->prev != NULL)
\r
509 first_cnt->prev->next = cnt;
\r
510 cnt->prev = first_cnt->prev;
\r
512 first_cnt->prev = cnt;
\r
513 cnt->next = first_cnt;
\r
516 GNUNET_CONTAINER_multihashmap_put (map, &key, cnt,
\r
517 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
\r
524 * Allocates a struct ShareTreeItem and adds it to its parent.
\r
526 static struct ShareTreeItem *
\r
527 make_item (struct ShareTreeItem *parent)
\r
529 struct ShareTreeItem *item;
\r
530 item = GNUNET_malloc (sizeof (struct ShareTreeItem));
\r
532 item->parent = parent;
\r
534 GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail,
\r
540 * Extract metadata from a file and add it to the share tree
\r
542 * @param adc context to modify
\r
543 * @param filename name of the file to process
\r
546 extract_file (struct AddDirStack *ads, const char *filename)
\r
548 struct ShareTreeItem *item;
\r
549 const char *short_fn;
\r
551 item = make_item (ads->parent);
\r
553 GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES);
\r
554 item->is_directory = GNUNET_NO;
\r
556 item->meta = GNUNET_CONTAINER_meta_data_create ();
\r
557 GNUNET_FS_meta_data_extract_from_file (item->meta, filename,
\r
558 ads->adc->plugins);
\r
559 GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME,
\r
561 short_fn = GNUNET_STRINGS_get_short_name (filename);
\r
563 item->filename = GNUNET_strdup (filename);
\r
564 item->short_filename = GNUNET_strdup (short_fn);
\r
566 GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
\r
567 EXTRACTOR_METATYPE_FILENAME,
\r
568 EXTRACTOR_METAFORMAT_UTF8, "text/plain",
\r
569 short_fn, strlen (short_fn) + 1);
\r
573 * Remove the keyword from the ksk URI.
\r
575 * @param cls the ksk uri
\r
576 * @param keyword the word to remove
\r
577 * @param is_mandatory ignored
\r
578 * @return always GNUNET_OK
\r
581 remove_keyword (void *cls, const char *keyword, int is_mandatory)
\r
583 struct GNUNET_FS_Uri *ksk = cls;
\r
585 GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword);
\r
590 * Remove keywords from current directory's children, if they are
\r
591 * in the exluded keywords list of that directory.
\r
593 * @param cls the ksk uri
\r
594 * @param keyword the word to remove
\r
595 * @param is_mandatory ignored
\r
596 * @return always GNUNET_OK
\r
599 remove_keywords (struct ProcessMetadataStackItem *stack, struct ShareTreeItem *dir)
\r
601 struct ShareTreeItem *item;
\r
603 for (item = dir->children_head; item; item = item->next)
\r
605 if (stack->exclude_ksk != NULL)
\r
606 GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri);
\r
612 * Context passed to 'migrate_and_drop'.
\r
614 struct KeywordProcessContext
\r
617 * All the keywords we migrated to the parent.
\r
619 struct GNUNET_FS_Uri *ksk;
\r
622 * How often does a keyword have to occur to be
\r
623 * migrated to the parent?
\r
625 unsigned int threshold;
\r
629 * Context passed to 'migrate_and_drop'.
\r
631 struct MetaProcessContext
\r
634 * All the metadata we copy to the parent.
\r
636 struct GNUNET_CONTAINER_MetaData *meta;
\r
639 * How often does a metadata have to occur to be
\r
640 * migrated to the parent?
\r
642 unsigned int threshold;
\r
647 * Move "frequent" keywords over to the
\r
648 * target ksk uri, free the counters.
\r
652 migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value)
\r
654 struct KeywordProcessContext *kpc = cls;
\r
655 struct KeywordCounter *counter = value;
\r
657 if (counter->count >= kpc->threshold && counter->count > 1)
\r
659 GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO);
\r
661 GNUNET_free (counter);
\r
665 * Copy "frequent" metadata items over to the
\r
666 * target metadata container, free the counters.
\r
670 migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value)
\r
672 struct MetaProcessContext *mpc = cls;
\r
673 struct MetaCounter *counter = value;
\r
675 if (counter->count >= mpc->threshold && counter->count > 1)
\r
677 GNUNET_CONTAINER_meta_data_insert (mpc->meta,
\r
678 counter->plugin_name,
\r
681 counter->data_mime_type, counter->data,
\r
682 counter->data_size);
\r
684 GNUNET_free (counter);
\r
689 * Go over the collected keywords from all entries in the
\r
690 * directory and push common keywords up one level (by
\r
691 * adding it to the returned struct). Do the same for metadata.
\r
692 * Destroys keywordcounter and metacoutner for current directory.
\r
694 * @param adc collection of child meta data
\r
695 * @param exclude_ksk pointer to where moveable keywords will be stored
\r
696 * @param copy_meta pointer to where copyable metadata will be stored
\r
699 process_keywords_and_metadata (struct ProcessMetadataStackItem *stack,
\r
700 struct GNUNET_FS_Uri **exclude_ksk,
\r
701 struct GNUNET_CONTAINER_MetaData **copy_meta)
\r
703 struct KeywordProcessContext kpc;
\r
704 struct MetaProcessContext mpc;
\r
705 struct GNUNET_CONTAINER_MetaData *tmp;
\r
707 /* Surprisingly, it's impossible to create a ksk with 0 keywords directly.
\r
708 * But we can create one from an empty metadata set
\r
710 tmp = GNUNET_CONTAINER_meta_data_create ();
\r
711 kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp);
\r
712 GNUNET_CONTAINER_meta_data_destroy (tmp);
\r
713 mpc.meta = GNUNET_CONTAINER_meta_data_create ();
\r
715 kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */
\r
717 GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter,
\r
718 &migrate_and_drop, &kpc);
\r
719 GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter,
\r
720 &migrate_and_drop_metadata, &mpc);
\r
722 GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter);
\r
723 GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter);
\r
724 *exclude_ksk = kpc.ksk;
\r
725 *copy_meta = mpc.meta;
\r
729 * Function called by the directory iterator to
\r
730 * (recursively) add all of the files in the
\r
731 * directory to the tree.
\r
732 * Called by the directory scanner to initiate the
\r
734 * TODO: find a way to make it non-recursive.
\r
736 * @param cls the 'struct AddDirStack *' we're in
\r
737 * @param filename file or directory to scan
\r
740 scan_directory (void *cls, const char *filename)
\r
742 struct AddDirStack *ads = cls, recurse_ads;
\r
743 struct AddDirContext *adc = ads->adc;
\r
745 struct ShareTreeItem *item;
\r
746 const char *short_fn;
\r
751 return GNUNET_SYSERR;
\r
753 /* If the file doesn't exist (or is not statable for any other reason,
\r
754 * skip it, and report it.
\r
756 if (0 != STAT (filename, &sbuf))
\r
758 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
\r
759 GNUNET_DIR_SCANNER_DOES_NOT_EXIST);
\r
763 /* Report the progress */
\r
764 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
\r
765 GNUNET_DIR_SCANNER_NEW_FILE);
\r
768 /* We were asked to stop, acknowledge that and return */
\r
769 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
\r
770 GNUNET_DIR_SCANNER_ASKED_TO_STOP);
\r
771 return GNUNET_SYSERR;
\r
774 if (!S_ISDIR (sbuf.st_mode))
\r
775 extract_file (ads, filename);
\r
778 item = make_item (ads->parent);
\r
779 item->meta = GNUNET_CONTAINER_meta_data_create ();
\r
781 item->is_directory = GNUNET_YES;
\r
783 recurse_ads.adc = adc;
\r
784 recurse_ads.parent = item;
\r
786 /* recurse into directory */
\r
787 GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_ads);
\r
789 short_fn = GNUNET_STRINGS_get_short_name (filename);
\r
791 item->filename = GNUNET_strdup (filename);
\r
792 item->short_filename = GNUNET_strdup (short_fn);
\r
794 if (ads->parent == NULL)
\r
796 /* we're finished with the scan, make sure caller gets the top-level
\r
797 * directory pointer
\r
799 adc->toplevel = item;
\r
806 * Signals the scanner to finish the scan as fast as possible.
\r
808 * Can close the pipe if asked to, but that is only used by the
\r
809 * internal call to this function during cleanup. The client
\r
810 * must understand the consequences of closing the pipe too early.
\r
812 * @param ds directory scanner structure
\r
813 * @param close_pipe GNUNET_YES to close
\r
816 GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds,
\r
820 GNUNET_DISK_file_write (ds->stop_write, &c, 1);
\r
824 if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
\r
826 GNUNET_SCHEDULER_cancel (ds->progress_read_task);
\r
827 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
\r
829 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
\r
830 ds->progress_read = NULL;
\r
835 * Signals the scanner thread to finish (in case it isn't finishing
\r
836 * already) and joins the scanner thread. Closes the pipes, frees the
\r
837 * scanner contexts (both of them), returns the results of the scan.
\r
838 * Results are valid (and have to be freed) even if the scanner had
\r
839 * an error or was rushed to finish prematurely.
\r
840 * Blocks until the scanner is finished.
\r
842 * @param ds directory scanner structure
\r
843 * @return the results of the scan (a directory tree)
\r
845 struct ShareTreeItem *
\r
846 GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds)
\r
848 struct ShareTreeItem *result;
\r
850 GNUNET_FS_directory_scan_finish (ds, GNUNET_YES);
\r
852 WaitForSingleObject (ds->thread, INFINITE);
\r
853 CloseHandle (ds->thread);
\r
855 pthread_join (ds->thread, NULL);
\r
856 pthread_detach (ds->thread);
\r
859 GNUNET_DISK_pipe_close (ds->stop_pipe);
\r
860 GNUNET_DISK_pipe_close (ds->progress_pipe);
\r
861 result = ds->adc->toplevel;
\r
862 GNUNET_free (ds->adc);
\r
868 * The function from which the scanner thread starts
\r
875 run_directory_scan_thread (struct AddDirContext *adc)
\r
877 struct AddDirStack ads;
\r
880 scan_directory (&ads, adc->filename_expanded);
\r
881 GNUNET_free (adc->filename_expanded);
\r
882 if (adc->plugins != NULL)
\r
883 EXTRACTOR_plugin_remove_all (adc->plugins);
\r
884 /* Tell the initiator that we're finished, it can now join the thread */
\r
885 write_progress (adc, NULL, 0, GNUNET_DIR_SCANNER_FINISHED);
\r
890 * Called every time there is data to read from the scanner.
\r
891 * Calls the scanner progress handler.
\r
893 * @param cls the closure (directory scanner object)
\r
894 * @param tc task context in which the task is running
\r
897 read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
\r
899 struct GNUNET_FS_DirScanner *ds;
\r
901 enum GNUNET_DirScannerProgressUpdateReason reason;
\r
903 ssize_t total_read;
\r
905 size_t filename_len;
\r
911 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
\r
913 if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
\r
915 ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_DIR_SCANNER_SHUTDOWN);
\r
919 /* Read one message. If message is malformed or can't be read, end the scanner */
\r
920 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason));
\r
921 while (rd > 0 && total_read < sizeof (reason))
\r
923 rd = GNUNET_DISK_file_read (ds->progress_read,
\r
924 &((char *) &reason)[total_read],
\r
925 sizeof (reason) - total_read);
\r
929 if (total_read != sizeof (reason)
\r
930 || reason <= GNUNET_DIR_SCANNER_FIRST
\r
931 || reason >= GNUNET_DIR_SCANNER_LAST)
\r
934 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
\r
939 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len,
\r
941 while (rd > 0 && total_read < sizeof (size_t))
\r
943 rd = GNUNET_DISK_file_read (ds->progress_read,
\r
944 &((char *) &filename_len)[total_read],
\r
945 sizeof (size_t) - total_read);
\r
949 if (rd != sizeof (size_t))
\r
952 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
\r
957 if (filename_len == 0)
\r
959 else if (filename_len > MAX_PATH)
\r
962 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
\r
967 filename = GNUNET_malloc (filename_len);
\r
968 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename,
\r
970 while (rd > 0 && total_read < filename_len)
\r
972 rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read],
\r
973 filename_len - total_read);
\r
977 if (rd != filename_len)
\r
979 GNUNET_free (filename);
\r
980 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
\r
984 if (!end_it && filename_len > 0)
\r
986 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory,
\r
988 while (rd > 0 && total_read < sizeof (char))
\r
990 rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read],
\r
991 sizeof (char) - total_read);
\r
995 if (rd != sizeof (char))
\r
997 GNUNET_free (filename);
\r
998 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
\r
1004 end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason);
\r
1005 GNUNET_free (filename);
\r
1008 ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
\r
1009 GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
\r
1015 ds->progress_callback (ds->cls, ds, NULL, 0, reason);
\r
1021 * Start a directory scanner thread.
\r
1023 * @param filename name of the directory to scan
\r
1024 * @param GNUNET_YES to not to run libextractor on files (only build a tree)
\r
1025 * @param ex if not NULL, must be a list of extra plugins for extractor
\r
1026 * @param cb the callback to call when there are scanning progress messages
\r
1027 * @param cls closure for 'cb'
\r
1028 * @return directory scanner object to be used for controlling the scanner
\r
1030 struct GNUNET_FS_DirScanner *
\r
1031 GNUNET_FS_directory_scan_start (const char *filename,
\r
1032 int disable_extractor, const char *ex,
\r
1033 GNUNET_FS_DirScannerProgressCallback cb, void *cls)
\r
1036 struct AddDirContext *adc;
\r
1037 char *filename_expanded;
\r
1038 struct GNUNET_FS_DirScanner *ds;
\r
1039 struct GNUNET_DISK_PipeHandle *progress_pipe;
\r
1042 if (0 != STAT (filename, &sbuf))
\r
1044 /* TODO: consider generalizing this for files too! */
\r
1045 if (!S_ISDIR (sbuf.st_mode))
\r
1050 /* scan_directory() is guaranteed to be given expanded filenames,
\r
1051 * so expand we will!
\r
1053 filename_expanded = GNUNET_STRINGS_filename_expand (filename);
\r
1054 if (filename_expanded == NULL)
\r
1057 progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
\r
1058 if (progress_pipe == NULL)
\r
1060 GNUNET_free (filename_expanded);
\r
1064 adc = GNUNET_malloc (sizeof (struct AddDirContext));
\r
1066 ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
\r
1070 ds->stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
\r
1071 if (ds->stop_pipe == NULL)
\r
1073 GNUNET_free (adc);
\r
1075 GNUNET_free (filename_expanded);
\r
1076 GNUNET_DISK_pipe_close (progress_pipe);
\r
1079 ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe,
\r
1080 GNUNET_DISK_PIPE_END_WRITE);
\r
1081 adc->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
\r
1082 GNUNET_DISK_PIPE_END_READ);
\r
1084 adc->plugins = NULL;
\r
1085 if (!disable_extractor)
\r
1087 adc->plugins = EXTRACTOR_plugin_add_defaults (
\r
1088 EXTRACTOR_OPTION_DEFAULT_POLICY);
\r
1089 if (ex && strlen (ex) > 0)
\r
1090 adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex,
\r
1091 EXTRACTOR_OPTION_DEFAULT_POLICY);
\r
1094 adc->filename_expanded = filename_expanded;
\r
1095 adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
\r
1096 GNUNET_DISK_PIPE_END_WRITE);
\r
1099 ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
\r
1100 GNUNET_DISK_PIPE_END_READ);
\r
1103 ds->thread = CreateThread (NULL, 0,
\r
1104 (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc,
\r
1106 ok = ds->thread != NULL;
\r
1108 ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread,
\r
1113 GNUNET_free (adc);
\r
1114 GNUNET_free (filename_expanded);
\r
1115 GNUNET_DISK_pipe_close (progress_pipe);
\r
1120 ds->progress_callback = cb;
\r
1123 ds->progress_pipe = progress_pipe;
\r
1125 ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
\r
1126 GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
\r
1133 * Task that post-processes the share item tree.
\r
1134 * This processing has to be done in the main thread, because
\r
1135 * it requires access to libgcrypt's hashing functions, and
\r
1136 * libgcrypt is not thread-safe without some special magic.
\r
1138 * @param cls top of the stack
\r
1139 * @param tc task context
\r
1142 trim_share_tree_task (void *cls,
\r
1143 const struct GNUNET_SCHEDULER_TaskContext *tc)
\r
1145 struct ProcessMetadataStackItem *stack = cls;
\r
1146 struct ProcessMetadataStackItem *next = stack;
\r
1147 /* FIXME: figure out what to do when tc says we're shutting down */
\r
1149 /* item == NULL means that we've just finished going over the children of
\r
1150 * current directory.
\r
1152 if (stack->item == NULL)
\r
1154 if (stack->parent->item != NULL)
\r
1156 /* end of a directory */
\r
1157 struct GNUNET_FS_Uri *ksk;
\r
1159 /* use keyword and metadata counters to create lists of keywords to move
\r
1160 * and metadata to copy.
\r
1162 process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta);
\r
1164 /* create keywords from metadata (copies all text-metadata as keywords,
\r
1165 * AND parses the directory name we've just added, producing even more
\r
1167 * then merge these keywords with the ones moved from children.
\r
1169 ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta);
\r
1170 stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk);
\r
1171 GNUNET_FS_uri_destroy (ksk);
\r
1173 /* remove moved keywords from children (complete the move) */
\r
1174 remove_keywords (stack->parent, stack->parent->item);
\r
1175 GNUNET_FS_uri_destroy (stack->parent->exclude_ksk);
\r
1177 /* go up the stack */
\r
1178 next = stack->parent;
\r
1179 GNUNET_free (stack);
\r
1180 next->end_directory = GNUNET_YES;
\r
1184 /* we've just finished processing the toplevel directory */
\r
1185 struct ProcessMetadataContext *ctx = stack->ctx;
\r
1187 GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls,
\r
1188 GNUNET_SCHEDULER_REASON_PREREQ_DONE);
\r
1189 GNUNET_free (stack->parent);
\r
1190 GNUNET_free (stack);
\r
1191 GNUNET_free (ctx);
\r
1194 else if (stack->item->is_directory
\r
1195 && !stack->end_directory
\r
1196 && stack->item->children_head != NULL)
\r
1198 /* recurse into subdirectory */
\r
1199 next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
\r
1200 next->ctx = stack->ctx;
\r
1201 next->item = stack->item->children_head;
\r
1202 next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
\r
1203 next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
\r
1204 next->dir_entry_count = 0;
\r
1205 next->parent = stack;
\r
1209 /* process a child entry (a file or a directory) and move to the next one*/
\r
1210 if (stack->item->is_directory)
\r
1211 stack->end_directory = GNUNET_NO;
\r
1212 stack->dir_entry_count++;
\r
1213 GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter);
\r
1215 if (stack->item->is_directory)
\r
1217 char *user = getenv ("USER");
\r
1218 if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user))))
\r
1220 /* only use filename if it doesn't match $USER */
\r
1221 GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
\r
1222 EXTRACTOR_METATYPE_FILENAME,
\r
1223 EXTRACTOR_METAFORMAT_UTF8,
\r
1224 "text/plain", stack->item->short_filename,
\r
1225 strlen (stack->item->short_filename) + 1);
\r
1226 GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
\r
1227 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
\r
1228 EXTRACTOR_METAFORMAT_UTF8,
\r
1229 "text/plain", stack->item->short_filename,
\r
1230 strlen (stack->item->short_filename) + 1);
\r
1234 stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta);
\r
1235 GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter);
\r
1236 stack->item = stack->item->next;
\r
1238 /* Call this task again later, if there are more entries to process */
\r
1240 GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next,
\r
1241 GNUNET_SCHEDULER_REASON_PREREQ_DONE);
\r
1245 * Process a share item tree, moving frequent keywords up and
\r
1246 * copying frequent metadata up.
\r
1248 * @param toplevel toplevel directory in the tree, returned by the scanner
\r
1249 * @param cb called after processing is done
\r
1250 * @param cls closure for 'cb'
\r
1252 struct ProcessMetadataContext *
\r
1253 GNUNET_FS_trim_share_tree (struct ShareTreeItem *toplevel,
\r
1254 GNUNET_SCHEDULER_Task cb, void *cls)
\r
1256 struct ProcessMetadataContext *ret;
\r
1258 if (toplevel == NULL)
\r
1260 struct GNUNET_SCHEDULER_TaskContext tc;
\r
1261 tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE;
\r
1266 ret = GNUNET_malloc (sizeof (struct ProcessMetadataContext));
\r
1267 ret->toplevel = toplevel;
\r
1268 ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
\r
1269 ret->stack->ctx = ret;
\r
1270 ret->stack->item = toplevel;
\r
1271 ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
\r
1272 ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
\r
1273 ret->stack->dir_entry_count = 0;
\r
1274 ret->stack->end_directory = GNUNET_NO;
\r
1276 /* dummy stack entry that tells us we're at the top of the stack */
\r
1277 ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
\r
1278 ret->stack->parent->ctx = ret;
\r
1283 GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack,
\r
1284 GNUNET_SCHEDULER_REASON_PREREQ_DONE);
\r