2 This file is part of GNUnet
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 #include "gnunet_fs_service.h"
23 #include "gnunet_scheduler_lib.h"
27 * Entry for each unique keyword to track how often
28 * it occurred. Contains the keyword and the counter.
34 * Keyword that was found.
39 * How many files have this keyword?
44 * This is a doubly-linked list
46 struct KeywordCounter *prev;
49 * This is a doubly-linked list
51 struct KeywordCounter *next;
55 * Aggregate information we keep for meta data in each directory.
60 * The actual meta data.
65 * Number of bytes in 'data'.
70 * Name of the plugin that provided that piece of metadata
72 const char *plugin_name;
77 enum EXTRACTOR_MetaType type;
82 enum EXTRACTOR_MetaFormat format;
85 * MIME-type of the metadata itself
87 const char *data_mime_type;
90 * How many files have meta entries matching this value?
91 * (type and format do not have to match).
96 * This is a doubly-linked list
98 struct MetaCounter *prev;
101 * This is a doubly-linked list
103 struct MetaCounter *next;
106 struct AddDirContext;
109 * A structure used to hold a pointer to the tree item that is being
111 * Needed to avoid changing the context for every recursive call.
118 struct AddDirContext *adc;
123 struct GNUNET_FS_ShareTreeItem *parent;
127 * Execution context for 'add_dir'
128 * Owned by the initiator thread.
133 * After the scan is finished, it will contain a pointer to the
134 * top-level directory entry in the directory tree built by the
137 struct GNUNET_FS_ShareTreeItem *toplevel;
140 * Expanded filename (as given by the scan initiator).
141 * The scanner thread stores a copy here, and frees it when it finishes.
143 char *filename_expanded;
146 * A pipe end to read signals from.
147 * Owned by the initiator thread.
149 const struct GNUNET_DISK_FileHandle *stop_read;
152 * 1 if the scanner should stop, 0 otherwise. Set in response
153 * to communication errors or when the initiator wants the scanning
159 * Handle of the pipe end into which the progress messages are written
160 * The pipe is owned by the initiator thread, and there's no way to
161 * close this end without having access to the pipe, so it won't
162 * be closed by the scanner thread.
163 * The initiator MUST keep it alive until the scanner thread is finished.
165 const struct GNUNET_DISK_FileHandle *progress_write;
169 * List of libextractor plugins to use for extracting.
170 * Initialized when the scan starts, removed when it finishes.
172 struct EXTRACTOR_PluginList *plugins;
176 * An opaque structure a pointer to which is returned to the
177 * caller to be used to control the scanner.
179 struct GNUNET_FS_DirScanner
182 * A pipe end to write stop signals to.
183 * Owned by the initiator thread.
185 const struct GNUNET_DISK_FileHandle *stop_write;
188 * A pipe used to transfer signals to the scanner.
189 * Owned by the initiator thread.
191 struct GNUNET_DISK_PipeHandle *stop_pipe;
194 * A thread object for the scanner thread.
195 * Owned by the initiator thread.
204 * A task for reading progress messages from the scanner.
206 GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
209 * The end of the pipe that is used to read progress messages.
211 const struct GNUNET_DISK_FileHandle *progress_read;
214 * The pipe that is used to read progress messages.
215 * Owned (along with both of its ends) by the initiator thread.
216 * Only closed after the scanner thread is finished.
218 struct GNUNET_DISK_PipeHandle *progress_pipe;
221 * The function that will be called every time there's a progress
224 GNUNET_FS_DirScannerProgressCallback progress_callback;
227 * A closure for progress_callback.
232 * A pointer to the context of the scanner.
233 * Owned by the initiator thread.
234 * Initiator thread shouldn't touch it until the scanner thread
237 struct AddDirContext *adc;
241 * A structure that forms a singly-linked list that serves as a stack
242 * for metadata-processing function.
244 struct ProcessMetadataStackItem
247 * A pointer to metadata-processing context.
248 * The same in every stack item.
250 struct GNUNET_FS_ProcessMetadataContext *ctx;
253 * This is a singly-linked list. A pointer to its end is kept, and
254 * this pointer is used to walk it backwards.
256 struct ProcessMetadataStackItem *parent;
259 * Map from the hash over the keyword to a 'struct KeywordCounter *'
260 * counter that says how often this keyword was
261 * encountered in the current directory.
263 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
266 * Map from the hash over the metadata to a 'struct MetaCounter *'
267 * counter that says how often this metadata was
268 * encountered in the current directory.
270 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
273 * Number of files in the current directory.
275 unsigned int dir_entry_count;
278 * Keywords to exclude from using for KSK since they'll be associated
279 * with the parent as well. NULL for nothing blocked.
281 struct GNUNET_FS_Uri *exclude_ksk;
284 * A share tree item that is being processed.
286 struct GNUNET_FS_ShareTreeItem *item;
289 * Set to GNUNET_YES to indicate that the directory pointed to by 'item'
290 * was processed, and we should move on to the next.
291 * Otherwise the directory will be recursed into.
298 * The structure to keep the state of metadata processing
300 struct GNUNET_FS_ProcessMetadataContext
303 * The top of the stack.
305 struct ProcessMetadataStackItem *stack;
308 * Callback to invoke when processing is finished
310 GNUNET_SCHEDULER_Task cb;
318 * Toplevel directory item of the tree to process.
320 struct GNUNET_FS_ShareTreeItem *toplevel;
324 * Called every now and then by the scanner.
325 * Checks the synchronization primitive.
326 * Returns 1 if the scanner should stop, 0 otherwise.
329 should_stop (struct AddDirContext *adc)
333 if (GNUNET_DISK_file_read_non_blocking (adc->stop_read, &c, 1) == 1
342 * Write progress message.
344 * <reason><filename length><filename><directory flag>
345 * If filename is NULL, filename is not written, and its length
346 * is written as 0, and nothing else is written. It signals the initiator
347 * thread that the scanner is finished, and that it can now join its thread.
349 * Also checks if the initiator thread wants the scanner to stop.
350 * Returns 1 to stop scanning (if the signal was received, or
351 * if the pipe was broken somehow), 0 otherwise.
354 write_progress (struct AddDirContext *adc, const char *filename,
355 char is_directory, enum GNUNET_FS_DirScannerProgressUpdateReason reason)
360 if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_DIR_SCANNER_ASKED_TO_STOP
361 && reason != GNUNET_DIR_SCANNER_FINISHED)
365 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (reason))
367 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
368 &((char *)&reason)[total_write], sizeof (reason) - total_write);
372 if (sizeof (reason) != total_write)
373 return adc->do_stop = 1;
375 filename_len = strlen (filename) + 1;
380 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (size_t))
382 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
383 &((char *)&filename_len)[total_write], sizeof (size_t) - total_write);
387 if (sizeof (size_t) != total_write)
388 return adc->do_stop = 1;
393 while ((wr > 0 || errno == EAGAIN) && total_write < filename_len)
395 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
396 &((char *)filename)[total_write], filename_len - total_write);
400 if (filename_len != total_write)
401 return adc->do_stop = 1;
404 while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (char))
406 wr = GNUNET_DISK_file_write_blocking (adc->progress_write,
407 &((char *)&is_directory)[total_write], sizeof (char) - total_write);
411 if (sizeof (char) != total_write)
412 return adc->do_stop = 1;
418 * Add the given keyword to the
419 * keyword statistics tracker.
421 * @param cls the container multihashmap to update
422 * @param keyword the keyword to count
423 * @param is_mandatory ignored
424 * @return always GNUNET_OK
427 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
429 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
430 struct KeywordCounter *cnt, *first_cnt;
434 klen = strlen (keyword) + 1;
435 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
436 /* Since the map might contain multiple values per keyword, we only
437 * store one value, and attach all other to it, forming a linked list.
438 * Somewhat easier than retrieving multiple items via callback.
440 first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
441 for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next);
444 cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
445 cnt->value = (const char *) &cnt[1];
446 memcpy (&cnt[1], keyword, klen);
447 if (first_cnt != NULL)
449 if (first_cnt->prev != NULL)
451 first_cnt->prev->next = cnt;
452 cnt->prev = first_cnt->prev;
454 first_cnt->prev = cnt;
455 cnt->next = first_cnt;
458 GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt,
459 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
466 * Type of a function that libextractor calls for each
467 * meta data item found.
469 * @param cls the container multihashmap to update
470 * @param plugin_name name of the plugin that produced this value;
471 * special values can be used (i.e. '<zlib>' for zlib being
472 * used in the main libextractor library and yielding
474 * @param type libextractor-type describing the meta data
475 * @param format basic format information about data
476 * @param data_mime_type mime-type of data (not of the original file);
477 * can be NULL (if mime-type is not known)
478 * @param data actual meta-data found
479 * @param data_len number of bytes in data
480 * @return GNUNET_OK to continue extracting / iterating
483 add_to_meta_counter (void *cls, const char *plugin_name,
484 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
485 const char *data_mime_type, const char *data, size_t data_len)
487 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
489 struct MetaCounter *cnt, *first_cnt;
491 GNUNET_CRYPTO_hash (data, data_len, &key);
492 first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
493 for (cnt = first_cnt; cnt
494 && cnt->data_size != data_len
495 && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next);
498 cnt = GNUNET_malloc (sizeof (struct MetaCounter));
500 cnt->data_size = data_len;
501 cnt->plugin_name = plugin_name;
503 cnt->format = format;
504 cnt->data_mime_type = data_mime_type;
506 if (first_cnt != NULL)
508 if (first_cnt->prev != NULL)
510 first_cnt->prev->next = cnt;
511 cnt->prev = first_cnt->prev;
513 first_cnt->prev = cnt;
514 cnt->next = first_cnt;
517 GNUNET_CONTAINER_multihashmap_put (map, &key, cnt,
518 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
525 * Allocates a struct GNUNET_FS_ShareTreeItem and adds it to its parent.
527 static struct GNUNET_FS_ShareTreeItem *
528 make_item (struct GNUNET_FS_ShareTreeItem *parent)
530 struct GNUNET_FS_ShareTreeItem *item;
531 item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem));
533 item->parent = parent;
535 GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail,
541 * Extract metadata from a file and add it to the share tree
543 * @param ads context to modify
544 * @param filename name of the file to process
547 extract_file (struct AddDirStack *ads, const char *filename)
549 struct GNUNET_FS_ShareTreeItem *item;
550 const char *short_fn;
552 item = make_item (ads->parent);
554 GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES);
555 item->is_directory = GNUNET_NO;
557 item->meta = GNUNET_CONTAINER_meta_data_create ();
558 GNUNET_FS_meta_data_extract_from_file (item->meta, filename,
560 GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME,
562 short_fn = GNUNET_STRINGS_get_short_name (filename);
564 item->filename = GNUNET_strdup (filename);
565 item->short_filename = GNUNET_strdup (short_fn);
567 GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
568 EXTRACTOR_METATYPE_FILENAME,
569 EXTRACTOR_METAFORMAT_UTF8, "text/plain",
570 short_fn, strlen (short_fn) + 1);
574 * Remove the keyword from the ksk URI.
576 * @param cls the ksk uri
577 * @param keyword the word to remove
578 * @param is_mandatory ignored
579 * @return always GNUNET_OK
582 remove_keyword (void *cls, const char *keyword, int is_mandatory)
584 struct GNUNET_FS_Uri *ksk = cls;
586 GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword);
591 * Remove keywords from current directory's children, if they are
592 * in the excluded keywords list of that directory.
594 * @param stack stack item whose 'exclude_ksk' lists the keywords to remove
595 * @param dir directory whose children's KSK URIs are
596 * stripped of those keywords
597 * @return always GNUNET_OK
600 remove_keywords (struct ProcessMetadataStackItem *stack, struct GNUNET_FS_ShareTreeItem *dir)
602 struct GNUNET_FS_ShareTreeItem *item;
604 for (item = dir->children_head; item; item = item->next)
606 if (stack->exclude_ksk != NULL)
607 GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri);
613 * Context passed to 'migrate_and_drop'.
615 struct KeywordProcessContext
618 * All the keywords we migrated to the parent.
620 struct GNUNET_FS_Uri *ksk;
623 * How often does a keyword have to occur to be
624 * migrated to the parent?
626 unsigned int threshold;
630 * Context passed to 'migrate_and_drop_metadata'.
632 struct MetaProcessContext
635 * All the metadata we copy to the parent.
637 struct GNUNET_CONTAINER_MetaData *meta;
640 * How often does a metadata have to occur to be
641 * migrated to the parent?
643 unsigned int threshold;
648 * Move "frequent" keywords over to the
649 * target ksk uri, free the counters.
653 migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value)
655 struct KeywordProcessContext *kpc = cls;
656 struct KeywordCounter *counter = value;
658 if (counter->count >= kpc->threshold && counter->count > 1)
660 GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO);
662 GNUNET_free (counter);
666 * Copy "frequent" metadata items over to the
667 * target metadata container, free the counters.
671 migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value)
673 struct MetaProcessContext *mpc = cls;
674 struct MetaCounter *counter = value;
676 if (counter->count >= mpc->threshold && counter->count > 1)
678 GNUNET_CONTAINER_meta_data_insert (mpc->meta,
679 counter->plugin_name,
682 counter->data_mime_type, counter->data,
685 GNUNET_free (counter);
690 * Go over the collected keywords from all entries in the
691 * directory and push common keywords up one level (by
692 * adding it to the returned struct). Do the same for metadata.
693 * Destroys keywordcounter and metacounter for current directory.
695 * @param stack collection of child meta data
696 * @param exclude_ksk pointer to where moveable keywords will be stored
697 * @param copy_meta pointer to where copyable metadata will be stored
700 process_keywords_and_metadata (struct ProcessMetadataStackItem *stack,
701 struct GNUNET_FS_Uri **exclude_ksk,
702 struct GNUNET_CONTAINER_MetaData **copy_meta)
704 struct KeywordProcessContext kpc;
705 struct MetaProcessContext mpc;
706 struct GNUNET_CONTAINER_MetaData *tmp;
708 /* Surprisingly, it's impossible to create a ksk with 0 keywords directly.
709 * But we can create one from an empty metadata set
711 tmp = GNUNET_CONTAINER_meta_data_create ();
712 kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp);
713 GNUNET_CONTAINER_meta_data_destroy (tmp);
714 mpc.meta = GNUNET_CONTAINER_meta_data_create ();
716 kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */
718 GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter,
719 &migrate_and_drop, &kpc);
720 GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter,
721 &migrate_and_drop_metadata, &mpc);
723 GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter);
724 GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter);
725 *exclude_ksk = kpc.ksk;
726 *copy_meta = mpc.meta;
730 * Function called by the directory iterator to
731 * (recursively) add all of the files in the
732 * directory to the tree.
733 * Called by the directory scanner to initiate the
735 * TODO: find a way to make it non-recursive.
737 * @param cls the 'struct AddDirStack *' we're in
738 * @param filename file or directory to scan
741 scan_directory (void *cls, const char *filename)
743 struct AddDirStack *ads = cls, recurse_ads;
744 struct AddDirContext *adc = ads->adc;
746 struct GNUNET_FS_ShareTreeItem *item;
747 const char *short_fn;
752 return GNUNET_SYSERR;
754 /* If the file doesn't exist (or is not statable for any other reason),
755 * skip it, and report it.
757 if (0 != STAT (filename, &sbuf))
759 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
760 GNUNET_DIR_SCANNER_DOES_NOT_EXIST);
764 /* Report the progress */
765 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
766 GNUNET_DIR_SCANNER_NEW_FILE);
769 /* We were asked to stop, acknowledge that and return */
770 do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode),
771 GNUNET_DIR_SCANNER_ASKED_TO_STOP);
772 return GNUNET_SYSERR;
775 if (!S_ISDIR (sbuf.st_mode))
776 extract_file (ads, filename);
779 item = make_item (ads->parent);
780 item->meta = GNUNET_CONTAINER_meta_data_create ();
782 item->is_directory = GNUNET_YES;
784 recurse_ads.adc = adc;
785 recurse_ads.parent = item;
787 /* recurse into directory */
788 GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_ads);
790 short_fn = GNUNET_STRINGS_get_short_name (filename);
792 item->filename = GNUNET_strdup (filename);
793 item->short_filename = GNUNET_strdup (short_fn);
795 if (ads->parent == NULL)
797 /* we're finished with the scan, make sure caller gets the top-level
800 adc->toplevel = item;
807 * Signals the scanner to finish the scan as fast as possible.
809 * Can close the pipe if asked to, but that is only used by the
810 * internal call to this function during cleanup. The client
811 * must understand the consequences of closing the pipe too early.
813 * @param ds directory scanner structure
814 * @param close_pipe GNUNET_YES to close
817 GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds,
821 GNUNET_DISK_file_write (ds->stop_write, &c, 1);
825 if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
827 GNUNET_SCHEDULER_cancel (ds->progress_read_task);
828 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
830 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
831 ds->progress_read = NULL;
836 * Signals the scanner thread to finish (in case it isn't finishing
837 * already) and joins the scanner thread. Closes the pipes, frees the
838 * scanner contexts (both of them), returns the results of the scan.
839 * Results are valid (and have to be freed) even if the scanner had
840 * an error or was rushed to finish prematurely.
841 * Blocks until the scanner is finished.
843 * @param ds directory scanner structure
844 * @return the results of the scan (a directory tree)
846 struct GNUNET_FS_ShareTreeItem *
847 GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds)
849 struct GNUNET_FS_ShareTreeItem *result;
851 GNUNET_FS_directory_scan_finish (ds, GNUNET_YES);
853 WaitForSingleObject (ds->thread, INFINITE);
854 CloseHandle (ds->thread);
856 pthread_join (ds->thread, NULL);
857 pthread_detach (ds->thread);
860 GNUNET_DISK_pipe_close (ds->stop_pipe);
861 GNUNET_DISK_pipe_close (ds->progress_pipe);
862 result = ds->adc->toplevel;
863 GNUNET_free (ds->adc);
869 * The function from which the scanner thread starts
876 run_directory_scan_thread (void *cls)
878 struct AddDirContext *adc = cls;
879 struct AddDirStack ads;
882 scan_directory (&ads, adc->filename_expanded);
883 GNUNET_free (adc->filename_expanded);
884 if (adc->plugins != NULL)
885 EXTRACTOR_plugin_remove_all (adc->plugins);
886 /* Tell the initiator that we're finished, it can now join the thread */
887 write_progress (adc, NULL, 0, GNUNET_DIR_SCANNER_FINISHED);
892 * Called every time there is data to read from the scanner.
893 * Calls the scanner progress handler.
895 * @param cls the closure (directory scanner object)
896 * @param tc task context in which the task is running
899 read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
901 struct GNUNET_FS_DirScanner *ds;
903 enum GNUNET_FS_DirScannerProgressUpdateReason reason;
913 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
915 if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
917 ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_DIR_SCANNER_SHUTDOWN);
921 /* Read one message. If message is malformed or can't be read, end the scanner */
922 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason));
923 while (rd > 0 && total_read < sizeof (reason))
925 rd = GNUNET_DISK_file_read (ds->progress_read,
926 &((char *) &reason)[total_read],
927 sizeof (reason) - total_read);
931 if (total_read != sizeof (reason)
932 || reason <= GNUNET_DIR_SCANNER_FIRST
933 || reason >= GNUNET_DIR_SCANNER_LAST)
936 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
941 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len,
943 while (rd > 0 && total_read < sizeof (size_t))
945 rd = GNUNET_DISK_file_read (ds->progress_read,
946 &((char *) &filename_len)[total_read],
947 sizeof (size_t) - total_read);
951 if (rd != sizeof (size_t))
954 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
959 if (filename_len == 0)
961 else if (filename_len > PATH_MAX)
964 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
969 filename = GNUNET_malloc (filename_len);
970 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename,
972 while (rd > 0 && total_read < filename_len)
974 rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read],
975 filename_len - total_read);
979 if (rd != filename_len)
981 GNUNET_free (filename);
982 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
986 if (!end_it && filename_len > 0)
988 total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory,
990 while (rd > 0 && total_read < sizeof (char))
992 rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read],
993 sizeof (char) - total_read);
997 if (rd != sizeof (char))
999 GNUNET_free (filename);
1000 reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR;
1006 end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason);
1007 GNUNET_free (filename);
1010 ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
1011 GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
1017 ds->progress_callback (ds->cls, ds, NULL, 0, reason);
1023 * Start a directory scanner thread.
1025 * @param filename name of the directory to scan
1026 * @param disable_extractor GNUNET_YES to not run libextractor on files (only build a tree)
1027 * @param ex if not NULL, must be a list of extra plugins for extractor
1028 * @param cb the callback to call when there are scanning progress messages
1029 * @param cls closure for 'cb'
1030 * @return directory scanner object to be used for controlling the scanner
1032 struct GNUNET_FS_DirScanner *
1033 GNUNET_FS_directory_scan_start (const char *filename,
1034 int disable_extractor, const char *ex,
1035 GNUNET_FS_DirScannerProgressCallback cb, void *cls)
1038 struct AddDirContext *adc;
1039 char *filename_expanded;
1040 struct GNUNET_FS_DirScanner *ds;
1041 struct GNUNET_DISK_PipeHandle *progress_pipe;
1044 if (0 != STAT (filename, &sbuf))
1046 /* TODO: consider generalizing this for files too! */
1047 if (!S_ISDIR (sbuf.st_mode))
1052 /* scan_directory() is guaranteed to be given expanded filenames,
1053 * so expand we will!
1055 filename_expanded = GNUNET_STRINGS_filename_expand (filename);
1056 if (filename_expanded == NULL)
1059 progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
1060 if (progress_pipe == NULL)
1062 GNUNET_free (filename_expanded);
1066 adc = GNUNET_malloc (sizeof (struct AddDirContext));
1068 ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
1072 ds->stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
1073 if (ds->stop_pipe == NULL)
1077 GNUNET_free (filename_expanded);
1078 GNUNET_DISK_pipe_close (progress_pipe);
1081 ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe,
1082 GNUNET_DISK_PIPE_END_WRITE);
1083 adc->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
1084 GNUNET_DISK_PIPE_END_READ);
1086 adc->plugins = NULL;
1087 if (!disable_extractor)
1089 adc->plugins = EXTRACTOR_plugin_add_defaults (
1090 EXTRACTOR_OPTION_DEFAULT_POLICY);
1091 if (ex && strlen (ex) > 0)
1092 adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex,
1093 EXTRACTOR_OPTION_DEFAULT_POLICY);
1096 adc->filename_expanded = filename_expanded;
1097 adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
1098 GNUNET_DISK_PIPE_END_WRITE);
1101 ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
1102 GNUNET_DISK_PIPE_END_READ);
1105 ds->thread = CreateThread (NULL, 0,
1106 (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc,
1108 ok = ds->thread != NULL;
1110 ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread,
1116 GNUNET_free (filename_expanded);
1117 GNUNET_DISK_pipe_close (progress_pipe);
1122 ds->progress_callback = cb;
1125 ds->progress_pipe = progress_pipe;
1127 ds->progress_read_task = GNUNET_SCHEDULER_add_read_file (
1128 GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task,
1135 * Task that post-processes the share item tree.
1136 * This processing has to be done in the main thread, because
1137 * it requires access to libgcrypt's hashing functions, and
1138 * libgcrypt is not thread-safe without some special magic.
1140 * @param cls top of the stack
1141 * @param tc task context
1144 trim_share_tree_task (void *cls,
1145 const struct GNUNET_SCHEDULER_TaskContext *tc)
1147 struct ProcessMetadataStackItem *stack = cls;
1148 struct ProcessMetadataStackItem *next = stack;
1149 /* FIXME: figure out what to do when tc says we're shutting down */
1151 /* item == NULL means that we've just finished going over the children of
1152 * current directory.
1154 if (stack->item == NULL)
1156 if (stack->parent->item != NULL)
1158 /* end of a directory */
1159 struct GNUNET_FS_Uri *ksk;
1161 /* use keyword and metadata counters to create lists of keywords to move
1162 * and metadata to copy.
1164 process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta);
1166 /* create keywords from metadata (copies all text-metadata as keywords,
1167 * AND parses the directory name we've just added, producing even more
1169 * then merge these keywords with the ones moved from children.
1171 ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta);
1172 stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk);
1173 GNUNET_FS_uri_destroy (ksk);
1175 /* remove moved keywords from children (complete the move) */
1176 remove_keywords (stack->parent, stack->parent->item);
1177 GNUNET_FS_uri_destroy (stack->parent->exclude_ksk);
1179 /* go up the stack */
1180 next = stack->parent;
1181 GNUNET_free (stack);
1182 next->end_directory = GNUNET_YES;
1186 /* we've just finished processing the toplevel directory */
1187 struct GNUNET_FS_ProcessMetadataContext *ctx = stack->ctx;
1189 GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls,
1190 GNUNET_SCHEDULER_REASON_PREREQ_DONE);
1191 GNUNET_free (stack->parent);
1192 GNUNET_free (stack);
1196 else if (stack->item->is_directory
1197 && !stack->end_directory
1198 && stack->item->children_head != NULL)
1200 /* recurse into subdirectory */
1201 next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
1202 next->ctx = stack->ctx;
1203 next->item = stack->item->children_head;
1204 next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
1205 next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
1206 next->dir_entry_count = 0;
1207 next->parent = stack;
1211 /* process a child entry (a file or a directory) and move to the next one*/
1212 if (stack->item->is_directory)
1213 stack->end_directory = GNUNET_NO;
1214 stack->dir_entry_count++;
1215 GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter);
1217 if (stack->item->is_directory)
1219 char *user = getenv ("USER");
1220 if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user))))
1222 /* only use filename if it doesn't match $USER */
1223 GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
1224 EXTRACTOR_METATYPE_FILENAME,
1225 EXTRACTOR_METAFORMAT_UTF8,
1226 "text/plain", stack->item->short_filename,
1227 strlen (stack->item->short_filename) + 1);
1228 GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>",
1229 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
1230 EXTRACTOR_METAFORMAT_UTF8,
1231 "text/plain", stack->item->short_filename,
1232 strlen (stack->item->short_filename) + 1);
1236 stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta);
1237 GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter);
1238 stack->item = stack->item->next;
1240 /* Call this task again later, if there are more entries to process */
1242 GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next,
1243 GNUNET_SCHEDULER_REASON_PREREQ_DONE);
1247 * Process a share item tree, moving frequent keywords up and
1248 * copying frequent metadata up.
1250 * @param toplevel toplevel directory in the tree, returned by the scanner
1251 * @param cb called after processing is done
1252 * @param cls closure for 'cb'
1254 struct GNUNET_FS_ProcessMetadataContext *
1255 GNUNET_FS_trim_share_tree (struct GNUNET_FS_ShareTreeItem *toplevel,
1256 GNUNET_SCHEDULER_Task cb, void *cls)
1258 struct GNUNET_FS_ProcessMetadataContext *ret;
1260 if (toplevel == NULL)
1262 struct GNUNET_SCHEDULER_TaskContext tc;
1263 tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE;
1268 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_ProcessMetadataContext));
1269 ret->toplevel = toplevel;
1270 ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
1271 ret->stack->ctx = ret;
1272 ret->stack->item = toplevel;
1273 ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
1274 ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
1275 ret->stack->dir_entry_count = 0;
1276 ret->stack->end_directory = GNUNET_NO;
1278 /* dummy stack entry that tells us we're at the top of the stack */
1279 ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem));
1280 ret->stack->parent->ctx = ret;
1285 GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack,
1286 GNUNET_SCHEDULER_REASON_PREREQ_DONE);