2 This file is part of GNUnet
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_dirmetascan.c
23 * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
24 * from an on-disk directory for publishing
26 * @author Christian Grothoff
29 #include "gnunet_fs_service.h"
30 #include "gnunet_scheduler_lib.h"
35 * An opaque structure; a pointer to it is returned to the
36 * caller, who uses it to control the scanner.
38 struct GNUNET_FS_DirScanner
42 * A thread object for the scanner thread.
51 * Expanded filename (as given by the scan initiator).
52 * The scanner thread stores a copy here, and frees it when it finishes.
54 char *filename_expanded;
57 * List of libextractor plugins to use for extracting.
58 * Initialized when the scan starts, removed when it finishes.
60 struct EXTRACTOR_PluginList *plugins;
63 * A pipe to transfer signals to the scanner.
65 struct GNUNET_DISK_PipeHandle *stop_pipe;
68 * A pipe end to read signals from.
70 const struct GNUNET_DISK_FileHandle *stop_read;
73 * A pipe end to write signals to.
75 const struct GNUNET_DISK_FileHandle *stop_write;
78 * The pipe that is used to read progress messages. Only closed
79 * after the scanner thread is finished.
81 struct GNUNET_DISK_PipeHandle *progress_pipe;
84 * The end of the pipe that is used to read progress messages.
86 const struct GNUNET_DISK_FileHandle *progress_read;
89 * Handle of the pipe end into which the progress messages are written.
90 * The initiator MUST keep it alive until the scanner thread is finished.
92 const struct GNUNET_DISK_FileHandle *progress_write;
95 * The function that will be called every time there's a progress
98 GNUNET_FS_DirScannerProgressCallback progress_callback;
101 * A closure for progress_callback.
103 void *progress_callback_cls;
106 * A task for reading progress messages from the scanner.
108 GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
111 * After the scan is finished, it will contain a pointer to the
112 * top-level directory entry in the directory tree built by the
113 * scanner. Must only be manipulated by the thread for the
114 * duration of the thread's runtime.
116 struct GNUNET_FS_ShareTreeItem *toplevel;
119 * 1 if the scanner should stop, 0 otherwise. Set in response
120 * to communication errors or when the initiator wants the scanning
132 * @param ds directory scanner structure
135 GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds)
139 /* signal shutdown to other thread */
140 (void) GNUNET_DISK_file_write (ds->stop_write, &c, 1);
141 GNUNET_DISK_pipe_close_end (ds->stop_pipe, GNUNET_DISK_PIPE_END_WRITE);
143 /* stop reading from progress */
144 if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
146 GNUNET_SCHEDULER_cancel (ds->progress_read_task);
147 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
149 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
151 /* wait for other thread to terminate */
153 WaitForSingleObject (ds->thread, INFINITE);
154 CloseHandle (ds->thread);
156 pthread_join (ds->thread, NULL);
157 pthread_detach (ds->thread);
161 GNUNET_DISK_pipe_close (ds->stop_pipe);
162 GNUNET_DISK_pipe_close (ds->progress_pipe);
163 if (NULL != ds->toplevel)
164 GNUNET_FS_share_tree_free (ds->toplevel);
165 if (NULL != ds->plugins)
166 EXTRACTOR_plugin_remove_all (ds->plugins);
172 * Obtain the result of the scan after the scan has signalled
173 * completion. Must not be called prior to completion. The 'ds' is
174 * freed as part of this call.
176 * @param ds directory scanner structure
177 * @return the results of the scan (a directory tree)
179 struct GNUNET_FS_ShareTreeItem *
180 GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds)
182 struct GNUNET_FS_ShareTreeItem *result;
184 /* check that we're actually done */
185 GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == ds->progress_read_task);
186 /* preserve result */
187 result = ds->toplevel;
189 GNUNET_FS_directory_scan_abort (ds);
195 * Write 'size' bytes from 'buf' into 'out'.
197 * @param out pipe to write to
198 * @param buf buffer with data to write
199 * @param size number of bytes to write
200 * @return GNUNET_OK on success, GNUNET_SYSERR on error
203 write_all (const struct GNUNET_DISK_FileHandle *out,
207 const char *cbuf = buf;
214 wr = GNUNET_DISK_file_write (out,
219 } while ( (wr > 0) && (total < size) );
221 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
222 "Failed to write to inter thread communication pipe: %s\n",
224 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
229 * Write progress message.
232 * @param filename name of the file to transmit, never NULL
233 * @param is_directory GNUNET_YES for directory, GNUNET_NO for file, GNUNET_SYSERR for neither
234 * @param reason reason for the progress call
235 * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
238 write_progress (struct GNUNET_FS_DirScanner *ds,
239 const char *filename,
241 enum GNUNET_FS_DirScannerProgressUpdateReason reason)
245 slen = strlen (filename) + 1;
247 write_all (ds->progress_write,
251 write_all (ds->progress_write,
255 write_all (ds->progress_write,
259 write_all (ds->progress_write,
261 sizeof (is_directory))) )
262 return GNUNET_SYSERR;
268 * Called every now and then by the scanner thread to check
269 * if we're being aborted.
271 * @param ds scanner context
272 * @return GNUNET_OK to continue, GNUNET_SYSERR to stop
275 test_thread_stop (struct GNUNET_FS_DirScanner *ds)
279 if ( (GNUNET_DISK_file_read_non_blocking (ds->stop_read, &c, 1) == 1) ||
281 return GNUNET_SYSERR;
287 * Function called to (recursively) add all of the files in the
288 * directory to the tree. Called by the directory scanner to initiate
289 * the scan. Does NOT yet add any metadata.
291 * @param ds directory scanner context to use
292 * @param filename file or directory to scan
293 * @param dst where to store the resulting share tree item
294 * @return GNUNET_OK on success, GNUNET_SYSERR on error
297 preprocess_file (struct GNUNET_FS_DirScanner *ds,
298 const char *filename,
299 struct GNUNET_FS_ShareTreeItem **dst);
303 * Closure for the 'scan_callback'
305 struct RecursionContext
308 * Global scanner context.
310 struct GNUNET_FS_DirScanner *ds;
313 * Parent to add the files to.
315 struct GNUNET_FS_ShareTreeItem *parent;
318 * Flag to set to GNUNET_YES on serious errors.
325 * Function called by the directory iterator to (recursively) add all
326 * of the files in the directory to the tree. Called by the directory
327 * scanner to initiate the scan. Does NOT yet add any metadata.
329 * @param cls the 'struct RecursionContext'
330 * @param filename file or directory to scan
331 * @return GNUNET_OK on success, GNUNET_SYSERR on error
334 scan_callback (void *cls,
335 const char *filename)
337 struct RecursionContext *rc = cls;
338 struct GNUNET_FS_ShareTreeItem *chld;
341 preprocess_file (rc->ds,
345 rc->stop = GNUNET_YES;
346 return GNUNET_SYSERR;
348 chld->parent = rc->parent;
349 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
350 rc->parent->children_tail,
357 * Function called to (recursively) add all of the files in the
358 * directory to the tree. Called by the directory scanner to initiate
359 * the scan. Does NOT yet add any metadata.
361 * @param ds directory scanner context to use
362 * @param filename file or directory to scan
363 * @param dst where to store the resulting share tree item
364 * @return GNUNET_OK on success, GNUNET_SYSERR on error
367 preprocess_file (struct GNUNET_FS_DirScanner *ds,
368 const char *filename,
369 struct GNUNET_FS_ShareTreeItem **dst)
371 struct GNUNET_FS_ShareTreeItem *item;
374 if (0 != STAT (filename, &sbuf))
376 /* If the file doesn't exist (or is not stat-able for any other reason)
377 skip it (but report it), but do continue. */
379 write_progress (ds, filename, GNUNET_SYSERR,
380 GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST))
381 return GNUNET_SYSERR;
385 /* Report the progress */
389 S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
390 GNUNET_FS_DIRSCANNER_FILE_START))
391 return GNUNET_SYSERR;
392 item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem));
393 item->meta = GNUNET_CONTAINER_meta_data_create ();
394 item->filename = GNUNET_strdup (filename);
395 item->short_filename = GNUNET_strdup (GNUNET_STRINGS_get_short_name (filename));
396 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
397 item->file_size = (uint64_t) sbuf.st_size;
398 if (item->is_directory)
400 struct RecursionContext rc;
405 GNUNET_DISK_directory_scan (filename,
408 if ( (rc.stop == GNUNET_YES) ||
410 test_thread_stop (ds)) )
412 GNUNET_FS_share_tree_free (item);
413 return GNUNET_SYSERR;
416 /* Report the progress */
420 S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
421 GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED))
423 GNUNET_FS_share_tree_free (item);
424 return GNUNET_SYSERR;
432 * Extract metadata from files.
434 * @param ds directory scanner context
435 * @param item entry we are processing
436 * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors
439 extract_files (struct GNUNET_FS_DirScanner *ds,
440 struct GNUNET_FS_ShareTreeItem *item)
442 if (item->is_directory)
444 /* for directories, we simply descend, no extraction, no
445 progress reporting */
446 struct GNUNET_FS_ShareTreeItem *pos;
448 for (pos = item->children_head; NULL != pos; pos = pos->next)
450 extract_files (ds, pos))
451 return GNUNET_SYSERR;
455 /* this is the expensive operation, *afterwards* we'll check for aborts */
456 fprintf (stderr, "\tCalling extract on `%s'\n", item->filename);
457 GNUNET_FS_meta_data_extract_from_file (item->meta,
460 fprintf (stderr, "\tExtract `%s' done\n", item->filename);
462 /* having full filenames is too dangerous; always make sure we clean them up */
463 GNUNET_CONTAINER_meta_data_delete (item->meta,
464 EXTRACTOR_METATYPE_FILENAME,
466 GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
467 EXTRACTOR_METATYPE_FILENAME,
468 EXTRACTOR_METAFORMAT_UTF8, "text/plain",
469 item->short_filename,
470 strlen (item->short_filename) + 1);
471 /* check for abort */
473 test_thread_stop (ds))
474 return GNUNET_SYSERR;
476 /* Report the progress */
481 GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED))
482 return GNUNET_SYSERR;
488 * The function from which the scanner thread starts
490 * @param cls the 'struct GNUNET_FS_DirScanner'
498 run_directory_scan_thread (void *cls)
500 struct GNUNET_FS_DirScanner *ds = cls;
502 if (GNUNET_OK != preprocess_file (ds,
503 ds->filename_expanded,
506 (void) write_progress (ds, "", GNUNET_SYSERR, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
507 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_WRITE);
511 write_progress (ds, "", GNUNET_SYSERR, GNUNET_FS_DIRSCANNER_ALL_COUNTED))
513 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_WRITE);
517 extract_files (ds, ds->toplevel))
519 (void) write_progress (ds, "", GNUNET_SYSERR, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
520 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_WRITE);
523 (void) write_progress (ds, "", GNUNET_SYSERR, GNUNET_FS_DIRSCANNER_FINISHED);
524 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_WRITE);
530 * Read 'size' bytes from 'in' into 'buf'.
532 * @param in pipe to read from
533 * @param buf buffer to read to
534 * @param size number of bytes to read
535 * @return GNUNET_OK on success, GNUNET_SYSERR on error
538 read_all (const struct GNUNET_DISK_FileHandle *in,
548 rd = GNUNET_DISK_file_read (in,
553 } while ( (rd > 0) && (total < size) );
555 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
556 "Failed to read from inter thread communication pipe: %s\n",
558 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
563 * Called every time there is data to read from the scanner.
564 * Calls the scanner progress handler.
566 * @param cls the closure (directory scanner object)
567 * @param tc task context in which the task is running
570 read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
572 struct GNUNET_FS_DirScanner *ds = cls;
573 enum GNUNET_FS_DirScannerProgressUpdateReason reason;
578 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
579 if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
581 ds->progress_read_task
582 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
583 ds->progress_read, &read_progress_task,
588 /* Read one message. If message is malformed or can't be read, end the scanner */
591 read_all (ds->progress_read,
594 (reason < GNUNET_FS_DIRSCANNER_FILE_START) ||
595 (reason > GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) ||
597 read_all (ds->progress_read,
598 (char*) &filename_len,
600 (filename_len == 0) ||
601 (filename_len > PATH_MAX) ||
603 read_all (ds->progress_read,
604 filename = GNUNET_malloc (filename_len),
606 (filename[filename_len-1] != '\0') ||
608 read_all (ds->progress_read,
609 (char*) &is_directory,
610 sizeof (is_directory))) )
612 /* IPC error, complain, signal client and stop reading
615 ds->progress_callback (ds->progress_callback_cls, ds,
617 GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
618 GNUNET_free_non_null (filename);
621 /* schedule task to keep reading (done here in case client calls
622 abort or something similar) */
623 if ( (reason != GNUNET_FS_DIRSCANNER_FINISHED) &&
624 (reason != GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) )
626 ds->progress_read_task
627 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
629 &read_progress_task, ds);
631 /* read successfully, notify client about progress */
632 ds->progress_callback (ds->progress_callback_cls,
637 GNUNET_free (filename);
642 * Start a directory scanner thread.
644 * @param filename name of the directory to scan
645 * @param disable_extractor GNUNET_YES to not run libextractor on files (only build a tree)
646 * @param ex if not NULL, must be a list of extra plugins for extractor
647 * @param cb the callback to call when there are scanning progress messages
648 * @param cb_cls closure for 'cb'
649 * @return directory scanner object to be used for controlling the scanner
651 struct GNUNET_FS_DirScanner *
652 GNUNET_FS_directory_scan_start (const char *filename,
653 int disable_extractor, const char *ex,
654 GNUNET_FS_DirScannerProgressCallback cb,
658 char *filename_expanded;
659 struct GNUNET_FS_DirScanner *ds;
660 struct GNUNET_DISK_PipeHandle *progress_pipe;
661 struct GNUNET_DISK_PipeHandle *stop_pipe;
664 if (0 != STAT (filename, &sbuf))
666 filename_expanded = GNUNET_STRINGS_filename_expand (filename);
667 if (NULL == filename_expanded)
669 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
670 "Starting to scan directory `%s'\n",
672 progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
673 if (NULL == progress_pipe)
675 GNUNET_free (filename_expanded);
678 stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
679 if (NULL == stop_pipe)
681 GNUNET_DISK_pipe_close (progress_pipe);
682 GNUNET_free (filename_expanded);
686 ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
687 ds->progress_callback = cb;
688 ds->progress_callback_cls = cb_cls;
689 ds->stop_pipe = stop_pipe;
690 ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe,
691 GNUNET_DISK_PIPE_END_WRITE);
692 ds->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
693 GNUNET_DISK_PIPE_END_READ);
694 ds->progress_pipe = progress_pipe;
695 ds->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
696 GNUNET_DISK_PIPE_END_WRITE);
697 ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
698 GNUNET_DISK_PIPE_END_READ);
699 ds->filename_expanded = filename_expanded;
700 if (! disable_extractor)
702 ds->plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
703 if ( (NULL != ex) && strlen (ex) > 0)
704 ds->plugins = EXTRACTOR_plugin_add_config (ds->plugins, ex,
705 EXTRACTOR_OPTION_DEFAULT_POLICY);
708 ds->thread = CreateThread (NULL, 0,
709 (LPTHREAD_START_ROUTINE) &run_directory_scan_thread,
710 (LPVOID) ds, 0, NULL);
711 ok = (ds->thread != NULL);
713 ok = (0 == pthread_create (&ds->thread, NULL,
714 &run_directory_scan_thread, ds));
718 EXTRACTOR_plugin_remove_all (ds->plugins);
719 GNUNET_free (filename_expanded);
720 GNUNET_DISK_pipe_close (stop_pipe);
721 GNUNET_DISK_pipe_close (progress_pipe);
725 ds->progress_read_task
726 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
728 &read_progress_task, ds);
733 /* end of fs_dirmetascan.c */