2 This file is part of GNUnet
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_dirmetascan.c
23 * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
24 * from an on-disk directory for publishing
26 * @author Christian Grothoff
29 #include "gnunet_fs_service.h"
30 #include "gnunet_scheduler_lib.h"
35 * An opaque structure a pointer to which is returned to the
36 * caller to be used to control the scanner.
/* Per-scan state shared between the initiator (scheduler side) and the
   scanner thread.  NOTE(review): this excerpt omits several short lines of
   the definition (among them the declaration of the thread member and of
   the trailing stop flag); only the members shown below can be confirmed. */
38 struct GNUNET_FS_DirScanner
42 * A thread object for the scanner thread.
/* The member itself is not visible here; it is used elsewhere in the file
   as ds->thread (CreateThread/WaitForSingleObject on Windows,
   pthread_create/pthread_join otherwise). */
51 * Expanded filename (as given by the scan initiator).
52 * The scanner thread stores a copy here, and frees it when it finishes.
/* NOTE(review): the visible code assigns this in
   GNUNET_FS_directory_scan_start() and frees it only on that function's
   thread-creation failure path; confirm who releases it after a
   successful start. */
54 char *filename_expanded;
57 * List of libextractor plugins to use for extracting.
58 * Initialized when the scan starts, removed when it finishes.
/* Stays NULL when the scan is started with disable_extractor set. */
60 struct EXTRACTOR_PluginList *plugins;
63 * A pipe transfer signals to the scanner.
/* Read as: "a pipe to transfer signals to the scanner". */
65 struct GNUNET_DISK_PipeHandle *stop_pipe;
68 * A pipe end to read signals from.
/* Read end of stop_pipe; polled by the scanner thread. */
70 const struct GNUNET_DISK_FileHandle *stop_read;
73 * A pipe end to read signals from.
/* NOTE(review): the description above was copied from stop_read.  This
   handle is obtained with GNUNET_DISK_PIPE_END_WRITE and is the end that
   GNUNET_FS_directory_scan_abort() writes the stop byte to. */
75 const struct GNUNET_DISK_FileHandle *stop_write;
78 * The pipe that is used to read progress messages. Only closed
79 * after the scanner thread is finished.
81 struct GNUNET_DISK_PipeHandle *progress_pipe;
84 * The end of the pipe that is used to read progress messages.
/* Read by read_progress_task() on the initiator side. */
86 const struct GNUNET_DISK_FileHandle *progress_read;
89 * Handle of the pipe end into which the progress messages are written
90 * The initiator MUST keep it alive until the scanner thread is finished.
/* Written by write_progress() from the scanner thread. */
92 const struct GNUNET_DISK_FileHandle *progress_write;
95 * The function that will be called every time there's a progress
/* Invoked as (cls, ds, filename, is_directory, reason). */
98 GNUNET_FS_DirScannerProgressCallback progress_callback;
101 * A closure for progress_callback.
103 void *progress_callback_cls;
106 * A task for reading progress messages from the scanner.
/* GNUNET_SCHEDULER_NO_TASK while no read is scheduled. */
108 GNUNET_SCHEDULER_TaskIdentifier progress_read_task;
111 * After the scan is finished, it will contain a pointer to the
112 * top-level directory entry in the directory tree built by the
113 * scanner. Must only be manipulated by the thread for the
114 * duration of the thread's runtime.
116 struct GNUNET_FS_ShareTreeItem *toplevel;
119 * 1 if the scanner should stop, 0 otherwise. Set in response
120 * to communication errors or when the initiator wants the scanning
/* NOTE(review): the member this comment documents is not visible in this
   excerpt and is not referenced by any visible line. */
132 * @param ds directory scanner structure
135 GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds)
139 /* signal shutdown to other thread */
140 (void) GNUNET_DISK_file_write (ds->stop_write, &c, 1);
141 GNUNET_DISK_pipe_close_end (ds->stop_pipe, GNUNET_DISK_PIPE_END_WRITE);
143 /* stop reading from progress */
144 if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK)
146 GNUNET_SCHEDULER_cancel (ds->progress_read_task);
147 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
149 GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ);
151 /* wait for other thread to terminate */
153 WaitForSingleObject (ds->thread, INFINITE);
154 CloseHandle (ds->thread);
156 pthread_join (ds->thread, NULL);
157 pthread_detach (ds->thread);
161 GNUNET_DISK_pipe_close (ds->stop_pipe);
162 GNUNET_DISK_pipe_close (ds->progress_pipe);
163 if (NULL != ds->toplevel)
164 GNUNET_FS_share_tree_free (ds->toplevel);
165 if (NULL != ds->plugins)
166 EXTRACTOR_plugin_remove_all (ds->plugins);
172 * Obtain the result of the scan after the scan has signalled
173 * completion. Must not be called prior to completion. The 'ds' is
174 * freed as part of this call.
176 * @param ds directory scanner structure
177 * @return the results of the scan (a directory tree)
179 struct GNUNET_FS_ShareTreeItem *
180 GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds)
182 struct GNUNET_FS_ShareTreeItem *result;
184 /* check that we're actually done */
185 GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == ds->progress_read_task);
186 /* preserve result */
187 result = ds->toplevel;
189 GNUNET_FS_directory_scan_abort (ds);
195 * Write 'size' bytes from 'buf' into 'out'.
197 * @param in pipe to write to
198 * @param buf buffer with data to write
199 * @param size number of bytes to write
200 * @return GNUNET_OK on success, GNUNET_SYSERR on error
203 write_all (const struct GNUNET_DISK_FileHandle *out,
207 const char *cbuf = buf;
214 wr = GNUNET_DISK_file_write (out,
219 } while ( (wr > 0) && (total < size) );
220 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
225 * Write progress message.
228 * @param filename name of the file to transmit, never NULL
229 * @param is_directory GNUNET_YES for directory, GNUNET_NO for file, GNUNET_SYSERR for neither
230 * @param reason reason for the progress call
231 * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
234 write_progress (struct GNUNET_FS_DirScanner *ds,
235 const char *filename,
237 enum GNUNET_FS_DirScannerProgressUpdateReason reason)
241 slen = strlen (filename) + 1;
243 write_all (ds->progress_write,
247 write_all (ds->progress_write,
251 write_all (ds->progress_write,
255 write_all (ds->progress_write,
257 sizeof (is_directory))) )
258 return GNUNET_SYSERR;
264 * Called every now and then by the scanner thread to check
265 * if we're being aborted.
267 * @param ds scanner context
268 * @return GNUNET_OK to continue, GNUNET_SYSERR to stop
/* Runs in the scanner thread; callers in this file are preprocess_file()
   (after scanning a directory) and extract_files() (after each file). */
271 test_thread_stop (struct GNUNET_FS_DirScanner *ds)
/* Poll the read end of the stop pipe without blocking: one byte read
   successfully means the initiator asked us to stop.
   NOTE(review): the second operand of this '||' sits on a line that is
   not part of this excerpt -- confirm it before relying on how read
   errors or end-of-file on the pipe are treated. */
275 if ( (GNUNET_DISK_file_read_non_blocking (ds->stop_read, &c, 1) == 1) ||
277 return GNUNET_SYSERR;
/**
 * Forward declaration, needed because the directory iterator callback
 * and this function call each other.  Builds the share tree item for
 * 'filename', descending into directories; no metadata is added yet.
 *
 * @param ds directory scanner context to use
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item
 * @return GNUNET_OK on success, GNUNET_SYSERR on error
 */
static int
preprocess_file (struct GNUNET_FS_DirScanner *ds,
                 const char *filename,
                 struct GNUNET_FS_ShareTreeItem **dst);
/**
 * State handed to 'scan_callback' while one directory is iterated.
 */
struct RecursionContext
{
  /**
   * Scanner the iteration belongs to.
   */
  struct GNUNET_FS_DirScanner *ds;

  /**
   * Directory item that receives the entries found.
   */
  struct GNUNET_FS_ShareTreeItem *parent;

  /**
   * Set to GNUNET_YES once a serious error requires giving up.
   */
  int stop;
};
321 * Function called by the directory iterator to (recursively) add all
322 * of the files in the directory to the tree. Called by the directory
323 * scanner to initiate the scan. Does NOT yet add any metadata.
325 * @param cls the 'struct RecursionContext'
326 * @param filename file or directory to scan
327 * @return GNUNET_OK on success, GNUNET_SYSERR on error
330 scan_callback (void *cls,
331 const char *filename)
333 struct RecursionContext *rc = cls;
334 struct GNUNET_FS_ShareTreeItem *chld;
337 preprocess_file (rc->ds,
341 rc->stop = GNUNET_YES;
342 return GNUNET_SYSERR;
344 chld->parent = rc->parent;
345 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
346 rc->parent->children_tail,
353 * Function called to (recursively) add all of the files in the
354 * directory to the tree. Called by the directory scanner to initiate
355 * the scan. Does NOT yet add any metadata.
357 * @param ds directory scanner context to use
358 * @param filename file or directory to scan
359 * @param dst where to store the resulting share tree item
360 * @return GNUNET_OK on success, GNUNET_SYSERR on error
363 preprocess_file (struct GNUNET_FS_DirScanner *ds,
364 const char *filename,
365 struct GNUNET_FS_ShareTreeItem **dst)
367 struct GNUNET_FS_ShareTreeItem *item;
370 if (0 != STAT (filename, &sbuf))
372 /* If the file doesn't exist (or is not stat-able for any other reason)
373 skip it (but report it), but do continue. */
375 write_progress (ds, filename, GNUNET_SYSERR,
376 GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST))
377 return GNUNET_SYSERR;
381 /* Report the progress */
385 S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
386 GNUNET_FS_DIRSCANNER_FILE_START))
387 return GNUNET_SYSERR;
388 item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem));
389 item->meta = GNUNET_CONTAINER_meta_data_create ();
390 item->filename = GNUNET_strdup (filename);
391 item->short_filename = GNUNET_strdup (GNUNET_STRINGS_get_short_name (filename));
392 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
393 item->file_size = (uint64_t) sbuf.st_size;
394 if (item->is_directory)
396 struct RecursionContext rc;
401 GNUNET_DISK_directory_scan (filename,
404 if ( (rc.stop == GNUNET_YES) ||
406 test_thread_stop (ds)) )
408 GNUNET_FS_share_tree_free (item);
409 return GNUNET_SYSERR;
412 /* Report the progress */
416 S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO,
417 GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED))
419 GNUNET_FS_share_tree_free (item);
420 return GNUNET_SYSERR;
428 * Extract metadata from files.
430 * @param ds directory scanner context
431 * @param item entry we are processing
432 * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors
435 extract_files (struct GNUNET_FS_DirScanner *ds,
436 struct GNUNET_FS_ShareTreeItem *item)
438 if (item->is_directory)
440 /* for directories, we simply only descent, no extraction, no
441 progress reporting */
442 struct GNUNET_FS_ShareTreeItem *pos;
444 for (pos = item->children_head; NULL != pos; pos = pos->next)
446 extract_files (ds, pos))
447 return GNUNET_SYSERR;
451 /* this is the expensive operation, *afterwards* we'll check for aborts */
452 GNUNET_FS_meta_data_extract_from_file (item->meta,
456 /* having full filenames is too dangerous; always make sure we clean them up */
457 GNUNET_CONTAINER_meta_data_delete (item->meta,
458 EXTRACTOR_METATYPE_FILENAME,
460 GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>",
461 EXTRACTOR_METATYPE_FILENAME,
462 EXTRACTOR_METAFORMAT_UTF8, "text/plain",
463 item->short_filename,
464 strlen (item->short_filename) + 1);
465 /* check for abort */
467 test_thread_stop (ds))
468 return GNUNET_SYSERR;
470 /* Report the progress */
475 GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED))
476 return GNUNET_SYSERR;
482 * The function from which the scanner thread starts
484 * @param cls the 'struct GNUNET_FS_DirScanner'
492 run_directory_scan_thread (void *cls)
494 struct GNUNET_FS_DirScanner *ds = cls;
496 if (GNUNET_OK != preprocess_file (ds,
497 ds->filename_expanded,
500 (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
504 write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_ALL_COUNTED))
507 extract_files (ds, ds->toplevel))
509 (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
512 (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_FINISHED);
518 * Read 'size' bytes from 'in' into 'buf'.
520 * @param in pipe to read from
521 * @param buf buffer to read to
522 * @param size number of bytes to read
523 * @return GNUNET_OK on success, GNUNET_SYSERR on error
526 read_all (const struct GNUNET_DISK_FileHandle *in,
536 rd = GNUNET_DISK_file_read (in,
541 } while ( (rd > 0) && (total < size) );
542 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
547 * Called every time there is data to read from the scanner.
548 * Calls the scanner progress handler.
550 * @param cls the closure (directory scanner object)
551 * @param tc task context in which the task is running
554 read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
556 struct GNUNET_FS_DirScanner *ds = cls;
557 enum GNUNET_FS_DirScannerProgressUpdateReason reason;
562 ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK;
563 if (! (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
565 ds->progress_read_task
566 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
567 ds->progress_read, &read_progress_task,
572 /* Read one message. If message is malformed or can't be read, end the scanner */
575 read_all (ds->progress_read,
578 (reason < GNUNET_FS_DIRSCANNER_FILE_START) ||
579 (reason > GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) ||
581 read_all (ds->progress_read,
582 (char*) &filename_len,
584 (filename_len == 0) ||
585 (filename_len > PATH_MAX) ||
587 read_all (ds->progress_read,
588 filename = GNUNET_malloc (filename_len),
590 (filename[filename_len-1] != '\0') ||
592 read_all (ds->progress_read,
593 (char*) &is_directory,
594 sizeof (is_directory))) )
596 /* IPC error, complain, signal client and stop reading
599 ds->progress_callback (ds->progress_callback_cls, ds,
601 GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
602 GNUNET_free_non_null (filename);
605 /* schedule task to keep reading (done here in case client calls
606 abort or something similar) */
607 if ( (reason != GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED) &&
608 (reason != GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) )
609 ds->progress_read_task
610 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
612 &read_progress_task, ds);
614 /* read successfully, notify client about progress */
615 ds->progress_callback (ds->progress_callback_cls,
620 GNUNET_free (filename);
625 * Start a directory scanner thread.
627 * @param filename name of the directory to scan
628 * @param GNUNET_YES to not to run libextractor on files (only build a tree)
629 * @param ex if not NULL, must be a list of extra plugins for extractor
630 * @param cb the callback to call when there are scanning progress messages
631 * @param cb_cls closure for 'cb'
632 * @return directory scanner object to be used for controlling the scanner
634 struct GNUNET_FS_DirScanner *
635 GNUNET_FS_directory_scan_start (const char *filename,
636 int disable_extractor, const char *ex,
637 GNUNET_FS_DirScannerProgressCallback cb,
641 char *filename_expanded;
642 struct GNUNET_FS_DirScanner *ds;
643 struct GNUNET_DISK_PipeHandle *progress_pipe;
644 struct GNUNET_DISK_PipeHandle *stop_pipe;
647 if (0 != STAT (filename, &sbuf))
649 filename_expanded = GNUNET_STRINGS_filename_expand (filename);
650 if (NULL == filename_expanded)
652 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
653 "Starting to scan directory `%s'\n",
655 progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
656 if (NULL == progress_pipe)
658 GNUNET_free (filename_expanded);
661 stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO);
662 if (NULL == stop_pipe)
664 GNUNET_DISK_pipe_close (progress_pipe);
665 GNUNET_free (filename_expanded);
669 ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner));
670 ds->progress_callback = cb;
671 ds->progress_callback_cls = cb_cls;
672 ds->stop_pipe = stop_pipe;
673 ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe,
674 GNUNET_DISK_PIPE_END_WRITE);
675 ds->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe,
676 GNUNET_DISK_PIPE_END_READ);
677 ds->progress_pipe = progress_pipe;
678 ds->progress_write = GNUNET_DISK_pipe_handle (progress_pipe,
679 GNUNET_DISK_PIPE_END_WRITE);
680 ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe,
681 GNUNET_DISK_PIPE_END_READ);
682 ds->filename_expanded = filename_expanded;
683 if (! disable_extractor)
685 ds->plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
686 if ( (NULL != ex) && strlen (ex) > 0)
687 ds->plugins = EXTRACTOR_plugin_add_config (ds->plugins, ex,
688 EXTRACTOR_OPTION_DEFAULT_POLICY);
691 ds->thread = CreateThread (NULL, 0,
692 (LPTHREAD_START_ROUTINE) &run_directory_scan_thread,
693 (LPVOID) ds, 0, NULL);
694 ok = (ds->thread != NULL);
696 ok = (0 == pthread_create (&ds->thread, NULL,
697 &run_directory_scan_thread, ds));
701 EXTRACTOR_plugin_remove_all (ds->plugins);
702 GNUNET_free (filename_expanded);
703 GNUNET_DISK_pipe_close (stop_pipe);
704 GNUNET_DISK_pipe_close (progress_pipe);
708 ds->progress_read_task
709 = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
711 &read_progress_task, ds);
716 /* end of fs_dirmetascan.c */