2 This file is part of GNUnet.
3 Copyright (C) 2012 GNUnet e.V.
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
22 * @file src/fs/gnunet-helper-fs-publish.c
23 * @brief Tool to help extract meta data asynchronously
24 * @author Christian Grothoff
26 * This program will scan a directory for files with meta data
27 * and report the results to stdout.
30 #include "gnunet_fs_service.h"
/**
 * A node of a directory tree.
 */
struct ScanTreeNode
{

  /**
   * Next sibling; siblings are kept in a doubly-linked list.
   */
  struct ScanTreeNode *next;

  /**
   * Previous sibling; siblings are kept in a doubly-linked list.
   */
  struct ScanTreeNode *prev;

  /**
   * Parent of this node, NULL for top-level entries.
   */
  struct ScanTreeNode *parent;

  /**
   * First child of this node (head of the doubly-linked child list).
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_head;

  /**
   * Last child of this node (tail of the doubly-linked child list).
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_tail;

  /**
   * Name of the file/directory
   */
  char *filename;

  /**
   * Size of the file (if it is a file), in bytes.
   * At the moment it is set to 0 for directories.
   */
  uint64_t file_size;

  /**
   * #GNUNET_YES if this is a directory
   */
  int is_directory;

};
#if HAVE_LIBEXTRACTOR
/**
 * List of libextractor plugins to use for extracting.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif

/**
 * File descriptor we use for IPC with the parent.
 */
static int output_stream;
#if HAVE_LIBEXTRACTOR
/**
 * Add meta data that libextractor finds to our meta data
 * container.
 *
 * String-typed values (UTF-8 or C string) that lack a terminating
 * 0-byte are copied and 0-terminated before they are stored.  Values
 * without any payload are skipped.
 *
 * @param cls closure, our meta data container
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (i.e. '<zlib>' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about data
 * @param data_mime_type mime-type of data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return always 0 to continue extracting
 */
static int
add_to_md (void *cls,
           const char *plugin_name,
           enum EXTRACTOR_MetaType type,
           enum EXTRACTOR_MetaFormat format,
           const char *data_mime_type,
           const char *data,
           size_t data_len)
{
  struct GNUNET_CONTAINER_MetaData *md = cls;
  char *zdata;

  /* An empty value carries no information; rejecting it here also keeps
     the 'data[data_len - 1]' access below within bounds. */
  if ( (NULL == data) ||
       (0 == data_len) )
    return 0;
  if ( ( (EXTRACTOR_METAFORMAT_UTF8 != format) &&
         (EXTRACTOR_METAFORMAT_C_STRING != format) ) ||
       ('\0' == data[data_len - 1]) )
  {
    /* binary data, or a string that is already 0-terminated */
    (void) GNUNET_CONTAINER_meta_data_insert (md, plugin_name, type, format,
                                              data_mime_type, data, data_len);
    return 0;
  }
  /* String without terminator: append one.  The copy lives on the heap
     because its size is dictated by the plugin, not by us. */
  zdata = GNUNET_malloc (data_len + 1);
  memcpy (zdata, data, data_len);
  zdata[data_len] = '\0';
  (void) GNUNET_CONTAINER_meta_data_insert (md, plugin_name, type, format,
                                            data_mime_type, zdata, data_len + 1);
  GNUNET_free (zdata);
  return 0;
}
#endif
148 * Free memory of the @a tree structure
150 * @param tree tree to free
153 free_tree (struct ScanTreeNode *tree)
155 struct ScanTreeNode *pos;
157 while (NULL != (pos = tree->children_head))
159 if (NULL != tree->parent)
160 GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
161 tree->parent->children_tail,
163 GNUNET_free (tree->filename);
169 * Write @a size bytes from @a buf into the #output_stream.
171 * @param buf buffer with data to write
172 * @param size number of bytes to write
173 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
176 write_all (const void *buf,
179 const char *cbuf = buf;
186 wr = write (output_stream,
191 } while ( (wr > 0) && (total < size) );
193 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
194 "Failed to write to stdout: %s\n",
196 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
201 * Write message to the master process.
203 * @param message_type message type to use
204 * @param data data to append, NULL for none
205 * @param data_length number of bytes in @a data
206 * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
209 write_message (uint16_t message_type,
213 struct GNUNET_MessageHeader hdr;
217 "Helper sends %u-byte message of type %u\n",
218 (unsigned int) (sizeof (struct GNUNET_MessageHeader) + data_length),
219 (unsigned int) message_type);
221 hdr.type = htons (message_type);
222 hdr.size = htons (sizeof (struct GNUNET_MessageHeader) + data_length);
229 return GNUNET_SYSERR;
/**
 * Forward declaration; needed because the directory iterator callback
 * (scan_callback) and preprocess_file call each other.
 *
 * Function called to (recursively) add all of the files in the
 * directory to the tree.  Does NOT yet add any metadata.
 *
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item;
 *         NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int
preprocess_file (const char *filename,
                 struct ScanTreeNode **dst);
/**
 * Closure for the 'scan_callback'
 */
struct RecursionContext
{
  /**
   * Directory node that receives the entries found by the scan.
   */
  struct ScanTreeNode *parent;

  /**
   * Set to #GNUNET_YES on serious errors, i.e. when the scan must
   * be aborted.
   */
  int stop;
};
267 * Function called by the directory iterator to (recursively) add all
268 * of the files in the directory to the tree. Called by the directory
269 * scanner to initiate the scan. Does NOT yet add any metadata.
271 * @param cls the `struct RecursionContext`
272 * @param filename file or directory to scan
273 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
276 scan_callback (void *cls,
277 const char *filename)
279 struct RecursionContext *rc = cls;
280 struct ScanTreeNode *chld;
283 preprocess_file (filename,
286 rc->stop = GNUNET_YES;
287 return GNUNET_SYSERR;
291 chld->parent = rc->parent;
292 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
293 rc->parent->children_tail,
300 * Function called to (recursively) add all of the files in the
301 * directory to the tree. Called by the directory scanner to initiate
302 * the scan. Does NOT yet add any metadata.
304 * @param filename file or directory to scan
305 * @param dst where to store the resulting share tree item;
306 * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
307 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
310 preprocess_file (const char *filename,
311 struct ScanTreeNode **dst)
313 struct ScanTreeNode *item;
317 if ((0 != STAT (filename, &sbuf)) ||
318 ((!S_ISDIR (sbuf.st_mode)) && (GNUNET_OK != GNUNET_DISK_file_size (
319 filename, &fsize, GNUNET_NO, GNUNET_YES))))
321 /* If the file doesn't exist (or is not stat-able for any other reason)
322 skip it (but report it), but do continue. */
324 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
325 filename, strlen (filename) + 1))
326 return GNUNET_SYSERR;
327 /* recoverable error, store 'NULL' in *dst */
332 /* Report the progress */
334 write_message (S_ISDIR (sbuf.st_mode)
335 ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
336 : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
337 filename, strlen (filename) + 1))
338 return GNUNET_SYSERR;
339 item = GNUNET_new (struct ScanTreeNode);
340 item->filename = GNUNET_strdup (filename);
341 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
342 item->file_size = fsize;
343 if (GNUNET_YES == item->is_directory)
345 struct RecursionContext rc;
349 GNUNET_DISK_directory_scan (filename,
352 if ( (GNUNET_YES == rc.stop) ||
354 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
358 return GNUNET_SYSERR;
367 * Extract metadata from files.
369 * @param item entry we are processing
370 * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors
373 extract_files (struct ScanTreeNode *item)
375 struct GNUNET_CONTAINER_MetaData *meta;
379 if (GNUNET_YES == item->is_directory)
381 /* for directories, we simply only descent, no extraction, no
382 progress reporting */
383 struct ScanTreeNode *pos;
385 for (pos = item->children_head; NULL != pos; pos = pos->next)
388 return GNUNET_SYSERR;
392 /* this is the expensive operation, *afterwards* we'll check for aborts */
393 meta = GNUNET_CONTAINER_meta_data_create ();
394 #if HAVE_LIBEXTRACTOR
395 EXTRACTOR_extract (plugins,
401 slen = strlen (item->filename) + 1;
402 size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
406 GNUNET_CONTAINER_meta_data_destroy (meta);
408 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
409 item->filename, slen))
410 return GNUNET_SYSERR;
413 else if (size > (UINT16_MAX - sizeof (struct GNUNET_MessageHeader) - slen))
415 /* We can't transfer more than 64k bytes in one message. */
416 size = UINT16_MAX - sizeof (struct GNUNET_MessageHeader) - slen;
419 char buf[size + slen];
420 char *dst = &buf[slen];
422 memcpy (buf, item->filename, slen);
423 size = GNUNET_CONTAINER_meta_data_serialize (meta,
425 GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
431 GNUNET_CONTAINER_meta_data_destroy (meta);
433 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
436 return GNUNET_SYSERR;
444 * Install a signal handler to ignore SIGPIPE.
449 struct sigaction oldsig;
450 struct sigaction sig;
452 memset (&sig, 0, sizeof (struct sigaction));
453 sig.sa_handler = SIG_IGN;
454 sigemptyset (&sig.sa_mask);
456 sig.sa_flags = SA_INTERRUPT; /* SunOS */
458 sig.sa_flags = SA_RESTART;
460 if (0 != sigaction (SIGPIPE, &sig, &oldsig))
462 "Failed to install SIGPIPE handler: %s\n", strerror (errno));
#ifndef WINDOWS
/**
 * Turn the given file descriptor in to '/dev/null': the descriptor is
 * closed and its number is re-bound to a freshly opened '/dev/null'.
 *
 * @param fd fd to bind to /dev/null
 * @param flags flags to use (O_RDONLY or O_WRONLY)
 */
static void
make_dev_zero (int fd,
               int flags)
{
  int null_fd;

  GNUNET_assert (0 == close (fd));
  null_fd = open ("/dev/null", flags);
  GNUNET_assert (-1 != null_fd);
  if (null_fd != fd)
  {
    /* open() handed out a different descriptor number: move it */
    GNUNET_break (fd == dup2 (null_fd, fd));
    GNUNET_assert (0 == close (null_fd));
  }
}
#endif
491 * Main function of the helper process to extract meta data.
493 * @param argc should be 3
494 * @param argv [0] our binary name
495 * [1] name of the file or directory to process
496 * [2] "-" to disable extraction, NULL for defaults,
497 * otherwise custom plugins to load from LE
498 * @return 0 on success
504 const char *filename_expanded;
506 struct ScanTreeNode *root;
509 /* We're using stdout to communicate binary data back to the parent; use
512 _setmode (1, _O_BINARY);
513 /* Get utf-8-encoded arguments */
514 if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv))
516 output_stream = 1; /* stdout */
519 /* move stdout to some other FD for IPC, bind
520 stdout/stderr to /dev/null */
521 output_stream = dup (1);
522 make_dev_zero (1, O_WRONLY);
523 make_dev_zero (2, O_WRONLY);
526 /* parse command line */
527 if ( (3 != argc) && (2 != argc) )
531 "gnunet-helper-fs-publish needs exactly one or two arguments\n");
533 GNUNET_free ((void*) argv);
537 filename_expanded = argv[1];
540 (0 != strcmp (ex, "-")) )
542 #if HAVE_LIBEXTRACTOR
543 plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
545 plugins = EXTRACTOR_plugin_add_config (plugins, ex,
546 EXTRACTOR_OPTION_DEFAULT_POLICY);
550 /* scan tree to find out how much work there is to be done */
551 if (GNUNET_OK != preprocess_file (filename_expanded,
554 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
555 #if HAVE_LIBEXTRACTOR
556 EXTRACTOR_plugin_remove_all (plugins);
559 GNUNET_free ((void*) argv);
563 /* signal that we're done counting files, so that a percentage of
564 progress can now be calculated */
566 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE, NULL, 0))
568 #if HAVE_LIBEXTRACTOR
569 EXTRACTOR_plugin_remove_all (plugins);
572 GNUNET_free ((void*) argv);
579 extract_files (root))
581 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
583 #if HAVE_LIBEXTRACTOR
584 EXTRACTOR_plugin_remove_all (plugins);
587 GNUNET_free ((void*) argv);
593 /* enable "clean" shutdown by telling parent that we are done */
594 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED, NULL, 0);
595 #if HAVE_LIBEXTRACTOR
596 EXTRACTOR_plugin_remove_all (plugins);
599 GNUNET_free ((void*) argv);
604 /* end of gnunet-helper-fs-publish.c */