2 This file is part of GNUnet.
3 Copyright (C) 2012 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
22 * @file src/fs/gnunet-helper-fs-publish.c
23 * @brief Tool to help extract meta data asynchronously
24 * @author Christian Grothoff
26 * This program will scan a directory for files with meta data
27 * and report the results to stdout.
30 #include "gnunet_fs_service.h"
/* Tree node used by the scan: preprocess_file() allocates one per file or
   directory, scan_callback() links it under its parent through
   children_head/children_tail, and extract_files() walks siblings via
   next. */
/* NOTE(review): the struct header and the member declarations that belong
   to the last three comments are not visible in this excerpt; other code in
   this file accesses members named filename, file_size and is_directory. */
34 * A node of a directory tree.
39 * This is a doubly-linked list
41 struct ScanTreeNode *next;
44 * This is a doubly-linked list
46 struct ScanTreeNode *prev;
49 * Parent of this node, NULL for top-level entries.
51 struct ScanTreeNode *parent;
54 * This is a doubly-linked tree
55 * NULL for files and empty directories
57 struct ScanTreeNode *children_head;
60 * This is a doubly-linked tree
61 * NULL for files and empty directories
63 struct ScanTreeNode *children_tail;
66 * Name of the file/directory
71 * Size of the file (if it is a file), in bytes.
72 * At the moment it is set to 0 for directories.
77 * #GNUNET_YES if this is a directory
/* Assigned in main() from EXTRACTOR_plugin_add_defaults() and
   EXTRACTOR_plugin_add_config(), handed to EXTRACTOR_extract() in
   extract_files(), and released with EXTRACTOR_plugin_remove_all() on the
   exit paths visible in main(). */
/* NOTE(review): every visible use follows a HAVE_LIBEXTRACTOR conditional;
   whether this declaration is guarded the same way cannot be seen here. */
85 * List of libextractor plugins to use for extracting.
87 static struct EXTRACTOR_PluginList *plugins;
/* Set in main() to a duplicate of descriptor 1, taken before descriptors 1
   and 2 are rebound to /dev/null; write_all() is the only visible user. */
91 * File descriptor we use for IPC with the parent.
93 static int output_stream;
98 * Add meta data that libextractor finds to our meta data
101 * @param cls closure, our meta data container
102 * @param plugin_name name of the plugin that produced this value;
103 * special values can be used (e.g. '<zlib>' for zlib being
104 * used in the main libextractor library and yielding
106 * @param type libextractor-type describing the meta data
107 * @param format basic format information about data
108 * @param data_mime_type mime-type of data (not of the original file);
109 * can be NULL (if mime-type is not known)
110 * @param data actual meta-data found
111 * @param data_len number of bytes in @a data
112 * @return always 0 to continue extracting
/* Callback handed to EXTRACTOR_extract() in extract_files(); cls is the
   meta data container that receives each reported item. */
/* NOTE(review): the embedded numbering jumps from 119 to 123, so the tail
   of the parameter list (data and data_len according to the doc text) and
   the opening brace are not visible in this excerpt. */
115 add_to_md (void *cls,
116 const char *plugin_name,
117 enum EXTRACTOR_MetaType type,
118 enum EXTRACTOR_MetaFormat format,
119 const char *data_mime_type,
123 struct GNUNET_CONTAINER_MetaData *md = cls;
/* UTF8 and C-string values whose last byte is not NUL take the first
   branch, where a terminated copy is built. */
/* NOTE(review): data[data_len - 1] reads in front of the buffer when
   data_len is 0 - confirm that libextractor never reports empty values. */
125 if (((EXTRACTOR_METAFORMAT_UTF8 == format) ||
126 (EXTRACTOR_METAFORMAT_C_STRING == format)) &&
127 ('\0' != data[data_len - 1]))
/* Stack VLA sized by the reported item; presumably items are small enough
   for this to be safe - TODO confirm an upper bound. */
129 char zdata[data_len + 1];
130 GNUNET_memcpy (zdata, data, data_len);
131 zdata[data_len] = '\0';
/* NOTE(review): the remaining arguments of both insert calls are cut off
   here; presumably the first call passes zdata with data_len + 1 and the
   second passes data with data_len unchanged - confirm against the full
   file. Both results are deliberately discarded via the void cast. */
132 (void) GNUNET_CONTAINER_meta_data_insert (md,
142 (void) GNUNET_CONTAINER_meta_data_insert (md,
156 * Free memory of the @a tree structure
158 * @param tree tree to free
/* Tears down a subtree: drains the child list, unlinks this node from its
   parent list (if it has a parent) and frees the filename string. */
/* NOTE(review): the numbering skips 166, 170 and 172, so the loop body, the
   last argument of the list removal and whatever follows the filename
   release are not visible; presumably the loop recurses into pos and the
   node itself is freed at the end - confirm against the full file. */
161 free_tree (struct ScanTreeNode *tree)
163 struct ScanTreeNode *pos;
/* Terminates only if each iteration detaches pos from tree->children_head. */
165 while (NULL != (pos = tree->children_head))
/* Top-level entries have a NULL parent and thus no list to leave. */
167 if (NULL != tree->parent)
168 GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
169 tree->parent->children_tail,
171 GNUNET_free (tree->filename);
177 * Write @a size bytes from @a buf into the #output_stream.
179 * @param buf buffer with data to write
180 * @param size number of bytes to write
181 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
/* Pushes size bytes from buf to output_stream, resuming after short writes;
   success means that every byte was accepted. */
/* NOTE(review): the declarations and updates of total and wr, and the
   condition guarding the log call, are on lines not visible here. */
184 write_all (const void *buf, size_t size)
186 const char *cbuf = buf;
/* Each attempt resumes at offset total and asks for the remainder. */
193 wr = write (output_stream, &cbuf[total], size - total);
/* This looks like the tail of a do/while, so write() is attempted at least
   once even when size is 0. The loop gives up as soon as write() returns 0
   or a negative value. NOTE(review): that includes an interrupted call
   (EINTR) - presumably acceptable for this helper, confirm. */
197 while ((wr > 0) && (total < size));
/* Reported at DEBUG level only; the text says stdout although the
   descriptor in use is the duplicate stored in output_stream. */
199 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
200 "Failed to write to stdout: %s\n",
202 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
207 * Write message to the master process.
209 * @param message_type message type to use
210 * @param data data to append, NULL for none
211 * @param data_length number of bytes in @a data
212 * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
/* Frames one message for the parent: a GNUNET_MessageHeader (type and total
   size, both passed through htons) followed by data_length payload bytes,
   each part sent with write_all(). */
/* NOTE(review): the head of the logging call that owns the format string
   below, and the return on success, are on lines not visible here. */
215 write_message (uint16_t message_type, const char *data, size_t data_length)
217 struct GNUNET_MessageHeader hdr;
221 "Helper sends %u-byte message of type %u\n",
222 (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length),
223 (unsigned int) message_type);
225 hdr.type = htons (message_type);
/* htons() takes a 16-bit value, so header plus payload must fit in 16 bits
   or the size field is silently truncated. extract_files() clamps its
   payload accordingly; preprocess_file() passes strlen (filename) + 1
   unchecked. NOTE(review): confirm path lengths stay below that bound. */
226 hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length);
/* Header first, then payload. main() calls this with data NULL and
   data_length 0, so write_all() must cope with an empty payload. */
227 if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) ||
228 (GNUNET_OK != write_all (data, data_length)))
229 return GNUNET_SYSERR;
235 * Function called to (recursively) add all of the files in the
236 * directory to the tree. Called by the directory scanner to initiate
237 * the scan. Does NOT yet add any metadata.
239 * @param filename file or directory to scan
240 * @param dst where to store the resulting share tree item;
241 * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
242 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
/* Forward declaration: scan_callback() calls preprocess_file(), which in
   turn passes scan_callback() to GNUNET_DISK_directory_scan(). */
245 preprocess_file (const char *filename, struct ScanTreeNode **dst);
249 * Closure for the 'scan_callback'
/* State shared between preprocess_file() and scan_callback() for the scan
   of one directory. */
/* NOTE(review): the member that belongs to the last comment is not visible
   here; scan_callback() and preprocess_file() access it under the name
   stop. */
251 struct RecursionContext
254 * Parent to add the files to.
256 struct ScanTreeNode *parent;
259 * Flag to set to GNUNET_YES on serious errors.
266 * Function called by the directory iterator to (recursively) add all
267 * of the files in the directory to the tree. Called by the directory
268 * scanner to initiate the scan. Does NOT yet add any metadata.
270 * @param cls the `struct RecursionContext`
271 * @param filename file or directory to scan
272 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
/* Per-entry callback that preprocess_file() passes to
   GNUNET_DISK_directory_scan(): builds the node for filename and attaches
   it to the child list of rc->parent. */
275 scan_callback (void *cls, const char *filename)
277 struct RecursionContext *rc = cls;
278 struct ScanTreeNode *chld;
280 if (GNUNET_OK != preprocess_file (filename, &chld))
/* Record the failure for preprocess_file(), which inspects rc.stop after
   the directory scan; the error return presumably ends the iteration. */
282 rc->stop = GNUNET_YES;
283 return GNUNET_SYSERR;
/* NOTE(review): preprocess_file() documents that it may store NULL in chld
   while still returning GNUNET_OK. The numbering skips 284-286, presumably
   where that case is handled before chld is dereferenced - confirm. */
287 chld->parent = rc->parent;
/* NOTE(review): the last argument of the insertion and the success return
   are on lines not visible here. */
288 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
289 rc->parent->children_tail,
296 * Function called to (recursively) add all of the files in the
297 * directory to the tree. Called by the directory scanner to initiate
298 * the scan. Does NOT yet add any metadata.
300 * @param filename file or directory to scan
301 * @param dst where to store the resulting share tree item;
302 * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
303 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
/* Builds the ScanTreeNode for filename and reports progress to the parent
   process; directories are walked with GNUNET_DISK_directory_scan() using
   scan_callback(). */
/* NOTE(review): several short lines are missing from this excerpt, among
   them the declarations of sbuf and fsize, the middle arguments of the
   write_message() calls, the assignments to *dst and the success return. */
306 preprocess_file (const char *filename, struct ScanTreeNode **dst)
308 struct ScanTreeNode *item;
/* The size lookup is skipped for directories (short-circuit on S_ISDIR), so
   fsize then keeps whatever it was initialised with; presumably 0, as the
   ScanTreeNode comment on file_size states. */
312 if ((0 != stat (filename, &sbuf)) ||
313 ((! S_ISDIR (sbuf.st_mode)) &&
315 GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES))))
317 /* If the file doesn't exist (or is not stat-able for any other reason)
318 skip it (but report it), but do continue. */
/* Only a failed write to the parent is fatal on this path. */
320 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
322 strlen (filename) + 1))
323 return GNUNET_SYSERR;
324 /* recoverable error, store 'NULL' in *dst */
329 /* Report the progress */
/* The message type tells the parent whether a directory or a file was
   found; the length argument includes the terminating NUL. */
332 write_message (S_ISDIR (sbuf.st_mode)
333 ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
334 : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
336 strlen (filename) + 1))
337 return GNUNET_SYSERR;
/* The node is only allocated once the progress message has been sent. */
338 item = GNUNET_new (struct ScanTreeNode);
339 item->filename = GNUNET_strdup (filename);
340 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
341 item->file_size = fsize;
342 if (GNUNET_YES == item->is_directory)
/* NOTE(review): the initialisation of rc is not visible; scan_callback()
   reads rc->parent and sets rc->stop, so both need defined values before
   the scan starts. */
344 struct RecursionContext rc;
348 GNUNET_DISK_directory_scan (filename, &scan_callback, &rc);
/* Fails when the callback flagged an error or when the closing
   PROGRESS_DIRECTORY message cannot be sent. NOTE(review): item is already
   allocated at this point and the lines between 352 and 357 are not
   visible - confirm that the subtree is released on this path. */
350 (GNUNET_YES == rc.stop) ||
352 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
357 return GNUNET_SYSERR;
366 * Extract metadata from files.
368 * @param item entry we are processing
369 * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors
/* Walks the tree below item. Directories only recurse into their children;
   for a file the meta data is extracted and sent to the parent in a
   META_DATA message whose payload starts with the NUL-terminated filename,
   followed by the serialized meta data. */
/* NOTE(review): the declarations of size and slen, the condition of the
   first branch after the size query, the arguments of the serialize call
   and the success return are on lines not visible in this excerpt. */
372 extract_files (struct ScanTreeNode *item)
374 struct GNUNET_CONTAINER_MetaData *meta;
378 if (GNUNET_YES == item->is_directory)
380 /* for directories, we simply only descend, no extraction, no
381 progress reporting */
382 struct ScanTreeNode *pos;
/* The first failing child aborts the whole walk. */
384 for (pos = item->children_head; NULL != pos; pos = pos->next)
385 if (GNUNET_OK != extract_files (pos))
386 return GNUNET_SYSERR;
390 /* this is the expensive operation, *afterwards* we'll check for aborts */
391 meta = GNUNET_CONTAINER_meta_data_create ();
392 #if HAVE_LIBEXTRACTOR
393 EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
/* slen counts the terminating NUL so the filename travels as a C string. */
395 slen = strlen (item->filename) + 1;
396 size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
/* NOTE(review): this branch destroys meta before sending, so presumably it
   covers the case where there is nothing to serialize (the numbering skips
   397-399 where its condition lives) - confirm. */
400 GNUNET_CONTAINER_meta_data_destroy (meta);
402 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
405 return GNUNET_SYSERR;
/* Header, filename and meta data together must fit the 16-bit size field
   written by write_message(). NOTE(review): if slen alone exceeded
   UINT16_MAX - sizeof (header) the unsigned subtraction would wrap around;
   presumably impossible for real path lengths - confirm. */
408 else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen))
410 /* We can't transfer more than 64k bytes in one message. */
411 size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen;
/* Stack VLA; after the clamp above it is presumably bounded by roughly
   64 KiB - confirm that no path reaches this line with a larger size. */
414 char buf[size + slen];
/* The serialized meta data is placed right behind the filename. */
415 char *dst = &buf[slen];
417 GNUNET_memcpy (buf, item->filename, slen);
/* size is overwritten with the result of the call; the PART option
   presumably permits a partial serialization that fits into the clamped
   size. NOTE(review): handling of a failed serialization is not visible
   (the numbering skips 423-427). */
418 size = GNUNET_CONTAINER_meta_data_serialize (
422 GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
428 GNUNET_CONTAINER_meta_data_destroy (meta);
430 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
433 return GNUNET_SYSERR;
440 * Install a signal handler to ignore SIGPIPE.
/* Body of the SIGPIPE setup routine: installs SIG_IGN for SIGPIPE with an
   empty signal mask, presumably so that a closed pipe shows up as a
   write_all() failure instead of terminating the helper. */
/* NOTE(review): the function header is not visible in this excerpt (the
   numbering skips 441-444), nor is the head of the logging call below. */
445 struct sigaction oldsig;
446 struct sigaction sig;
448 memset (&sig, 0, sizeof(struct sigaction));
449 sig.sa_handler = SIG_IGN;
450 sigemptyset (&sig.sa_mask);
/* Two alternative flag assignments; the preprocessor conditional choosing
   between them (numbering skips 451, 453 and 455) is not visible. */
452 sig.sa_flags = SA_INTERRUPT; /* SunOS */
454 sig.sa_flags = SA_RESTART;
/* oldsig receives the previous disposition but is not used in the visible
   lines; a failing sigaction() leads to the log message below. */
456 if (0 != sigaction (SIGPIPE, &sig, &oldsig))
458 "Failed to install SIGPIPE handler: %s\n",
464 * Turn the given file descriptor into '/dev/null'.
466 * @param fd fd to bind to /dev/null
467 * @param flags flags to use (O_RDONLY or O_WRONLY)
/* Rebinds fd to /dev/null (despite the zero in the name): closes fd, opens
   /dev/null with the given flags, duplicates the new descriptor onto fd and
   closes the temporary one. */
/* NOTE(review): the declaration of z and lines 477-478 are not visible. If
   open() hands back fd itself, the dup2/close pair below would close fd
   again, so presumably the missing lines return early in that case -
   confirm against the full file. */
470 make_dev_zero (int fd, int flags)
/* close() and open() failures are checked with GNUNET_assert, whereas a
   dup2() mismatch is only checked with GNUNET_break. */
474 GNUNET_assert (0 == close (fd));
475 z = open ("/dev/null", flags);
476 GNUNET_assert (-1 != z);
479 GNUNET_break (fd == dup2 (z, fd));
480 GNUNET_assert (0 == close (z));
485 * Main function of the helper process to extract meta data.
487 * @param argc should be 2 or 3
488 * @param argv [0] our binary name
489 * [1] name of the file or directory to process
490 * [2] "-" to disable extraction, NULL for defaults,
491 * otherwise custom plugins to load from LE
492 * @return 0 on success
/* Entry point of the helper: moves the IPC channel off descriptor 1, loads
   the extractor plugins unless disabled, builds the tree with
   preprocess_file() (counting pass), then runs extract_files() and reports
   ERROR or FINISHED to the parent. */
/* NOTE(review): the declaration and assignment of ex (presumably argv[2]),
   the return statements with their exit codes, any release of the tree and
   the trailing arguments of several write_message() calls are on lines not
   visible in this excerpt. */
495 main (int argc, char *const *argv)
497 const char *filename_expanded;
499 struct ScanTreeNode *root;
502 /* move stdout to some other FD for IPC, bind
503 stdout/stderr to /dev/null */
/* NOTE(review): the result of dup() is not checked (504 is directly
   followed by 505); a failure would only surface later as write errors. */
504 output_stream = dup (1);
505 make_dev_zero (1, O_WRONLY);
506 make_dev_zero (2, O_WRONLY);
508 /* parse command line */
/* Accepted argument counts are 2 and 3, i.e. one or two arguments after the
   program name. */
509 if ((3 != argc) && (2 != argc))
/* NOTE(review): descriptor 2 was rebound to /dev/null above; depending on
   how this message is emitted (the call head is not visible) it may never
   be seen by anyone. */
513 "gnunet-helper-fs-publish needs exactly one or two arguments\n");
516 filename_expanded = argv[1];
/* Plugins are loaded unless ex is exactly the string consisting of a single
   dash. */
518 if ((NULL == ex) || (0 != strcmp (ex, "-")))
520 #if HAVE_LIBEXTRACTOR
521 plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
523 plugins = EXTRACTOR_plugin_add_config (plugins,
525 EXTRACTOR_OPTION_DEFAULT_POLICY);
529 /* scan tree to find out how much work there is to be done */
530 if (GNUNET_OK != preprocess_file (filename_expanded, &root))
/* Best effort: the result of this notification is deliberately ignored. */
532 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
533 #if HAVE_LIBEXTRACTOR
534 EXTRACTOR_plugin_remove_all (plugins);
538 /* signal that we're done counting files, so that a percentage of
539 progress can now be calculated */
541 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE,
545 #if HAVE_LIBEXTRACTOR
546 EXTRACTOR_plugin_remove_all (plugins);
/* NOTE(review): preprocess_file() documents that it can store NULL in root
   while returning GNUNET_OK, and extract_files() dereferences its argument
   right away. The numbering skips 547-551, presumably where root is checked
   for NULL - confirm. */
552 if (GNUNET_OK != extract_files (root))
554 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR,
558 #if HAVE_LIBEXTRACTOR
559 EXTRACTOR_plugin_remove_all (plugins);
565 /* enable "clean" shutdown by telling parent that we are done */
566 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED,
569 #if HAVE_LIBEXTRACTOR
570 EXTRACTOR_plugin_remove_all (plugins);
575 /* end of gnunet-helper-fs-publish.c */