2 This file is part of GNUnet.
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file src/fs/gnunet-helper-fs-publish.c
23 * @brief Tool to help extract meta data asynchronously
24 * @author Christian Grothoff
26 * This program will scan a directory for files with meta data
27 * and report the results to stdout.
30 #include "gnunet_fs_service.h"
/* One entry (file or directory) of the scanned tree. Siblings are chained through next/prev; the entries of a directory hang off children_head/children_tail. */
34 * A node of a directory tree.
40 * This is a doubly-linked list
/* Next sibling; extract_files follows this link when it walks the children of a directory. */
42 struct ScanTreeNode *next;
45 * This is a doubly-linked list
/* Previous sibling in the same child list. */
47 struct ScanTreeNode *prev;
50 * Parent of this node, NULL for top-level entries.
/* Assigned by scan_callback from the recursion context; free_tree uses it to unlink the node from the child list of its parent. */
52 struct ScanTreeNode *parent;
55 * This is a doubly-linked tree
56 * NULL for files and empty directories
/* Head of the child list; scan_callback adds children with GNUNET_CONTAINER_DLL_insert. */
58 struct ScanTreeNode *children_head;
61 * This is a doubly-linked tree
62 * NULL for files and empty directories
/* Tail of the same child list. */
64 struct ScanTreeNode *children_tail;
67 * Name of the file/directory
72 * Size of the file (if it is a file), in bytes
77 * GNUNET_YES if this is a directory
/* NOTE(review): the declarations of the name, size and directory-flag fields are not visible here; other code refers to them as filename, file_size and is_directory. */
85 * List of libextractor plugins to use for extracting.
/* Assigned in the main function from EXTRACTOR_plugin_add_defaults and EXTRACTOR_plugin_add_config, passed to EXTRACTOR_extract in extract_files, and released with EXTRACTOR_plugin_remove_all at the end of the run. */
87 static struct EXTRACTOR_PluginList *plugins;
91 * Add meta data that libextractor finds to our meta data
94 * @param cls closure, our meta data container
95 * @param plugin_name name of the plugin that produced this value;
96 * special values can be used (e.g. '<zlib>' for zlib being
97 * used in the main libextractor library and yielding
99 * @param type libextractor-type describing the meta data
100 * @param format basic format information about data
101 * @param data_mime_type mime-type of data (not of the original file);
102 * can be NULL (if mime-type is not known)
103 * @param data actual meta-data found
104 * @param data_len number of bytes in data
105 * @return always 0 to continue extracting
/* Callback handed to EXTRACTOR_extract (see extract_files); cls is the GNUNET_CONTAINER_MetaData container that receives each extracted item. */
108 add_to_md (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type,
109 enum EXTRACTOR_MetaFormat format, const char *data_mime_type,
110 const char *data, size_t data_len)
112 struct GNUNET_CONTAINER_MetaData *md = cls;
/* All arguments are forwarded unchanged; the result of the insert is deliberately discarded via the void cast. */
114 (void) GNUNET_CONTAINER_meta_data_insert (md, plugin_name, type, format,
115 data_mime_type, data, data_len);
121 * Free memory of the 'tree' structure
123 * @param tree tree to free
/* Releases one node of the scan tree, including its filename string. */
126 free_tree (struct ScanTreeNode *tree)
128 struct ScanTreeNode *pos;
/* Loops for as long as the node still has a first child. NOTE(review): the loop body is not visible here; presumably it frees that child, which would also unlink it - confirm. */
130 while (NULL != (pos = tree->children_head))
/* Nodes without a parent (top-level entries) are not part of any child list, so there is nothing to unlink for them. */
132 if (NULL != tree->parent)
133 GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
134 tree->parent->children_tail,
/* The filename is heap memory owned by the node (preprocess_file stores a GNUNET_strdup copy). */
136 GNUNET_free (tree->filename);
142 * Write 'size' bytes from 'buf' to stdout.
144 * @param buf buffer with data to write
145 * @param size number of bytes to write
146 * @return GNUNET_OK on success, GNUNET_SYSERR on error
/* Writes the whole buffer, retrying after partial writes; anything short of size bytes is reported as GNUNET_SYSERR. */
149 write_all (const void *buf,
/* Byte-wise view of buf. NOTE(review): the write call itself is not visible here - presumably cbuf is used to index into the buffer; confirm. */
152 const char *cbuf = buf;
/* Keep going while the last write made progress (wr > 0) and bytes remain to be sent. */
164 } while ( (wr > 0) && (total < size) );
/* Failures are only logged at DEBUG level. NOTE(review): the condition guarding this log call is not visible here. */
166 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
167 "Failed to write to stdout: %s\n",
/* Success requires that every requested byte was written. */
169 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
174 * Write message to the master process.
176 * @param message_type message type to use
177 * @param data data to append, NULL for none
178 * @param data_length number of bytes in data
179 * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
/* Sends one message to the parent process: a GNUNET_MessageHeader whose type and size are stored in network byte order, followed (per the parameter documentation) by the optional payload. */
182 write_message (uint16_t message_type,
186 struct GNUNET_MessageHeader hdr;
188 hdr.type = htons (message_type);
/* NOTE(review): htons takes a 16-bit value, so header size plus data_length must fit in 16 bits or the size field is silently truncated. No bound check on data_length is visible here - confirm that callers guarantee it (extract_files passes name plus serialized metadata). */
189 hdr.size = htons (sizeof (struct GNUNET_MessageHeader) + data_length);
/* NOTE(review): the condition leading to this error return is not visible here. */
196 return GNUNET_SYSERR;
202 * Function called to (recursively) add all of the files in the
203 * directory to the tree. Called by the directory scanner to initiate
204 * the scan. Does NOT yet add any metadata.
206 * @param filename file or directory to scan
207 * @param dst where to store the resulting share tree item
208 * @return GNUNET_OK on success, GNUNET_SYSERR on error
/* Forward declaration: scan_callback calls preprocess_file, and the definition of preprocess_file only appears after scan_callback. */
211 preprocess_file (const char *filename,
212 struct ScanTreeNode **dst);
216 * Closure for the 'scan_callback'
/* State shared between preprocess_file and scan_callback while one directory is scanned; scan_callback receives it as its cls argument. */
218 struct RecursionContext
221 * Parent to add the files to.
/* scan_callback links every node it creates under this directory node. */
223 struct ScanTreeNode *parent;
226 * Flag to set to GNUNET_YES on serious errors.
/* NOTE(review): the declaration of the flag is not visible here; code refers to it as stop (rc->stop in scan_callback, rc.stop in preprocess_file). */
233 * Function called by the directory iterator to (recursively) add all
234 * of the files in the directory to the tree. Called by the directory
235 * scanner to initiate the scan. Does NOT yet add any metadata.
237 * @param cls the 'struct RecursionContext'
238 * @param filename file or directory to scan
239 * @return GNUNET_OK on success, GNUNET_SYSERR on error
/* Per-entry callback for the directory scan: builds the node (and subtree) for filename via preprocess_file and links it under rc->parent. */
242 scan_callback (void *cls,
243 const char *filename)
245 struct RecursionContext *rc = cls;
/* NOTE(review): chld has no initializer and is dereferenced below with no NULL check visible. preprocess_file has a skip path for files that cannot be stat-ed - confirm that this path always leaves a valid node (or a checked NULL) in chld. */
246 struct ScanTreeNode *chld;
249 preprocess_file (filename,
/* Record the failure in the shared context; preprocess_file inspects rc.stop after the directory scan returns. */
252 rc->stop = GNUNET_YES;
253 return GNUNET_SYSERR;
/* Attach the new node to the directory that is being scanned. */
255 chld->parent = rc->parent;
256 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
257 rc->parent->children_tail,
264 * Function called to (recursively) add all of the files in the
265 * directory to the tree. Called by the directory scanner to initiate
266 * the scan. Does NOT yet add any metadata.
268 * @param filename file or directory to scan
269 * @param dst where to store the resulting share tree item
270 * @return GNUNET_OK on success, GNUNET_SYSERR on error
/* Stats filename, reports it to the parent process, allocates a ScanTreeNode describing it and, for directories, scans the directory entries with GNUNET_DISK_directory_scan. */
273 preprocess_file (const char *filename,
274 struct ScanTreeNode **dst)
276 struct ScanTreeNode *item;
279 if (0 != STAT (filename, &sbuf))
281 /* If the file doesn't exist (or is not stat-able for any other reason)
282 skip it (but report it), but do continue. */
/* The payload is the file name including its terminating NUL (strlen + 1); only a failed write makes the skip fatal. */
284 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
285 filename, strlen (filename) + 1))
286 return GNUNET_SYSERR;
290 /* Report the progress */
/* The message type tells the parent whether the entry is a directory or a regular file. */
292 write_message (S_ISDIR (sbuf.st_mode)
293 ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
294 : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
295 filename, strlen (filename) + 1))
296 return GNUNET_SYSERR;
/* Build the node; the filename is duplicated, so the node owns its own copy (released in free_tree). */
297 item = GNUNET_malloc (sizeof (struct ScanTreeNode));
298 item->filename = GNUNET_strdup (filename);
299 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
/* st_size is recorded for directories as well as for files. */
300 item->file_size = (uint64_t) sbuf.st_size;
301 if (item->is_directory == GNUNET_YES)
/* Context handed to the scan callback; NOTE(review): its initialization and the callback argument of the scan call are not visible here. */
303 struct RecursionContext rc;
307 GNUNET_DISK_directory_scan (filename,
/* Fatal if a callback flagged a serious error or if a further PROGRESS_DIRECTORY message (payload not visible here) cannot be written. */
310 if ( (rc.stop == GNUNET_YES) ||
312 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
/* NOTE(review): confirm that item (and any children already attached) is released before this return; the cleanup is not visible here. */
316 return GNUNET_SYSERR;
325 * Extract metadata from files.
327 * @param item entry we are processing
328 * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors
/* Walks the scan tree. Directories only iterate over their children; for a file, libextractor is run and a META_DATA message is sent to the parent process. */
331 extract_files (struct ScanTreeNode *item)
333 struct GNUNET_CONTAINER_MetaData *meta;
337 if (item->is_directory == GNUNET_YES)
339 /* for directories, we only descend: no extraction, no
340 progress reporting */
341 struct ScanTreeNode *pos;
/* Visit every child in list order; NOTE(review): the per-child call is not visible here - presumably a recursive extract_files whose failure aborts the walk; confirm. */
343 for (pos = item->children_head; NULL != pos; pos = pos->next)
346 return GNUNET_SYSERR;
350 /* this is the expensive operation, *afterwards* we'll check for aborts */
351 meta = GNUNET_CONTAINER_meta_data_create ();
/* add_to_md stores every item libextractor reports into meta. */
353 EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
/* slen counts the file name including its terminating NUL. */
354 slen = strlen (item->filename) + 1;
355 size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
/* First send path: the message carries only the file name. NOTE(review): the condition selecting this path is not visible here. */
359 GNUNET_CONTAINER_meta_data_destroy (meta);
361 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
362 item->filename, slen))
363 return GNUNET_SYSERR;
/* Second send path: buffer laid out as file name (with NUL) followed by room for the serialized metadata. NOTE(review): buf is a variable-length array sized by the metadata, so large metadata lands on the stack, and the total must also fit the 16-bit size field written by write_message - confirm an upper bound exists. */
367 char buf[size + slen];
/* dst points just past the copied file name. */
368 char *dst = &buf[slen];
370 memcpy (buf, item->filename, slen);
/* size is overwritten with the result of the serialize call. NOTE(review): how a failed serialization is handled before the write below is not visible here. */
371 size = GNUNET_CONTAINER_meta_data_serialize (meta,
373 GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
374 GNUNET_CONTAINER_meta_data_destroy (meta);
376 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
379 return GNUNET_SYSERR;
386 * Main function of the helper process to extract meta data.
388 * @param argc should be 2 or 3
389 * @param argv [0] our binary name
390 * [1] name of the file or directory to process
391 * [2] "-" to disable extraction, NULL for defaults,
392 * otherwise custom plugins to load from LE
393 * @return 0 on success
/* Body of the main function of the helper: check arguments, load extractor plugins, build the file tree, then extract metadata and report everything to the parent process through write_message. */
398 const char *filename_expanded;
400 struct ScanTreeNode *root;
403 /* We're using stdout to communicate binary data back to the parent; use
406 _setmode (1, _O_BINARY);
409 /* parse command line */
410 if ( (argc != 3) && (argc != 2) )
414 "gnunet-helper-fs-publish needs exactly one or two arguments\n");
/* argv[1] names the file or directory to process. */
417 filename_expanded = argv[1];
/* Plugins are loaded only when ex is not a lone dash. NOTE(review): the assignment of ex and the first half of this condition are not visible here. */
420 (0 != strcmp (ex, "-")) )
422 plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
/* Extends the default plugin list with the plugin configuration string held in ex. */
424 plugins = EXTRACTOR_plugin_add_config (plugins, ex,
425 EXTRACTOR_OPTION_DEFAULT_POLICY);
428 /* scan tree to find out how much work there is to be done */
429 if (GNUNET_OK != preprocess_file (filename_expanded,
/* Best effort: tell the parent that scanning failed; the result of this write is ignored. */
432 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
435 /* signal that we're done counting files, so that a percentage of
436 progress can now be calculated */
438 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE, NULL, 0))
/* Second pass over the tree built above: run the extractors and send the metadata. */
441 extract_files (root))
/* Best effort error report, as for the scan failure above. */
443 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
448 /* enable "clean" shutdown by telling parent that we are done */
449 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED, NULL, 0);
/* NOTE(review): whether this call is guarded for the case where no plugins were loaded is not visible here. */
451 EXTRACTOR_plugin_remove_all (plugins);
456 /* end of gnunet-helper-fs-publish.c */