uncrustify as demanded.
[oweals/gnunet.git] / src / fs / fs_dirmetascan.c
1 /*
2      This file is part of GNUnet
3      Copyright (C) 2005-2012 GNUnet e.V.
4
5      GNUnet is free software: you can redistribute it and/or modify it
6      under the terms of the GNU Affero General Public License as published
7      by the Free Software Foundation, either version 3 of the License,
8      or (at your option) any later version.
9
10      GNUnet is distributed in the hope that it will be useful, but
11      WITHOUT ANY WARRANTY; without even the implied warranty of
12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13      Affero General Public License for more details.
14
15      You should have received a copy of the GNU Affero General Public License
16      along with this program.  If not, see <http://www.gnu.org/licenses/>.
17
18      SPDX-License-Identifier: AGPL3.0-or-later
19  */
20
21 /**
22  * @file fs/fs_dirmetascan.c
23  * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
24  *        from an on-disk directory for publishing; use the 'gnunet-helper-fs-publish'.
25  * @author LRN
26  * @author Christian Grothoff
27  */
28 #include "platform.h"
29 #include "gnunet_fs_service.h"
30 #include "gnunet_scheduler_lib.h"
31 #include <pthread.h>
32
33
34 /**
35  * An opaque structure a pointer to which is returned to the
36  * caller to be used to control the scanner.
37  */
38 struct GNUNET_FS_DirScanner {
39   /**
40    * Helper process.
41    */
42   struct GNUNET_HELPER_Handle *helper;
43
44   /**
45    * Expanded filename (as given by the scan initiator).
46    * The scanner thread stores a copy here, and frees it when it finishes.
47    */
48   char *filename_expanded;
49
50   /**
51    * Second argument to helper process.
52    */
53   char *ex_arg;
54
55   /**
56    * The function that will be called every time there's a progress
57    * message.
58    */
59   GNUNET_FS_DirScannerProgressCallback progress_callback;
60
61   /**
62    * A closure for progress_callback.
63    */
64   void *progress_callback_cls;
65
66   /**
67    * After the scan is finished, it will contain a pointer to the
68    * top-level directory entry in the directory tree built by the
69    * scanner.
70    */
71   struct GNUNET_FS_ShareTreeItem *toplevel;
72
73   /**
74    * Current position during processing.
75    */
76   struct GNUNET_FS_ShareTreeItem *pos;
77
78   /**
79    * Task scheduled when we are done.
80    */
81   struct GNUNET_SCHEDULER_Task *stop_task;
82
83   /**
84    * Arguments for helper.
85    */
86   char *args[4];
87 };
88
89
90 /**
91  * Abort the scan.  Must not be called from within the progress_callback
92  * function.
93  *
94  * @param ds directory scanner structure
95  */
96 void
97 GNUNET_FS_directory_scan_abort(struct GNUNET_FS_DirScanner *ds)
98 {
99   /* terminate helper */
100   if (NULL != ds->helper)
101     GNUNET_HELPER_stop(ds->helper, GNUNET_NO);
102
103   /* free resources */
104   if (NULL != ds->toplevel)
105     GNUNET_FS_share_tree_free(ds->toplevel);
106   if (NULL != ds->stop_task)
107     GNUNET_SCHEDULER_cancel(ds->stop_task);
108   GNUNET_free_non_null(ds->ex_arg);
109   GNUNET_free(ds->filename_expanded);
110   GNUNET_free(ds);
111 }
112
113
114 /**
115  * Obtain the result of the scan after the scan has signalled
116  * completion.  Must not be called prior to completion.  The 'ds' is
117  * freed as part of this call.
118  *
119  * @param ds directory scanner structure
120  * @return the results of the scan (a directory tree)
121  */
122 struct GNUNET_FS_ShareTreeItem *
123 GNUNET_FS_directory_scan_get_result(struct GNUNET_FS_DirScanner *ds)
124 {
125   struct GNUNET_FS_ShareTreeItem *result;
126
127   /* check that we're actually done */
128   GNUNET_assert(NULL == ds->helper);
129   /* preserve result */
130   result = ds->toplevel;
131   ds->toplevel = NULL;
132   GNUNET_FS_directory_scan_abort(ds);
133   return result;
134 }
135
136
137 /**
138  * Move in the directory from the given position to the next file
139  * in DFS traversal.
140  *
141  * @param pos current position
142  * @return next file, NULL for none
143  */
144 static struct GNUNET_FS_ShareTreeItem *
145 advance(struct GNUNET_FS_ShareTreeItem *pos)
146 {
147   int moved;
148
149   GNUNET_assert(NULL != pos);
150   moved = 0; /* must not terminate, even on file, otherwise "normal" */
151   while ((pos->is_directory == GNUNET_YES) || (0 == moved))
152     {
153       if ((moved != -1) && (NULL != pos->children_head))
154         {
155           pos = pos->children_head;
156           moved = 1; /* can terminate if file */
157           continue;
158         }
159       if (NULL != pos->next)
160         {
161           pos = pos->next;
162           moved = 1; /* can terminate if file */
163           continue;
164         }
165       if (NULL != pos->parent)
166         {
167           pos = pos->parent;
168           moved = -1; /* force move to 'next' or 'parent' */
169           continue;
170         }
171       /* no more options, end of traversal */
172       return NULL;
173     }
174   return pos;
175 }
176
177
178 /**
179  * Add another child node to the tree.
180  *
181  * @param parent parent of the child, NULL for top level
182  * @param filename name of the file or directory
183  * @param is_directory GNUNET_YES for directories
184  * @return new entry that was just created
185  */
186 static struct GNUNET_FS_ShareTreeItem *
187 expand_tree(struct GNUNET_FS_ShareTreeItem *parent,
188             const char *filename,
189             int is_directory)
190 {
191   struct GNUNET_FS_ShareTreeItem *chld;
192   size_t slen;
193
194   chld = GNUNET_new(struct GNUNET_FS_ShareTreeItem);
195   chld->parent = parent;
196   chld->filename = GNUNET_strdup(filename);
197   GNUNET_asprintf(&chld->short_filename,
198                   "%s%s",
199                   GNUNET_STRINGS_get_short_name(filename),
200                   is_directory == GNUNET_YES ? "/" : "");
201   /* make sure we do not end with '//' */
202   slen = strlen(chld->short_filename);
203   if ((slen >= 2) && (chld->short_filename[slen - 1] == '/') &&
204       (chld->short_filename[slen - 2] == '/'))
205     chld->short_filename[slen - 1] = '\0';
206   chld->is_directory = is_directory;
207   if (NULL != parent)
208     GNUNET_CONTAINER_DLL_insert(parent->children_head,
209                                 parent->children_tail,
210                                 chld);
211   return chld;
212 }
213
214
215 /**
216  * Task run last to shut everything down.
217  *
218  * @param cls the 'struct GNUNET_FS_DirScanner'
219  */
220 static void
221 finish_scan(void *cls)
222 {
223   struct GNUNET_FS_DirScanner *ds = cls;
224
225   ds->stop_task = NULL;
226   if (NULL != ds->helper)
227     {
228       GNUNET_HELPER_stop(ds->helper, GNUNET_NO);
229       ds->helper = NULL;
230     }
231   ds->progress_callback(ds->progress_callback_cls,
232                         NULL,
233                         GNUNET_SYSERR,
234                         GNUNET_FS_DIRSCANNER_FINISHED);
235 }
236
237
238 /**
239  * Called every time there is data to read from the scanner.
240  * Calls the scanner progress handler.
241  *
242  * @param cls the closure (directory scanner object)
243  * @param msg message from the helper process
244  * @return #GNUNET_OK on success,
245  *    #GNUNET_NO to stop further processing (no error)
246  *    #GNUNET_SYSERR to stop further processing with error
247  */
248 static int
249 process_helper_msgs(void *cls, const struct GNUNET_MessageHeader *msg)
250 {
251   struct GNUNET_FS_DirScanner *ds = cls;
252   const char *filename;
253   size_t left;
254
255 #if 0
256   fprintf(stderr,
257           "DMS parses %u-byte message of type %u\n",
258           (unsigned int)ntohs(msg->size),
259           (unsigned int)ntohs(msg->type));
260 #endif
261   left = ntohs(msg->size) - sizeof(struct GNUNET_MessageHeader);
262   filename = (const char *)&msg[1];
263   switch (ntohs(msg->type))
264     {
265     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE:
266       if (filename[left - 1] != '\0')
267         {
268           GNUNET_break(0);
269           break;
270         }
271       ds->progress_callback(ds->progress_callback_cls,
272                             filename,
273                             GNUNET_NO,
274                             GNUNET_FS_DIRSCANNER_FILE_START);
275       if (NULL == ds->toplevel)
276         {
277           ds->toplevel = expand_tree(ds->pos, filename, GNUNET_NO);
278         }
279       else
280         {
281           GNUNET_assert(NULL != ds->pos);
282           (void)expand_tree(ds->pos, filename, GNUNET_NO);
283         }
284       return GNUNET_OK;
285
286     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY:
287       if (filename[left - 1] != '\0')
288         {
289           GNUNET_break(0);
290           break;
291         }
292       if (0 == strcmp("..", filename))
293         {
294           if (NULL == ds->pos)
295             {
296               GNUNET_break(0);
297               break;
298             }
299           ds->pos = ds->pos->parent;
300           return GNUNET_OK;
301         }
302       ds->progress_callback(ds->progress_callback_cls,
303                             filename,
304                             GNUNET_YES,
305                             GNUNET_FS_DIRSCANNER_FILE_START);
306       ds->pos = expand_tree(ds->pos, filename, GNUNET_YES);
307       if (NULL == ds->toplevel)
308         ds->toplevel = ds->pos;
309       return GNUNET_OK;
310
311     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR:
312       break;
313
314     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE:
315       if ('\0' != filename[left - 1])
316         break;
317       ds->progress_callback(ds->progress_callback_cls,
318                             filename,
319                             GNUNET_SYSERR,
320                             GNUNET_FS_DIRSCANNER_FILE_IGNORED);
321       return GNUNET_OK;
322
323     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE:
324       if (0 != left)
325         {
326           GNUNET_break(0);
327           break;
328         }
329       if (NULL == ds->toplevel)
330         break;
331       ds->progress_callback(ds->progress_callback_cls,
332                             NULL,
333                             GNUNET_SYSERR,
334                             GNUNET_FS_DIRSCANNER_ALL_COUNTED);
335       ds->pos = ds->toplevel;
336       if (GNUNET_YES == ds->pos->is_directory)
337         ds->pos = advance(ds->pos);
338       return GNUNET_OK;
339
340     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA: {
341       size_t nlen;
342       const char *end;
343
344       if (NULL == ds->pos)
345         {
346           GNUNET_break(0);
347           break;
348         }
349       end = memchr(filename, 0, left);
350       if (NULL == end)
351         {
352           GNUNET_break(0);
353           break;
354         }
355       end++;
356       nlen = end - filename;
357       left -= nlen;
358       if (0 != strcmp(filename, ds->pos->filename))
359         {
360           GNUNET_break(0);
361           break;
362         }
363       ds->progress_callback(ds->progress_callback_cls,
364                             filename,
365                             GNUNET_YES,
366                             GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED);
367       if (0 < left)
368         {
369           ds->pos->meta = GNUNET_CONTAINER_meta_data_deserialize(end, left);
370           if (NULL == ds->pos->meta)
371             {
372               GNUNET_break(0);
373               break;
374             }
375           /* having full filenames is too dangerous; always make sure we clean them up */
376           GNUNET_CONTAINER_meta_data_delete(ds->pos->meta,
377                                             EXTRACTOR_METATYPE_FILENAME,
378                                             NULL,
379                                             0);
380           /* instead, put in our 'safer' original filename */
381           GNUNET_CONTAINER_meta_data_insert(ds->pos->meta,
382                                             "<libgnunetfs>",
383                                             EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
384                                             EXTRACTOR_METAFORMAT_UTF8,
385                                             "text/plain",
386                                             ds->pos->short_filename,
387                                             strlen(ds->pos->short_filename) + 1);
388         }
389       ds->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data(ds->pos->meta);
390       ds->pos = advance(ds->pos);
391       return GNUNET_OK;
392     }
393
394     case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED:
395       if (NULL != ds->pos)
396         {
397           GNUNET_break(0);
398           break;
399         }
400       if (0 != left)
401         {
402           GNUNET_break(0);
403           break;
404         }
405       if (NULL == ds->toplevel)
406         break;
407       ds->stop_task = GNUNET_SCHEDULER_add_now(&finish_scan, ds);
408       return GNUNET_OK;
409
410     default:
411       GNUNET_break(0);
412       break;
413     }
414   ds->progress_callback(ds->progress_callback_cls,
415                         NULL,
416                         GNUNET_SYSERR,
417                         GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
418   return GNUNET_OK;
419 }
420
421
422 /**
423  * Function called if our helper process died.
424  *
425  * @param cls the 'struct GNUNET_FS_DirScanner' callback.
426  */
427 static void
428 helper_died_cb(void *cls)
429 {
430   struct GNUNET_FS_DirScanner *ds = cls;
431
432   ds->helper = NULL;
433   if (NULL != ds->stop_task)
434     return; /* normal death, was finished */
435   ds->progress_callback(ds->progress_callback_cls,
436                         NULL,
437                         GNUNET_SYSERR,
438                         GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
439 }
440
441
442 /**
443  * Start a directory scanner thread.
444  *
445  * @param filename name of the directory to scan
446  * @param disable_extractor #GNUNET_YES to not run libextractor on files (only
447  *        build a tree)
448  * @param ex if not NULL, must be a list of extra plugins for extractor
449  * @param cb the callback to call when there are scanning progress messages
450  * @param cb_cls closure for 'cb'
451  * @return directory scanner object to be used for controlling the scanner
452  */
453 struct GNUNET_FS_DirScanner *
454 GNUNET_FS_directory_scan_start(const char *filename,
455                                int disable_extractor,
456                                const char *ex,
457                                GNUNET_FS_DirScannerProgressCallback cb,
458                                void *cb_cls)
459 {
460   struct stat sbuf;
461   char *filename_expanded;
462   struct GNUNET_FS_DirScanner *ds;
463
464   if (0 != stat(filename, &sbuf))
465     return NULL;
466   filename_expanded = GNUNET_STRINGS_filename_expand(filename);
467   if (NULL == filename_expanded)
468     return NULL;
469   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
470              "Starting to scan directory `%s'\n",
471              filename_expanded);
472   ds = GNUNET_new(struct GNUNET_FS_DirScanner);
473   ds->progress_callback = cb;
474   ds->progress_callback_cls = cb_cls;
475   ds->filename_expanded = filename_expanded;
476   if (disable_extractor)
477     ds->ex_arg = GNUNET_strdup("-");
478   else
479     ds->ex_arg = (NULL != ex) ? GNUNET_strdup(ex) : NULL;
480   ds->args[0] = "gnunet-helper-fs-publish";
481   ds->args[1] = ds->filename_expanded;
482   ds->args[2] = ds->ex_arg;
483   ds->args[3] = NULL;
484   ds->helper = GNUNET_HELPER_start(GNUNET_NO,
485                                    "gnunet-helper-fs-publish",
486                                    ds->args,
487                                    &process_helper_msgs,
488                                    &helper_died_cb,
489                                    ds);
490   if (NULL == ds->helper)
491     {
492       GNUNET_free(filename_expanded);
493       GNUNET_free(ds);
494       return NULL;
495     }
496   return ds;
497 }
498
499
500 /* end of fs_dirmetascan.c */