2 This file is part of GNUnet.
3 (C) 2009, 2011 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_file_information.c
23 * @brief Manage information for publishing directory hierarchies
24 * @author Christian Grothoff
27 * - metadata filename clean up code
28 * - metadata/ksk generation for directories from contained files
31 #include <extractor.h>
32 #include "gnunet_fs_service.h"
38 * Add meta data that libextractor finds to our meta data
41 * @param cls closure, our meta data container
42 * @param plugin_name name of the plugin that produced this value;
43 * special values can be used (i.e. '<zlib>' for zlib being
44 * used in the main libextractor library and yielding
46 * @param type libextractor-type describing the meta data
47 * @param format basic format information about data
48 * @param data_mime_type mime-type of data (not of the original file);
49 * can be NULL (if mime-type is not known)
50 * @param data actual meta-data found
51 * @param data_len number of bytes in data
52 * @return always 0 to continue extracting
55 add_to_md (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type,
56 enum EXTRACTOR_MetaFormat format, const char *data_mime_type,
57 const char *data, size_t data_len)
59 struct GNUNET_CONTAINER_MetaData *md = cls;
61 (void) GNUNET_CONTAINER_meta_data_insert (md, plugin_name, type, format,
62 data_mime_type, data, data_len);
68 * Extract meta-data from a file.
70 * @return GNUNET_SYSERR on error, otherwise the number
71 * of meta-data items obtained
74 GNUNET_FS_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData *md,
76 struct EXTRACTOR_PluginList *extractors)
82 if (extractors == NULL)
84 old = GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL);
85 GNUNET_assert (old >= 0);
86 EXTRACTOR_extract (extractors, filename, NULL, 0, &add_to_md, md);
87 return (GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL) - old);
93 * Obtain the name under which this file information
94 * structure is stored on disk. Only works for top-level
95 * file information structures.
97 * @param s structure to get the filename for
98 * @return NULL on error, otherwise filename that
99 * can be passed to "GNUNET_FS_file_information_recover"
100 * to read this fi-struct from disk.
103 GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s)
107 return s->serialization;
112 * Create an entry for a file in a publish-structure.
114 * @param h handle to the file sharing subsystem
115 * @param client_info initial value for the client-info value for this entry
116 * @param filename name of the file or directory to publish
117 * @param keywords under which keywords should this file be available
118 * directly; can be NULL
119 * @param meta metadata for the file
120 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
121 * GNUNET_SYSERR for simulation
122 * @param bo block options
123 * @return publish structure entry for the file
125 struct GNUNET_FS_FileInformation *
126 GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h,
128 const char *filename,
129 const struct GNUNET_FS_Uri
132 GNUNET_CONTAINER_MetaData *meta,
134 const struct GNUNET_FS_BlockOptions
139 struct GNUNET_FS_FileInformation *ret;
144 char fn_conv[MAX_PATH];
147 if (0 != STAT (filename, &sbuf))
149 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING, "stat", filename);
152 fi = GNUNET_FS_make_file_reader_context_ (filename);
159 GNUNET_FS_file_information_create_from_reader (h, client_info,
161 &GNUNET_FS_data_reader_file_,
167 ret->filename = GNUNET_strdup (filename);
171 plibc_conv_to_win_path (filename, fn_conv);
174 while (NULL != (ss = strstr (fn, DIR_SEPARATOR_STR)))
176 GNUNET_CONTAINER_meta_data_insert (ret->meta, "<gnunet>",
177 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
178 EXTRACTOR_METAFORMAT_C_STRING,
179 "text/plain", fn, strlen (fn) + 1);
185 * Create an entry for a file in a publish-structure.
187 * @param h handle to the file sharing subsystem
188 * @param client_info initial value for the client-info value for this entry
189 * @param length length of the file
190 * @param data data for the file (should not be used afterwards by
191 * the caller; callee will "free")
192 * @param keywords under which keywords should this file be available
193 * directly; can be NULL
194 * @param meta metadata for the file
195 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
196 * GNUNET_SYSERR for simulation
197 * @param bo block options
198 * @return publish structure entry for the file
200 struct GNUNET_FS_FileInformation *
201 GNUNET_FS_file_information_create_from_data (struct GNUNET_FS_Handle *h,
202 void *client_info, uint64_t length,
204 const struct GNUNET_FS_Uri
207 GNUNET_CONTAINER_MetaData *meta,
209 const struct GNUNET_FS_BlockOptions
212 if (GNUNET_YES == do_index)
217 return GNUNET_FS_file_information_create_from_reader (h, client_info, length,
218 &GNUNET_FS_data_reader_copy_,
219 data, keywords, meta,
225 * Create an entry for a file in a publish-structure.
227 * @param h handle to the file sharing subsystem
228 * @param client_info initial value for the client-info value for this entry
229 * @param length length of the file
230 * @param reader function that can be used to obtain the data for the file
231 * @param reader_cls closure for "reader"
232 * @param keywords under which keywords should this file be available
233 * directly; can be NULL
234 * @param meta metadata for the file
235 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
236 * GNUNET_SYSERR for simulation
237 * @param bo block options
238 * @return publish structure entry for the file
240 struct GNUNET_FS_FileInformation *
241 GNUNET_FS_file_information_create_from_reader (struct GNUNET_FS_Handle *h,
244 GNUNET_FS_DataReader reader,
246 const struct GNUNET_FS_Uri
249 GNUNET_CONTAINER_MetaData *meta,
252 GNUNET_FS_BlockOptions *bo)
254 struct GNUNET_FS_FileInformation *ret;
256 if ((GNUNET_YES == do_index) && (reader != &GNUNET_FS_data_reader_file_))
261 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
263 ret->client_info = client_info;
264 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
265 if (ret->meta == NULL)
266 ret->meta = GNUNET_CONTAINER_meta_data_create ();
267 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
268 ret->data.file.reader = reader;
269 ret->data.file.reader_cls = reader_cls;
270 ret->data.file.do_index = do_index;
271 ret->data.file.file_size = length;
278 * Closure for "dir_scan_cb".
283 * Metadata extractors to use.
285 struct EXTRACTOR_PluginList *extractors;
290 struct GNUNET_FS_Handle *h;
293 * Function to call on each directory entry.
295 GNUNET_FS_FileProcessor proc;
303 * Scanner to use for subdirectories.
305 GNUNET_FS_DirectoryScanner scanner;
308 * Closure for scanner.
313 * Set to an error message (if any).
320 const struct GNUNET_FS_BlockOptions *bo;
323 * Should files be indexed?
331 * Function called on each entry in a file to
332 * cause default-publishing.
333 * @param cls closure (struct DirScanCls)
334 * @param filename name of the file to be published
335 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
338 dir_scan_cb (void *cls, const char *filename)
340 struct DirScanCls *dsc = cls;
342 struct GNUNET_FS_FileInformation *fi;
343 struct GNUNET_FS_Uri *ksk_uri;
344 struct GNUNET_FS_Uri *keywords;
345 struct GNUNET_CONTAINER_MetaData *meta;
347 if (0 != STAT (filename, &sbuf))
349 GNUNET_asprintf (&dsc->emsg, _("`%s' failed on file `%s': %s"), "stat",
350 filename, STRERROR (errno));
351 return GNUNET_SYSERR;
353 if (S_ISDIR (sbuf.st_mode))
355 fi = GNUNET_FS_file_information_create_from_directory (dsc->h, NULL,
360 dsc->bo, &dsc->emsg);
363 GNUNET_assert (NULL != dsc->emsg);
364 return GNUNET_SYSERR;
369 meta = GNUNET_CONTAINER_meta_data_create ();
370 GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors);
371 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
372 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
373 fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename,
375 dsc->do_index, dsc->bo);
376 GNUNET_CONTAINER_meta_data_destroy (meta);
377 GNUNET_FS_uri_destroy (keywords);
378 GNUNET_FS_uri_destroy (ksk_uri);
380 dsc->proc (dsc->proc_cls, filename, fi);
386 * Simple, useful default implementation of a directory scanner
387 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
388 * UNIX filename, will publish all files in the directory except hidden
389 * files (those starting with a "."). Metadata will be extracted
390 * using GNU libextractor; the specific list of plugins should be
391 * specified in "cls", passing NULL will disable (!) metadata
392 * extraction. Keywords will be derived from the metadata and be
393 * subject to default canonicalization. This is strictly a
394 * convenience function.
396 * @param cls must be of type "struct EXTRACTOR_Extractor*"
397 * @param h handle to the file sharing subsystem
398 * @param dirname name of the directory to scan
399 * @param do_index should files be indexed or inserted
400 * @param bo block options
401 * @param proc function called on each entry
402 * @param proc_cls closure for proc
403 * @param emsg where to store an error message (on errors)
404 * @return GNUNET_OK on success
407 GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h,
408 const char *dirname, int do_index,
409 const struct GNUNET_FS_BlockOptions *bo,
410 GNUNET_FS_FileProcessor proc,
411 void *proc_cls, char **emsg)
413 struct EXTRACTOR_PluginList *ex = cls;
414 struct DirScanCls dsc;
419 dsc.proc_cls = proc_cls;
420 dsc.scanner = &GNUNET_FS_directory_scanner_default;
421 dsc.scanner_cls = cls;
422 dsc.do_index = do_index;
424 if (-1 == GNUNET_DISK_directory_scan (dirname, &dir_scan_cb, &dsc))
426 GNUNET_assert (NULL != dsc.emsg);
428 return GNUNET_SYSERR;
/**
 * Closure for the 'dirproc' function.
 */
struct EntryProcCls
{
  /**
   * Head of the linked list of directory entries that is being
   * created; 'dirproc' prepends each new entry here.
   */
  struct GNUNET_FS_FileInformation *entries;

};
449 * Function that processes a directory entry that
450 * was obtained from the scanner.
451 * @param cls our closure
452 * @param filename name of the file (unused, why there???)
453 * @param fi information for publishing the file
456 dirproc (void *cls, const char *filename, struct GNUNET_FS_FileInformation *fi)
458 struct EntryProcCls *dc = cls;
460 GNUNET_assert (fi->next == NULL);
461 GNUNET_assert (fi->dir == NULL);
462 fi->next = dc->entries;
468 * Create a publish-structure from an existing file hierarchy, inferring
469 * and organizing keywords and metadata as much as possible. This
470 * function primarily performs the recursive build and re-organizes
471 * keywords and metadata; for automatically getting metadata
472 * extraction, scanning of directories and creation of the respective
473 * GNUNET_FS_FileInformation entries the default scanner should be
474 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
475 * convenience function.
477 * @param h handle to the file sharing subsystem
478 * @param client_info initial value for the client-info value for this entry
479 * @param filename name of the top-level file or directory
480 * @param scanner function used to get a list of files in a directory
481 * @param scanner_cls closure for scanner
482 * @param do_index should files in the hierarchy be indexed?
483 * @param bo block options
484 * @param emsg where to store an error message
485 * @return publish structure entry for the directory, NULL on error
487 struct GNUNET_FS_FileInformation *
488 GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
490 const char *filename,
491 GNUNET_FS_DirectoryScanner
492 scanner, void *scanner_cls,
495 GNUNET_FS_BlockOptions *bo,
498 struct GNUNET_FS_FileInformation *ret;
499 struct EntryProcCls dc;
500 struct GNUNET_FS_Uri *ksk;
501 struct GNUNET_CONTAINER_MetaData *meta;
507 meta = GNUNET_CONTAINER_meta_data_create ();
508 GNUNET_FS_meta_data_make_directory (meta);
509 scanner (scanner_cls, h, filename, do_index, bo, &dirproc, &dc, emsg);
510 ksk = NULL; // FIXME...
511 // FIXME: create meta!
513 GNUNET_FS_file_information_create_empty_directory (h, client_info, ksk,
515 GNUNET_CONTAINER_meta_data_destroy (meta);
516 ret->data.dir.entries = dc.entries;
517 while (dc.entries != NULL)
519 dc.entries->dir = ret;
520 dc.entries = dc.entries->next;
523 while ((NULL != (ss = strstr (fn, DIR_SEPARATOR_STR))) && (strlen (ss) > 1))
525 GNUNET_asprintf (&dn, "%s/", fn);
526 GNUNET_CONTAINER_meta_data_insert (ret->meta, "<gnunet>",
527 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
528 EXTRACTOR_METAFORMAT_C_STRING,
529 "text/plain", dn, strlen (dn) + 1);
531 ret->filename = GNUNET_strdup (filename);
537 * Test if a given entry represents a directory.
539 * @param ent check if this FI represents a directory
540 * @return GNUNET_YES if so, GNUNET_NO if not
543 GNUNET_FS_file_information_is_directory (const struct GNUNET_FS_FileInformation
546 return ent->is_directory;
551 * Create an entry for an empty directory in a publish-structure.
552 * This function should be used by applications for which the
553 * use of "GNUNET_FS_file_information_create_from_directory"
554 * is not appropriate.
556 * @param h handle to the file sharing subsystem
557 * @param client_info initial value for the client-info value for this entry
558 * @param meta metadata for the directory
559 * @param keywords under which keywords should this directory be available
560 * directly; can be NULL
561 * @param bo block options
562 * @return publish structure entry for the directory , NULL on error
564 struct GNUNET_FS_FileInformation *
565 GNUNET_FS_file_information_create_empty_directory (struct GNUNET_FS_Handle *h,
567 const struct GNUNET_FS_Uri
570 GNUNET_CONTAINER_MetaData
573 GNUNET_FS_BlockOptions *bo)
575 struct GNUNET_FS_FileInformation *ret;
577 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
579 ret->client_info = client_info;
580 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
581 ret->keywords = GNUNET_FS_uri_dup (keywords);
583 ret->is_directory = GNUNET_YES;
589 * Add an entry to a directory in a publish-structure. Clients
590 * should never modify publish structures that were passed to
591 * "GNUNET_FS_publish_start" already.
593 * @param dir the directory
594 * @param ent the entry to add; the entry must not have been
595 * added to any other directory at this point and
596 * must not include "dir" in its structure
597 * @return GNUNET_OK on success, GNUNET_SYSERR on error
600 GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir,
601 struct GNUNET_FS_FileInformation *ent)
603 if ((ent->dir != NULL) || (ent->next != NULL) || (!dir->is_directory))
606 return GNUNET_SYSERR;
609 ent->next = dir->data.dir.entries;
610 dir->data.dir.entries = ent;
611 dir->data.dir.dir_size = 0;
617 * Inspect a file or directory in a publish-structure. Clients
618 * should never modify publish structures that were passed to
619 * "GNUNET_FS_publish_start" already. When called on a directory,
620 * this function will FIRST call "proc" with information about
621 * the directory itself and then for each of the files in the
622 * directory (but not for files in subdirectories). When called
623 * on a file, "proc" will be called exactly once (with information
624 * about the specific file).
626 * @param dir the directory
627 * @param proc function to call on each entry
628 * @param proc_cls closure for proc
631 GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir,
632 GNUNET_FS_FileInformationProcessor proc,
635 struct GNUNET_FS_FileInformation *pos;
641 (dir->is_directory) ? dir->data.dir.dir_size : dir->data.
642 file.file_size, dir->meta, &dir->keywords, &dir->bo,
643 (dir->is_directory) ? &no : &dir->data.file.do_index,
646 if (!dir->is_directory)
648 pos = dir->data.dir.entries;
654 (pos->is_directory) ? pos->data.dir.dir_size : pos->data.
655 file.file_size, pos->meta, &pos->keywords, &pos->bo,
656 (dir->is_directory) ? &no : &dir->data.file.do_index,
665 * Destroy publish-structure. Clients should never destroy publish
666 * structures that were passed to "GNUNET_FS_publish_start" already.
668 * @param fi structure to destroy
669 * @param cleaner function to call on each entry in the structure
670 * (useful to clean up client_info); can be NULL; return
672 * @param cleaner_cls closure for cleaner
675 GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi,
676 GNUNET_FS_FileInformationProcessor cleaner,
679 struct GNUNET_FS_FileInformation *pos;
683 if (fi->is_directory)
685 /* clean up directory */
686 while (NULL != (pos = fi->data.dir.entries))
688 fi->data.dir.entries = pos->next;
689 GNUNET_FS_file_information_destroy (pos, cleaner, cleaner_cls);
691 /* clean up client-info */
693 cleaner (cleaner_cls, fi, fi->data.dir.dir_size, fi->meta, &fi->keywords,
694 &fi->bo, &no, &fi->client_info);
695 GNUNET_free_non_null (fi->data.dir.dir_data);
699 /* call clean-up function of the reader */
700 if (fi->data.file.reader != NULL)
701 fi->data.file.reader (fi->data.file.reader_cls, 0, 0, NULL, NULL);
702 /* clean up client-info */
704 cleaner (cleaner_cls, fi, fi->data.file.file_size, fi->meta,
705 &fi->keywords, &fi->bo, &fi->data.file.do_index,
708 GNUNET_free_non_null (fi->filename);
709 GNUNET_free_non_null (fi->emsg);
710 GNUNET_free_non_null (fi->chk_uri);
711 /* clean up serialization */
712 if ((NULL != fi->serialization) && (0 != UNLINK (fi->serialization)))
713 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING, "unlink",
715 if (NULL != fi->keywords)
716 GNUNET_FS_uri_destroy (fi->keywords);
717 if (NULL != fi->meta)
718 GNUNET_CONTAINER_meta_data_destroy (fi->meta);
719 GNUNET_free_non_null (fi->serialization);
722 GNUNET_FS_tree_encoder_finish (fi->te, NULL, NULL);
729 /* end of fs_file_information.c */