2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_file_information.c
23 * @brief Manage information for publishing directory hierarchies
24 * @author Christian Grothoff
27 * - serialization/deserialization (& deserialization API)
28 * - metadata filename clean up code
29 * - metadata/ksk generation for directories from contained files
32 #include <extractor.h>
33 #include "gnunet_fs_service.h"
39 * Add meta data that libextractor finds to our meta data
42 * @param cls closure, our meta data container
43 * @param plugin_name name of the plugin that produced this value;
44 * special values can be used (i.e. '<zlib>' for zlib being
45 * used in the main libextractor library and yielding
47 * @param type libextractor-type describing the meta data
48 * @param format basic format information about data
49 * @param data_mime_type mime-type of data (not of the original file);
50 * can be NULL (if mime-type is not known)
51 * @param data actual meta-data found
52 * @param data_len number of bytes in data
53 * @return always 0 to continue extracting
57 const char *plugin_name,
58 enum EXTRACTOR_MetaType type,
59 enum EXTRACTOR_MetaFormat format,
60 const char *data_mime_type,
64 struct GNUNET_CONTAINER_MetaData *md = cls;
65 (void) GNUNET_CONTAINER_meta_data_insert (md,
77 * Extract meta-data from a file.
79 * @return GNUNET_SYSERR on error, otherwise the number
80 * of meta-data items obtained
83 GNUNET_FS_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData
84 *md, const char *filename,
85 struct EXTRACTOR_PluginList *
92 if (extractors == NULL)
94 old = GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL);
95 GNUNET_assert (old >= 0);
96 EXTRACTOR_extract (extractors,
101 return (GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL) - old);
107 * Obtain the name under which this file information
108 * structure is stored on disk. Only works for top-level
109 * file information structures.
111 * @param s structure to get the filename for
112 * @return NULL on error, otherwise filename that
113 * can be passed to "GNUNET_FS_file_information_recover"
114 * to read this fi-struct from disk.
117 GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s)
121 return s->serialization;
126 * Create an entry for a file in a publish-structure.
128 * @param h handle to the file sharing subsystem
129 * @param client_info initial value for the client-info value for this entry
130 * @param filename name of the file or directory to publish
131 * @param keywords under which keywords should this file be available
132 * directly; can be NULL
133 * @param meta metadata for the file
134 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
135 * GNUNET_SYSERR for simulation
136 * @param anonymity what is the desired anonymity level for sharing?
137 * @param priority what is the priority for OUR node to
138 * keep this file available? Use 0 for maximum anonymity and
139 * minimum reliability...
140 * @param expirationTime when should this content expire?
141 * @return publish structure entry for the file
143 struct GNUNET_FS_FileInformation *
144 GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h,
146 const char *filename,
147 const struct GNUNET_FS_Uri *keywords,
148 const struct GNUNET_CONTAINER_MetaData *meta,
152 struct GNUNET_TIME_Absolute expirationTime)
156 struct GNUNET_FS_FileInformation *ret;
160 if (0 != STAT (filename, &sbuf))
162 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
167 fi = GNUNET_FS_make_file_reader_context_ (filename);
173 ret = GNUNET_FS_file_information_create_from_reader (h,
176 &GNUNET_FS_data_reader_file_,
187 ret->filename = GNUNET_strdup (filename);
189 while (NULL != (ss = strstr (fn,
192 GNUNET_CONTAINER_meta_data_insert (ret->meta,
194 EXTRACTOR_METATYPE_FILENAME,
195 EXTRACTOR_METAFORMAT_C_STRING,
204 * Create an entry for a file in a publish-structure.
206 * @param h handle to the file sharing subsystem
207 * @param client_info initial value for the client-info value for this entry
208 * @param length length of the file
209 * @param data data for the file (should not be used afterwards by
210 * the caller; callee will "free")
211 * @param keywords under which keywords should this file be available
212 * directly; can be NULL
213 * @param meta metadata for the file
214 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
215 * GNUNET_SYSERR for simulation
216 * @param anonymity what is the desired anonymity level for sharing?
217 * @param priority what is the priority for OUR node to
218 * keep this file available? Use 0 for maximum anonymity and
219 * minimum reliability...
220 * @param expirationTime when should this content expire?
221 * @return publish structure entry for the file
223 struct GNUNET_FS_FileInformation *
224 GNUNET_FS_file_information_create_from_data (struct GNUNET_FS_Handle *h,
228 const struct GNUNET_FS_Uri *keywords,
229 const struct GNUNET_CONTAINER_MetaData *meta,
233 struct GNUNET_TIME_Absolute expirationTime)
235 if (GNUNET_YES == do_index)
240 return GNUNET_FS_file_information_create_from_reader (h,
243 &GNUNET_FS_data_reader_copy_,
255 * Create an entry for a file in a publish-structure.
257 * @param h handle to the file sharing subsystem
258 * @param client_info initial value for the client-info value for this entry
259 * @param length length of the file
260 * @param reader function that can be used to obtain the data for the file
261 * @param reader_cls closure for "reader"
262 * @param keywords under which keywords should this file be available
263 * directly; can be NULL
264 * @param meta metadata for the file
265 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
266 * GNUNET_SYSERR for simulation
267 * @param anonymity what is the desired anonymity level for sharing?
268 * @param priority what is the priority for OUR node to
269 * keep this file available? Use 0 for maximum anonymity and
270 * minimum reliability...
271 * @param expirationTime when should this content expire?
272 * @return publish structure entry for the file
274 struct GNUNET_FS_FileInformation *
275 GNUNET_FS_file_information_create_from_reader (struct GNUNET_FS_Handle *h,
278 GNUNET_FS_DataReader reader,
280 const struct GNUNET_FS_Uri *keywords,
281 const struct GNUNET_CONTAINER_MetaData *meta,
285 struct GNUNET_TIME_Absolute expirationTime)
287 struct GNUNET_FS_FileInformation *ret;
289 if ( (GNUNET_YES == do_index) &&
290 (reader != &GNUNET_FS_data_reader_file_) )
295 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
297 ret->client_info = client_info;
298 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
299 if (ret->meta == NULL)
300 ret->meta = GNUNET_CONTAINER_meta_data_create ();
301 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
302 ret->expirationTime = expirationTime;
303 ret->data.file.reader = reader;
304 ret->data.file.reader_cls = reader_cls;
305 ret->data.file.do_index = do_index;
306 ret->data.file.file_size = length;
307 ret->anonymity = anonymity;
308 ret->priority = priority;
314 * Closure for "dir_scan_cb".
319 * Metadata extractors to use.
321 struct EXTRACTOR_PluginList *extractors;
326 struct GNUNET_FS_Handle *h;
329 * Function to call on each directory entry.
331 GNUNET_FS_FileProcessor proc;
339 * Scanner to use for subdirectories.
341 GNUNET_FS_DirectoryScanner scanner;
344 * Closure for scanner.
349 * Set to an error message (if any).
354 * Should files be indexed?
359 * Desired anonymity level.
364 * Desired publishing priority.
369 * Expiration time for publication.
371 struct GNUNET_TIME_Absolute expiration;
376 * Function called on each entry in a directory to
377 * cause default-publishing.
378 * @param cls closure (struct DirScanCls)
379 * @param filename name of the file to be published
380 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
383 dir_scan_cb (void *cls,
384 const char *filename)
386 struct DirScanCls *dsc = cls;
388 struct GNUNET_FS_FileInformation *fi;
389 struct GNUNET_FS_Uri *ksk_uri;
390 struct GNUNET_FS_Uri *keywords;
391 struct GNUNET_CONTAINER_MetaData *meta;
393 if (0 != STAT (filename, &sbuf))
395 GNUNET_asprintf (&dsc->emsg,
396 _("`%s' failed on file `%s': %s"),
400 return GNUNET_SYSERR;
402 if (S_ISDIR (sbuf.st_mode))
404 fi = GNUNET_FS_file_information_create_from_directory (dsc->h,
416 GNUNET_assert (NULL != dsc->emsg);
417 return GNUNET_SYSERR;
422 meta = GNUNET_CONTAINER_meta_data_create ();
423 GNUNET_FS_meta_data_extract_from_file (meta,
426 // FIXME: remove path from filename in metadata!
427 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
428 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
429 fi = GNUNET_FS_file_information_create_from_file (dsc->h,
438 GNUNET_CONTAINER_meta_data_destroy (meta);
439 GNUNET_FS_uri_destroy (keywords);
440 GNUNET_FS_uri_destroy (ksk_uri);
442 dsc->proc (dsc->proc_cls,
450 * Simple, useful default implementation of a directory scanner
451 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
452 * UNIX filename, will publish all files in the directory except hidden
453 * files (those starting with a "."). Metadata will be extracted
454 * using GNU libextractor; the specific list of plugins should be
455 * specified in "cls", passing NULL will disable (!) metadata
456 * extraction. Keywords will be derived from the metadata and be
457 * subject to default canonicalization. This is strictly a
458 * convenience function.
460 * @param cls must be of type "struct EXTRACTOR_PluginList*"
461 * @param h handle to the file sharing subsystem
462 * @param dirname name of the directory to scan
463 * @param do_index should files be indexed or inserted
464 * @param anonymity desired anonymity level
465 * @param priority priority for publishing
466 * @param expirationTime expiration for publication
467 * @param proc function called on each entry
468 * @param proc_cls closure for proc
469 * @param emsg where to store an error message (on errors)
470 * @return GNUNET_OK on success
473 GNUNET_FS_directory_scanner_default (void *cls,
474 struct GNUNET_FS_Handle *h,
479 struct GNUNET_TIME_Absolute expirationTime,
480 GNUNET_FS_FileProcessor proc,
484 struct EXTRACTOR_PluginList *ex = cls;
485 struct DirScanCls dsc;
490 dsc.proc_cls = proc_cls;
491 dsc.scanner = &GNUNET_FS_directory_scanner_default;
492 dsc.scanner_cls = cls;
493 dsc.do_index = do_index;
494 dsc.anonymity = anonymity;
495 dsc.priority = priority;
496 dsc.expiration = expirationTime;
497 if (-1 == GNUNET_DISK_directory_scan (dirname,
501 GNUNET_assert (NULL != dsc.emsg);
503 return GNUNET_SYSERR;
510 * Closure for dirproc function.
515 * Linked list of directory entries that is being
518 struct GNUNET_FS_FileInformation *entries;
524 * Function that processes a directory entry that
525 * was obtained from the scanner.
526 * @param cls our closure
527 * @param filename name of the file (unused, why there???)
528 * @param fi information for publishing the file
532 const char *filename,
533 struct GNUNET_FS_FileInformation *fi)
535 struct EntryProcCls *dc = cls;
537 GNUNET_assert (fi->next == NULL);
538 GNUNET_assert (fi->dir == NULL);
539 fi->next = dc->entries;
545 * Create a publish-structure from an existing file hierarchy, inferring
546 * and organizing keywords and metadata as much as possible. This
547 * function primarily performs the recursive build and re-organizes
548 * keywords and metadata; for automatically getting metadata
549 * extraction, scanning of directories and creation of the respective
550 * GNUNET_FS_FileInformation entries the default scanner should be
551 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
552 * convenience function.
554 * @param h handle to the file sharing subsystem
555 * @param client_info initial value for the client-info value for this entry
556 * @param filename name of the top-level file or directory
557 * @param scanner function used to get a list of files in a directory
558 * @param scanner_cls closure for scanner
559 * @param do_index should files in the hierarchy be indexed?
560 * @param anonymity what is the desired anonymity level for sharing?
561 * @param priority what is the priority for OUR node to
562 * keep this file available? Use 0 for maximum anonymity and
563 * minimum reliability...
564 * @param expirationTime when should this content expire?
565 * @param emsg where to store an error message
566 * @return publish structure entry for the directory, NULL on error
568 struct GNUNET_FS_FileInformation *
569 GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
571 const char *filename,
572 GNUNET_FS_DirectoryScanner scanner,
577 struct GNUNET_TIME_Absolute expirationTime,
580 struct GNUNET_FS_FileInformation *ret;
581 struct EntryProcCls dc;
582 struct GNUNET_FS_Uri *ksk;
583 struct GNUNET_CONTAINER_MetaData *meta;
588 meta = GNUNET_CONTAINER_meta_data_create ();
589 GNUNET_FS_meta_data_make_directory (meta);
590 scanner (scanner_cls,
600 ksk = NULL; // FIXME...
601 // FIXME: create meta!
602 ret = GNUNET_FS_file_information_create_empty_directory (h,
609 GNUNET_CONTAINER_meta_data_destroy (meta);
610 ret->data.dir.entries = dc.entries;
611 while (dc.entries != NULL)
613 dc.entries->dir = ret;
614 dc.entries = dc.entries->next;
617 while ( (NULL != (ss = strstr (fn,
618 DIR_SEPARATOR_STR))) &&
621 GNUNET_CONTAINER_meta_data_insert (ret->meta,
623 EXTRACTOR_METATYPE_FILENAME,
624 EXTRACTOR_METAFORMAT_C_STRING,
628 ret->filename = GNUNET_strdup (filename);
634 * Test if a given entry represents a directory.
636 * @param ent check if this FI represents a directory
637 * @return GNUNET_YES if so, GNUNET_NO if not
640 GNUNET_FS_file_information_is_directory (struct GNUNET_FS_FileInformation *ent)
642 return ent->is_directory;
647 * Create an entry for an empty directory in a publish-structure.
648 * This function should be used by applications for which the
649 * use of "GNUNET_FS_file_information_create_from_directory"
650 * is not appropriate.
652 * @param h handle to the file sharing subsystem
653 * @param client_info initial value for the client-info value for this entry
654 * @param meta metadata for the directory
655 * @param keywords under which keywords should this directory be available
656 * directly; can be NULL
657 * @param anonymity what is the desired anonymity level for sharing?
658 * @param priority what is the priority for OUR node to
659 * keep this file available? Use 0 for maximum anonymity and
660 * minimum reliability...
661 * @param expirationTime when should this content expire?
662 * @return publish structure entry for the directory, NULL on error
664 struct GNUNET_FS_FileInformation *
665 GNUNET_FS_file_information_create_empty_directory (struct GNUNET_FS_Handle *h,
667 const struct GNUNET_FS_Uri *keywords,
668 const struct GNUNET_CONTAINER_MetaData *meta,
671 struct GNUNET_TIME_Absolute expirationTime)
673 struct GNUNET_FS_FileInformation *ret;
675 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
677 ret->client_info = client_info;
678 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
/* keywords may be NULL (see @param keywords); only duplicate a real URI,
   matching what GNUNET_FS_file_information_create_from_reader does */
679 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
680 ret->expirationTime = expirationTime;
681 ret->is_directory = GNUNET_YES;
682 ret->anonymity = anonymity;
683 ret->priority = priority;
689 * Add an entry to a directory in a publish-structure. Clients
690 * should never modify publish structures that were passed to
691 * "GNUNET_FS_publish_start" already.
693 * @param dir the directory
694 * @param ent the entry to add; the entry must not have been
695 * added to any other directory at this point and
696 * must not include "dir" in its structure
697 * @return GNUNET_OK on success, GNUNET_SYSERR on error
700 GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir,
701 struct GNUNET_FS_FileInformation *ent)
703 if ( (ent->dir != NULL) ||
704 (ent->next != NULL) ||
705 (! dir->is_directory) )
708 return GNUNET_SYSERR;
711 ent->next = dir->data.dir.entries;
712 dir->data.dir.entries = ent;
713 dir->data.dir.dir_size = 0;
719 * Inspect a file or directory in a publish-structure. Clients
720 * should never modify publish structures that were passed to
721 * "GNUNET_FS_publish_start" already. When called on a directory,
722 * this function will FIRST call "proc" with information about
723 * the directory itself and then for each of the files in the
724 * directory (but not for files in subdirectories). When called
725 * on a file, "proc" will be called exactly once (with information
726 * about the specific file).
728 * @param dir the directory
729 * @param proc function to call on each entry
730 * @param proc_cls closure for proc
733 GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir,
734 GNUNET_FS_FileInformationProcessor proc,
737 struct GNUNET_FS_FileInformation *pos;
744 (dir->is_directory) ? dir->data.dir.dir_size : dir->data.file.file_size,
749 (dir->is_directory) ? &no : &dir->data.file.do_index,
750 &dir->expirationTime,
753 if (! dir->is_directory)
755 pos = dir->data.dir.entries;
762 (pos->is_directory) ? pos->data.dir.dir_size : pos->data.file.file_size,
/* each entry reports its own do_index flag; testing "dir" here made every
   child entry hand "&no" to proc instead of its own setting */
767 (pos->is_directory) ? &no : &pos->data.file.do_index,
768 &pos->expirationTime,
777 * Destroy publish-structure. Clients should never destroy publish
778 * structures that were passed to "GNUNET_FS_publish_start" already.
780 * @param fi structure to destroy
781 * @param cleaner function to call on each entry in the structure
782 * (useful to clean up client_info); can be NULL; return
784 * @param cleaner_cls closure for cleaner
787 GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi,
788 GNUNET_FS_FileInformationProcessor cleaner,
791 struct GNUNET_FS_FileInformation *pos;
795 if (fi->is_directory)
797 /* clean up directory */
798 while (NULL != (pos = fi->data.dir.entries))
800 fi->data.dir.entries = pos->next;
801 GNUNET_FS_file_information_destroy (pos, cleaner, cleaner_cls);
803 /* clean up client-info */
805 cleaner (cleaner_cls,
807 fi->data.dir.dir_size,
815 GNUNET_free_non_null (fi->data.dir.dir_data);
819 /* call clean-up function of the reader */
820 if (fi->data.file.reader != NULL)
821 fi->data.file.reader (fi->data.file.reader_cls, 0, 0,
823 /* clean up client-info */
825 cleaner (cleaner_cls,
827 fi->data.file.file_size,
832 &fi->data.file.do_index,
836 GNUNET_free_non_null (fi->filename);
837 GNUNET_free_non_null (fi->emsg);
838 GNUNET_free_non_null (fi->chk_uri);
839 /* clean up serialization */
840 if ( (NULL != fi->serialization) &&
841 (0 != UNLINK (fi->serialization)) )
842 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
845 if (NULL != fi->keywords)
846 GNUNET_FS_uri_destroy (fi->keywords);
847 if (NULL != fi->meta)
848 GNUNET_CONTAINER_meta_data_destroy (fi->meta);
849 GNUNET_free_non_null (fi->serialization);
852 GNUNET_FS_tree_encoder_finish (fi->te,
860 /* end of fs_file_information.c */