2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_file_information.c
23 * @brief Manage information for publishing directory hierarchies
24 * @author Christian Grothoff
27 * - serialization/deserialization (& deserialization API)
28 * - metadata filename clean up code
29 * - metadata/ksk generation for directories from contained files
32 #include <extractor.h>
33 #include "gnunet_fs_service.h"
39 * Create a temporary file on disk to store the current state of "fi".
42 * @param fi file information to sync with disk
45 GNUNET_FS_file_information_sync (struct GNUNET_FS_FileInformation * fi)
47 if (NULL == fi->serialization)
49 fi->serialization = NULL; // FIXME -- need cfg!
56 * Load file information from the file to which it was previously synced.
59 * @param fn name of the file to use
60 * @return NULL on error
62 struct GNUNET_FS_FileInformation *
63 GNUNET_FS_file_information_recover (const char *fn)
65 struct GNUNET_FS_FileInformation *ret;
73 * Obtain the name under which this file information
74 * structure is stored on disk. Only works for top-level
75 * file information structures.
77 * @param s structure to get the filename for
78 * @return NULL on error, otherwise filename that
79 * can be passed to "GNUNET_FS_file_information_recover"
80 * to read this fi-struct from disk.
83 GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s)
87 return s->serialization;
92 * Closure for "data_reader_file".
97 * Name of the file to read.
102 * File descriptor, NULL if it has not yet been opened.
104 struct GNUNET_DISK_FileHandle *fd;
109 * Function that provides data by reading from a file.
111 * @param cls closure (points to the file information)
112 * @param offset offset to read from; it is possible
113 * that the caller might need to go backwards
115 * @param max maximum number of bytes that should be
116 * copied to buf; readers are not allowed
117 * to provide less data unless there is an error;
118 * a value of "0" will be used at the end to allow
119 * the reader to clean up its internal state
120 * @param buf where the reader should write the data
121 * @param emsg location for the reader to store an error message
122 * @return number of bytes written, usually "max", 0 on error
125 data_reader_file(void *cls,
131 struct FileInfo *fi = cls;
137 GNUNET_DISK_file_close (fi->fd);
138 GNUNET_free (fi->filename);
144 fi->fd = GNUNET_DISK_file_open (fi->filename,
145 GNUNET_DISK_OPEN_READ,
146 GNUNET_DISK_PERM_NONE);
149 GNUNET_asprintf (emsg,
150 _("Could not open file `%s': %s"),
156 GNUNET_DISK_file_seek (fi->fd, offset, GNUNET_DISK_SEEK_SET);
157 ret = GNUNET_DISK_file_read (fi->fd, buf, max);
160 GNUNET_asprintf (emsg,
161 _("Could not read file `%s': %s"),
168 GNUNET_asprintf (emsg,
169 _("Short read reading from file `%s'!"),
178 * Create an entry for a file in a publish-structure.
180 * @param client_info initial value for the client-info value for this entry
181 * @param filename name of the file or directory to publish
182 * @param keywords under which keywords should this file be available
183 * directly; can be NULL
184 * @param meta metadata for the file
185 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
186 * GNUNET_SYSERR for simulation
187 * @param anonymity what is the desired anonymity level for sharing?
188 * @param priority what is the priority for OUR node to
189 * keep this file available? Use 0 for maximum anonymity and
190 * minimum reliability...
191 * @param expirationTime when should this content expire?
192 * @return publish structure entry for the file
194 struct GNUNET_FS_FileInformation *
195 GNUNET_FS_file_information_create_from_file (void *client_info,
196 const char *filename,
197 const struct GNUNET_FS_Uri *keywords,
198 const struct GNUNET_CONTAINER_MetaData *meta,
202 struct GNUNET_TIME_Absolute expirationTime)
206 struct GNUNET_FS_FileInformation *ret;
210 if (0 != STAT (filename, &sbuf))
212 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
217 fi = GNUNET_malloc (sizeof(struct FileInfo));
218 fi->filename = GNUNET_STRINGS_filename_expand (filename);
219 if (fi->filename == NULL)
224 ret = GNUNET_FS_file_information_create_from_reader (client_info,
234 ret->data.file.filename = GNUNET_strdup (filename);
236 while (NULL != (ss = strstr (fn,
239 GNUNET_CONTAINER_meta_data_insert (ret->meta,
241 EXTRACTOR_METATYPE_FILENAME,
242 EXTRACTOR_METAFORMAT_C_STRING,
251 * Function that provides data by copying from a buffer.
253 * @param cls closure (points to the buffer)
254 * @param offset offset to read from; it is possible
255 * that the caller might need to go backwards
257 * @param max maximum number of bytes that should be
258 * copied to buf; readers are not allowed
259 * to provide less data unless there is an error;
260 * a value of "0" will be used at the end to allow
261 * the reader to clean up its internal state
262 * @param buf where the reader should write the data
263 * @param emsg location for the reader to store an error message
264 * @return number of bytes written, usually "max", 0 on error
267 data_reader_copy(void *cls,
280 memcpy (buf, &data[offset], max);
286 * Create an entry for a file in a publish-structure.
288 * @param client_info initial value for the client-info value for this entry
289 * @param length length of the file
290 * @param data data for the file (should not be used afterwards by
291 * the caller; callee will "free")
292 * @param keywords under which keywords should this file be available
293 * directly; can be NULL
294 * @param meta metadata for the file
295 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
296 * GNUNET_SYSERR for simulation
297 * @param anonymity what is the desired anonymity level for sharing?
298 * @param priority what is the priority for OUR node to
299 * keep this file available? Use 0 for maximum anonymity and
300 * minimum reliability...
301 * @param expirationTime when should this content expire?
302 * @return publish structure entry for the file
304 struct GNUNET_FS_FileInformation *
305 GNUNET_FS_file_information_create_from_data (void *client_info,
308 const struct GNUNET_FS_Uri *keywords,
309 const struct GNUNET_CONTAINER_MetaData *meta,
313 struct GNUNET_TIME_Absolute expirationTime)
315 return GNUNET_FS_file_information_create_from_reader (client_info,
329 * Create an entry for a file in a publish-structure.
331 * @param client_info initial value for the client-info value for this entry
332 * @param length length of the file
333 * @param reader function that can be used to obtain the data for the file
334 * @param reader_cls closure for "reader"
335 * @param keywords under which keywords should this file be available
336 * directly; can be NULL
337 * @param meta metadata for the file
338 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
339 * GNUNET_SYSERR for simulation
340 * @param anonymity what is the desired anonymity level for sharing?
341 * @param priority what is the priority for OUR node to
342 * keep this file available? Use 0 for maximum anonymity and
343 * minimum reliability...
344 * @param expirationTime when should this content expire?
345 * @return publish structure entry for the file
347 struct GNUNET_FS_FileInformation *
348 GNUNET_FS_file_information_create_from_reader (void *client_info,
350 GNUNET_FS_DataReader reader,
352 const struct GNUNET_FS_Uri *keywords,
353 const struct GNUNET_CONTAINER_MetaData *meta,
357 struct GNUNET_TIME_Absolute expirationTime)
359 struct GNUNET_FS_FileInformation *ret;
361 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
362 ret->client_info = client_info;
363 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
364 if (ret->meta == NULL)
365 ret->meta = GNUNET_CONTAINER_meta_data_create ();
366 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
367 ret->expirationTime = expirationTime;
368 ret->data.file.reader = reader;
369 ret->data.file.reader_cls = reader_cls;
370 ret->data.file.do_index = do_index;
371 ret->data.file.file_size = length;
372 ret->anonymity = anonymity;
373 ret->priority = priority;
374 GNUNET_FS_file_information_sync (ret);
380 * Closure for "dir_scan_cb".
385 * Metadata extractors to use.
387 struct EXTRACTOR_PluginList *extractors;
390 * Function to call on each directory entry.
392 GNUNET_FS_FileProcessor proc;
400 * Scanner to use for subdirectories.
402 GNUNET_FS_DirectoryScanner scanner;
405 * Closure for scanner.
410 * Set to an error message (if any).
415 * Should files be indexed?
420 * Desired anonymity level.
425 * Desired publishing priority.
430 * Expiration time for publication.
432 struct GNUNET_TIME_Absolute expiration;
437 * Function called on each entry in a directory to
438 * cause default-publishing.
439 * @param cls closure (struct DirScanCls)
440 * @param filename name of the file to be published
441 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
444 dir_scan_cb (void *cls,
445 const char *filename)
447 struct DirScanCls *dsc = cls;
449 struct GNUNET_FS_FileInformation *fi;
450 struct GNUNET_FS_Uri *ksk_uri;
451 struct GNUNET_FS_Uri *keywords;
452 struct GNUNET_CONTAINER_MetaData *meta;
454 if (0 != STAT (filename, &sbuf))
456 GNUNET_asprintf (&dsc->emsg,
457 _("`%s' failed on file `%s': %s"),
461 return GNUNET_SYSERR;
463 if (S_ISDIR (sbuf.st_mode))
465 fi = GNUNET_FS_file_information_create_from_directory (NULL,
476 GNUNET_assert (NULL != dsc->emsg);
477 return GNUNET_SYSERR;
482 meta = GNUNET_CONTAINER_meta_data_create ();
483 GNUNET_CONTAINER_meta_data_extract_from_file (meta,
486 // FIXME: remove path from filename in metadata!
487 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
488 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
489 fi = GNUNET_FS_file_information_create_from_file (NULL,
497 GNUNET_CONTAINER_meta_data_destroy (meta);
498 GNUNET_FS_uri_destroy (keywords);
499 GNUNET_FS_uri_destroy (ksk_uri);
501 dsc->proc (dsc->proc_cls,
509 * Simple, useful default implementation of a directory scanner
510 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
511 * UNIX filename, will publish all files in the directory except hidden
512 * files (those starting with a "."). Metadata will be extracted
513 * using GNU libextractor; the specific list of plugins should be
514 * specified in "cls", passing NULL will disable (!) metadata
515 * extraction. Keywords will be derived from the metadata and be
516 * subject to default canonicalization. This is strictly a
517 * convenience function.
519 * @param cls must be of type "struct EXTRACTOR_PluginList*"
520 * @param dirname name of the directory to scan
521 * @param do_index should files be indexed or inserted
522 * @param anonymity desired anonymity level
523 * @param priority priority for publishing
524 * @param expirationTime expiration for publication
525 * @param proc function called on each entry
526 * @param proc_cls closure for proc
527 * @param emsg where to store an error message (on errors)
528 * @return GNUNET_OK on success
531 GNUNET_FS_directory_scanner_default (void *cls,
536 struct GNUNET_TIME_Absolute expirationTime,
537 GNUNET_FS_FileProcessor proc,
541 struct EXTRACTOR_PluginList *ex = cls;
542 struct DirScanCls dsc;
546 dsc.proc_cls = proc_cls;
547 dsc.scanner = &GNUNET_FS_directory_scanner_default;
548 dsc.scanner_cls = cls;
549 dsc.do_index = do_index;
550 dsc.anonymity = anonymity;
551 dsc.priority = priority;
552 dsc.expiration = expirationTime;
553 if (-1 == GNUNET_DISK_directory_scan (dirname,
557 GNUNET_assert (NULL != dsc.emsg);
559 return GNUNET_SYSERR;
566 * Closure for dirproc function.
571 * Linked list of directory entries that is being built up.
574 struct GNUNET_FS_FileInformation *entries;
580 * Function that processes a directory entry that
581 * was obtained from the scanner.
582 * @param cls our closure
583 * @param filename name of the file (unused by this function; TODO: check whether it is needed)
584 * @param fi information for publishing the file
588 const char *filename,
589 struct GNUNET_FS_FileInformation *fi)
591 struct EntryProcCls *dc = cls;
593 GNUNET_assert (fi->next == NULL);
594 GNUNET_assert (fi->dir == NULL);
595 fi->next = dc->entries;
601 * Create a publish-structure from an existing file hierarchy, inferring
602 * and organizing keywords and metadata as much as possible. This
603 * function primarily performs the recursive build and re-organizes
604 * keywords and metadata; for automatically getting metadata
605 * extraction, scanning of directories and creation of the respective
606 * GNUNET_FS_FileInformation entries the default scanner should be
607 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
608 * convenience function.
610 * @param client_info initial value for the client-info value for this entry
611 * @param filename name of the top-level file or directory
612 * @param scanner function used to get a list of files in a directory
613 * @param scanner_cls closure for scanner
614 * @param do_index should files in the hierarchy be indexed?
615 * @param anonymity what is the desired anonymity level for sharing?
616 * @param priority what is the priority for OUR node to
617 * keep this file available? Use 0 for maximum anonymity and
618 * minimum reliability...
619 * @param expirationTime when should this content expire?
620 * @param emsg where to store an error message
621 * @return publish structure entry for the directory, NULL on error
623 struct GNUNET_FS_FileInformation *
624 GNUNET_FS_file_information_create_from_directory (void *client_info,
625 const char *filename,
626 GNUNET_FS_DirectoryScanner scanner,
631 struct GNUNET_TIME_Absolute expirationTime,
634 struct GNUNET_FS_FileInformation *ret;
635 struct EntryProcCls dc;
636 struct GNUNET_FS_Uri *ksk;
637 struct GNUNET_CONTAINER_MetaData *meta;
642 meta = GNUNET_CONTAINER_meta_data_create ();
643 GNUNET_FS_meta_data_make_directory (meta);
644 scanner (scanner_cls,
653 ksk = NULL; // FIXME...
654 // FIXME: create meta!
655 ret = GNUNET_FS_file_information_create_empty_directory (client_info,
661 GNUNET_CONTAINER_meta_data_destroy (meta);
662 ret->data.dir.entries = dc.entries;
663 while (dc.entries != NULL)
665 dc.entries->dir = ret;
666 GNUNET_FS_file_information_sync (dc.entries);
667 dc.entries = dc.entries->next;
670 while (NULL != (ss = strstr (fn,
673 GNUNET_CONTAINER_meta_data_insert (ret->meta,
675 EXTRACTOR_METATYPE_FILENAME,
676 EXTRACTOR_METAFORMAT_C_STRING,
680 ret->data.dir.dirname = GNUNET_strdup (filename);
681 GNUNET_FS_file_information_sync (ret);
687 * Create an entry for an empty directory in a publish-structure.
688 * This function should be used by applications for which the
689 * use of "GNUNET_FS_file_information_create_from_directory"
690 * is not appropriate.
692 * @param client_info initial value for the client-info value for this entry
693 * @param meta metadata for the directory
694 * @param keywords under which keywords should this directory be available
695 * directly; can be NULL
696 * @param anonymity what is the desired anonymity level for sharing?
697 * @param priority what is the priority for OUR node to
698 * keep this file available? Use 0 for maximum anonymity and
699 * minimum reliability...
700 * @param expirationTime when should this content expire?
701 * @return publish structure entry for the directory, NULL on error
703 struct GNUNET_FS_FileInformation *
704 GNUNET_FS_file_information_create_empty_directory (void *client_info,
705 const struct GNUNET_FS_Uri *keywords,
706 const struct GNUNET_CONTAINER_MetaData *meta,
709 struct GNUNET_TIME_Absolute expirationTime)
711 struct GNUNET_FS_FileInformation *ret;
713 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
714 ret->client_info = client_info;
715 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
716 ret->keywords = GNUNET_FS_uri_dup (keywords);
717 ret->expirationTime = expirationTime;
718 ret->is_directory = GNUNET_YES;
719 ret->anonymity = anonymity;
720 ret->priority = priority;
721 GNUNET_FS_file_information_sync (ret);
727 * Add an entry to a directory in a publish-structure. Clients
728 * should never modify publish structures that were passed to
729 * "GNUNET_FS_publish_start" already.
731 * @param dir the directory
732 * @param ent the entry to add; the entry must not have been
733 * added to any other directory at this point and
734 * must not include "dir" in its structure
735 * @return GNUNET_OK on success, GNUNET_SYSERR on error
738 GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir,
739 struct GNUNET_FS_FileInformation *ent)
741 if ( (ent->dir != NULL) ||
742 (ent->next != NULL) ||
743 (! dir->is_directory) )
746 return GNUNET_SYSERR;
749 ent->next = dir->data.dir.entries;
750 dir->data.dir.entries = ent;
751 dir->data.dir.dir_size = 0;
752 GNUNET_FS_file_information_sync (ent);
753 GNUNET_FS_file_information_sync (dir);
759 * Inspect a file or directory in a publish-structure. Clients
760 * should never modify publish structures that were passed to
761 * "GNUNET_FS_publish_start" already. When called on a directory,
762 * this function will FIRST call "proc" with information about
763 * the directory itself and then for each of the files in the
764 * directory (but not for files in subdirectories). When called
765 * on a file, "proc" will be called exactly once (with information
766 * about the specific file).
768 * @param dir the file or directory to inspect
769 * @param proc function to call on each entry
770 * @param proc_cls closure for proc
773 GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir,
774 GNUNET_FS_FileInformationProcessor proc,
777 struct GNUNET_FS_FileInformation *pos;
782 (dir->is_directory) ? dir->data.dir.dir_size : dir->data.file.file_size,
787 &dir->expirationTime,
790 if (! dir->is_directory)
792 pos = dir->data.dir.entries;
798 (pos->is_directory) ? pos->data.dir.dir_size : pos->data.file.file_size,
803 &pos->expirationTime,
812 * Destroy publish-structure. Clients should never destroy publish
813 * structures that were passed to "GNUNET_FS_publish_start" already.
815 * @param fi structure to destroy
816 * @param cleaner function to call on each entry in the structure
817 * (useful to clean up client_info); can be NULL; return values are ignored
819 * @param cleaner_cls closure for cleaner
822 GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi,
823 GNUNET_FS_FileInformationProcessor cleaner,
826 struct GNUNET_FS_FileInformation *pos;
828 if (fi->is_directory)
830 /* clean up directory */
831 while (NULL != (pos = fi->data.dir.entries))
833 fi->data.dir.entries = pos->next;
834 GNUNET_FS_file_information_destroy (pos, cleaner, cleaner_cls);
836 /* clean up client-info */
838 cleaner (cleaner_cls,
840 fi->data.dir.dir_size,
847 GNUNET_free_non_null (fi->data.dir.dir_data);
848 GNUNET_free_non_null (fi->data.dir.dirname);
852 /* call clean-up function of the reader */
853 fi->data.file.reader (fi->data.file.reader_cls, 0, 0, NULL, NULL);
854 /* clean up client-info */
856 cleaner (cleaner_cls,
858 fi->data.file.file_size,
866 GNUNET_free_non_null (fi->emsg);
867 GNUNET_free_non_null (fi->chk_uri);
868 /* clean up serialization */
869 if ( (NULL != fi->serialization) &&
870 (0 != UNLINK (fi->serialization)) )
871 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
874 if (NULL != fi->keywords)
875 GNUNET_FS_uri_destroy (fi->keywords);
876 GNUNET_CONTAINER_meta_data_destroy (fi->meta);
877 GNUNET_free_non_null (fi->serialization);
880 GNUNET_FS_tree_encoder_finish (fi->te,
888 /* end of fs_file_information.c */