2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_file_information.c
23 * @brief Manage information for publishing directory hierarchies
24 * @author Christian Grothoff
27 * - serialization/deserialization (& deserialization API)
28 * - metadata filename clean up code
29 * - metadata/ksk generation for directories from contained files
32 #include <extractor.h>
33 #include "gnunet_fs_service.h"
39 * Obtain the name under which this file information
40 * structure is stored on disk. Only works for top-level
41 * file information structures.
43 * @param s structure to get the filename for
44 * @return NULL on error, otherwise filename that
45 * can be passed to "GNUNET_FS_file_information_recover"
46 * to read this fi-struct from disk.
/* Hands back the `serialization` member of `s`.
   NOTE(review): the embedded line numbers jump from 49 to 53, so part of
   this function is missing from the listing; whether `s` is checked for
   NULL before the dereference cannot be determined from here. */
49 GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s)
53 return s->serialization;
58 * Closure for "data_reader_file".
/* Members of struct FileInfo, the closure type that data_reader_file casts
   its `cls` argument to.
   NOTE(review): the `filename` member declaration itself is not present in
   this listing; only its description remains. */
63 * Name of the file to read.
68 * File descriptor, NULL if it has not yet been opened.
/* Handle that data_reader_file opens, seeks, reads from and closes. */
70 struct GNUNET_DISK_FileHandle *fd;
75 * Function that provides data by reading from a file.
77 * @param cls closure (points to the file information)
78 * @param offset offset to read from; it is possible
79 * that the caller might need to go backwards
81 * @param max maximum number of bytes that should be
82 * copied to buf; readers are not allowed
83 * to provide less data unless there is an error;
84 * a value of "0" will be used at the end to allow
85 * the reader to clean up its internal state
86 * @param buf where the reader should write the data
87 * @param emsg location for the reader to store an error message
88 * @return number of bytes written, usually "max", 0 on error
/* Reader callback that serves file contents from the file named in the
   struct FileInfo closure.
   NOTE(review): many lines (parameters, conditions, returns) are missing
   from this listing, so the guards around the statements below are not
   visible. */
91 data_reader_file(void *cls,
97 struct FileInfo *fi = cls;
/* Tear-down: close the handle and release the stored filename.
   Presumably this is the max == 0 clean-up path described in the
   documentation text -- confirm against the full source. */
103 GNUNET_DISK_file_close (fi->fd);
104 GNUNET_free (fi->filename);
/* Open the file read-only with no permission bits requested. */
110 fi->fd = GNUNET_DISK_file_open (fi->filename,
111 GNUNET_DISK_OPEN_READ,
112 GNUNET_DISK_PERM_NONE);
/* Report an open failure through *emsg. */
115 GNUNET_asprintf (emsg,
116 _("Could not open file `%s': %s"),
/* Position at the absolute offset requested by the caller, then read.
   NOTE(review): the result of GNUNET_DISK_file_seek is discarded here, so a
   failed seek is not detected before the read. */
122 GNUNET_DISK_file_seek (fi->fd, offset, GNUNET_DISK_SEEK_SET);
123 ret = GNUNET_DISK_file_read (fi->fd, buf, max);
/* Report a read failure through *emsg. */
126 GNUNET_asprintf (emsg,
127 _("Could not read file `%s': %s"),
/* Report a short read through *emsg. */
134 GNUNET_asprintf (emsg,
135 _("Short read reading from file `%s'!"),
144 * Create an entry for a file in a publish-structure.
146 * @param h handle to the file sharing subsystem
147 * @param client_info initial value for the client-info value for this entry
148 * @param filename name of the file or directory to publish
149 * @param keywords under which keywords should this file be available
150 * directly; can be NULL
151 * @param meta metadata for the file
152 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
153 * GNUNET_SYSERR for simulation
154 * @param anonymity what is the desired anonymity level for sharing?
155 * @param priority what is the priority for OUR node to
156 * keep this file available? Use 0 for maximum anonymity and
157 * minimum reliability...
158 * @param expirationTime when should this content expire?
159 * @return publish structure entry for the file
/* Builds a publish entry for an on-disk file by wrapping it in a
   struct FileInfo closure and delegating to
   GNUNET_FS_file_information_create_from_reader.
   NOTE(review): several parameters and statements are missing from this
   listing (gaps in the embedded line numbers). */
161 struct GNUNET_FS_FileInformation *
162 GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h,
164 const char *filename,
165 const struct GNUNET_FS_Uri *keywords,
166 const struct GNUNET_CONTAINER_MetaData *meta,
170 struct GNUNET_TIME_Absolute expirationTime)
174 struct GNUNET_FS_FileInformation *ret;
/* The file must be stat-able; a failure is logged as a warning. */
178 if (0 != STAT (filename, &sbuf))
180 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
/* Allocate the reader closure and store the expanded filename in it.
   NOTE(review): what happens to `fi` when the expansion yields NULL is not
   visible here -- confirm it is released. */
185 fi = GNUNET_malloc (sizeof(struct FileInfo));
186 fi->filename = GNUNET_STRINGS_filename_expand (filename);
187 if (fi->filename == NULL)
192 ret = GNUNET_FS_file_information_create_from_reader (h,
/* Keep a private copy of the caller-supplied (unexpanded) filename. */
204 ret->filename = GNUNET_strdup (filename);
/* Walk `fn` with strstr; presumably this strips leading directory
   components so only the base name is recorded -- confirm, the search
   string is not visible. */
206 while (NULL != (ss = strstr (fn,
/* Record a filename item (C-string format) in the entry's metadata. */
209 GNUNET_CONTAINER_meta_data_insert (ret->meta,
211 EXTRACTOR_METATYPE_FILENAME,
212 EXTRACTOR_METAFORMAT_C_STRING,
221 * Function that provides data by copying from a buffer.
223 * @param cls closure (points to the buffer)
224 * @param offset offset to read from; it is possible
225 * that the caller might need to go backwards
227 * @param max maximum number of bytes that should be
228 * copied to buf; readers are not allowed
229 * to provide less data unless there is an error;
230 * a value of "0" will be used at the end to allow
231 * the reader to clean up its internal state
232 * @param buf where the reader should write the data
233 * @param emsg location for the reader to store an error message
234 * @return number of bytes written, usually "max", 0 on error
/* Reader callback backed by an in-memory buffer.
   NOTE(review): lines 238-249 are missing from this listing, so the
   derivation of `data` from `cls` and any handling of max == 0 are not
   visible. */
237 data_reader_copy(void *cls,
/* Copy `max` bytes starting at data[offset] into `buf`.
   NOTE(review): no bounds check of offset + max against the buffer length
   is visible -- confirm the caller guarantees the range. */
250 memcpy (buf, &data[offset], max);
256 * Create an entry for a file in a publish-structure.
258 * @param h handle to the file sharing subsystem
259 * @param client_info initial value for the client-info value for this entry
260 * @param length length of the file
261 * @param data data for the file (should not be used afterwards by
262 * the caller; callee will "free")
263 * @param keywords under which keywords should this file be available
264 * directly; can be NULL
265 * @param meta metadata for the file
266 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
267 * GNUNET_SYSERR for simulation
268 * @param anonymity what is the desired anonymity level for sharing?
269 * @param priority what is the priority for OUR node to
270 * keep this file available? Use 0 for maximum anonymity and
271 * minimum reliability...
272 * @param expirationTime when should this content expire?
273 * @return publish structure entry for the file
/* Builds a publish entry for in-memory data by delegating to
   GNUNET_FS_file_information_create_from_reader and returning its result
   unchanged.
   NOTE(review): the argument list of the delegated call is missing from
   this listing; presumably data_reader_copy is passed as the reader with
   `data` as its closure -- confirm. */
275 struct GNUNET_FS_FileInformation *
276 GNUNET_FS_file_information_create_from_data (struct GNUNET_FS_Handle *h,
280 const struct GNUNET_FS_Uri *keywords,
281 const struct GNUNET_CONTAINER_MetaData *meta,
285 struct GNUNET_TIME_Absolute expirationTime)
287 return GNUNET_FS_file_information_create_from_reader (h,
302 * Create an entry for a file in a publish-structure.
304 * @param h handle to the file sharing subsystem
305 * @param client_info initial value for the client-info value for this entry
306 * @param length length of the file
307 * @param reader function that can be used to obtain the data for the file
308 * @param reader_cls closure for "reader"
309 * @param keywords under which keywords should this file be available
310 * directly; can be NULL
311 * @param meta metadata for the file
312 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
313 * GNUNET_SYSERR for simulation
314 * @param anonymity what is the desired anonymity level for sharing?
315 * @param priority what is the priority for OUR node to
316 * keep this file available? Use 0 for maximum anonymity and
317 * minimum reliability...
318 * @param expirationTime when should this content expire?
319 * @return publish structure entry for the file
/* Allocates a file entry and fills it from the arguments; the data itself
   is obtained later through `reader` / `reader_cls`.
   NOTE(review): several parameter lines and the final return are missing
   from this listing. */
321 struct GNUNET_FS_FileInformation *
322 GNUNET_FS_file_information_create_from_reader (struct GNUNET_FS_Handle *h,
325 GNUNET_FS_DataReader reader,
327 const struct GNUNET_FS_Uri *keywords,
328 const struct GNUNET_CONTAINER_MetaData *meta,
332 struct GNUNET_TIME_Absolute expirationTime)
334 struct GNUNET_FS_FileInformation *ret;
336 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
338 ret->client_info = client_info;
/* The entry owns its own copy of the metadata; if duplication yields NULL
   an empty container is created so ret->meta is never NULL. */
339 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
340 if (ret->meta == NULL)
341 ret->meta = GNUNET_CONTAINER_meta_data_create ();
/* Keywords are optional: only duplicate when the caller supplied some. */
342 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
343 ret->expirationTime = expirationTime;
/* File-specific part of the entry. */
344 ret->data.file.reader = reader;
345 ret->data.file.reader_cls = reader_cls;
346 ret->data.file.do_index = do_index;
347 ret->data.file.file_size = length;
348 ret->anonymity = anonymity;
349 ret->priority = priority;
355 * Closure for "dir_scan_cb".
/* Members of struct DirScanCls, the closure that dir_scan_cb casts its
   `cls` argument to.
   NOTE(review): several member declarations are missing from this listing;
   for those only the description line remains (e.g. the scanner closure,
   the error message, do_index, anonymity, priority). */
360 * Metadata extractors to use.
362 struct EXTRACTOR_PluginList *extractors;
/* File-sharing handle passed on when entries are created. */
367 struct GNUNET_FS_Handle *h;
370 * Function to call on each directory entry.
372 GNUNET_FS_FileProcessor proc;
380 * Scanner to use for subdirectories.
382 GNUNET_FS_DirectoryScanner scanner;
385 * Closure for scanner.
390 * Set to an error message (if any).
395 * Should files be indexed?
400 * Desired anonymity level.
405 * Desired publishing priority.
410 * Expiration time for publication.
412 struct GNUNET_TIME_Absolute expiration;
417 * Function called on each entry in a directory to
418 * cause default-publishing.
419 * @param cls closure (struct DirScanCls)
420 * @param filename name of the file to be published
421 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
/* Per-entry callback: turns one path into a publish entry (recursing for
   directories) and hands the entry to dsc->proc.
   NOTE(review): argument lists and some statements are missing from this
   listing (gaps in the embedded line numbers). */
424 dir_scan_cb (void *cls,
425 const char *filename)
427 struct DirScanCls *dsc = cls;
429 struct GNUNET_FS_FileInformation *fi;
430 struct GNUNET_FS_Uri *ksk_uri;
431 struct GNUNET_FS_Uri *keywords;
432 struct GNUNET_CONTAINER_MetaData *meta;
/* An entry that cannot be stat'ed aborts the scan; the reason is stored in
   dsc->emsg for the caller. */
434 if (0 != STAT (filename, &sbuf))
436 GNUNET_asprintf (&dsc->emsg,
437 _("`%s' failed on file `%s': %s"),
441 return GNUNET_SYSERR;
/* Directories are handled by building a nested publish structure. */
443 if (S_ISDIR (sbuf.st_mode))
445 fi = GNUNET_FS_file_information_create_from_directory (dsc->h,
/* On this failure path an error message is expected to be set already. */
457 GNUNET_assert (NULL != dsc->emsg);
458 return GNUNET_SYSERR;
/* Other entries: extract metadata, derive keywords from it, canonicalize
   them and create the file entry. */
463 meta = GNUNET_CONTAINER_meta_data_create ();
464 GNUNET_CONTAINER_meta_data_extract_from_file (meta,
467 // FIXME: remove path from filename in metadata!
468 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
469 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
/* NOTE(review): no NULL check on `fi` is visible between this call and the
   dsc->proc call below -- confirm, since create_from_file has a STAT
   failure path of its own. */
470 fi = GNUNET_FS_file_information_create_from_file (dsc->h,
/* The temporaries created above are released here. */
479 GNUNET_CONTAINER_meta_data_destroy (meta);
480 GNUNET_FS_uri_destroy (keywords);
481 GNUNET_FS_uri_destroy (ksk_uri);
/* Hand the entry to the processor supplied by the caller. */
483 dsc->proc (dsc->proc_cls,
491 * Simple, useful default implementation of a directory scanner
492 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
493 * UNIX filename, will publish all files in the directory except hidden
494 * files (those starting with a "."). Metadata will be extracted
495 * using GNU libextractor; the specific list of plugins should be
496 * specified in "cls", passing NULL will disable (!) metadata
497 * extraction. Keywords will be derived from the metadata and be
498 * subject to default canonicalization. This is strictly a
499 * convenience function.
501 * @param cls must be of type "struct EXTRACTOR_PluginList*"
502 * @param h handle to the file sharing subsystem
503 * @param dirname name of the directory to scan
504 * @param do_index should files be indexed or inserted
505 * @param anonymity desired anonymity level
506 * @param priority priority for publishing
507 * @param expirationTime expiration for publication
508 * @param proc function called on each entry
509 * @param proc_cls closure for proc
510 * @param emsg where to store an error message (on errors)
511 * @return GNUNET_OK on success
/* Default directory scanner: packs its arguments into a struct DirScanCls
   and runs GNUNET_DISK_directory_scan over `dirname`.
   NOTE(review): several parameter lines and assignments (including any
   initialization of dsc.emsg) are missing from this listing. */
514 GNUNET_FS_directory_scanner_default (void *cls,
515 struct GNUNET_FS_Handle *h,
520 struct GNUNET_TIME_Absolute expirationTime,
521 GNUNET_FS_FileProcessor proc,
/* `cls` is interpreted as a libextractor plugin list. */
525 struct EXTRACTOR_PluginList *ex = cls;
526 struct DirScanCls dsc;
531 dsc.proc_cls = proc_cls;
/* Subdirectories are scanned with this same function and the same closure. */
532 dsc.scanner = &GNUNET_FS_directory_scanner_default;
533 dsc.scanner_cls = cls;
534 dsc.do_index = do_index;
535 dsc.anonymity = anonymity;
536 dsc.priority = priority;
537 dsc.expiration = expirationTime;
/* A result of -1 from the scan is treated as failure.
   NOTE(review): the assert below requires dsc.emsg to be set whenever the
   scan reports -1; if the scan can fail without the callback having run,
   the assert would trip -- confirm against GNUNET_DISK_directory_scan. */
538 if (-1 == GNUNET_DISK_directory_scan (dirname,
542 GNUNET_assert (NULL != dsc.emsg);
544 return GNUNET_SYSERR;
551 * Closure for dirproc function.
/* Member of struct EntryProcCls, the closure used while collecting the
   entries of a directory.
   NOTE(review): the remainder of the description sentence below is missing
   from this listing. */
556 * Linked list of directory entries that is being
/* Head of the list; entries are chained through their `next` member. */
559 struct GNUNET_FS_FileInformation *entries;
565 * Function that processes a directory entry that
566 * was obtained from the scanner.
567 * @param cls our closure
568 * @param filename name of the file (unused, why there???)
569 * @param fi information for publishing the file
/* Entry collector: links `fi` in front of the list kept in the
   struct EntryProcCls closure.
   NOTE(review): the first signature line (function name and `cls`
   parameter) and the statements after line 580 are missing from this
   listing; presumably dc->entries is then set to `fi` -- confirm. */
573 const char *filename,
574 struct GNUNET_FS_FileInformation *fi)
576 struct EntryProcCls *dc = cls;
/* The entry must not yet be linked into any list or directory. */
578 GNUNET_assert (fi->next == NULL);
579 GNUNET_assert (fi->dir == NULL);
580 fi->next = dc->entries;
586 * Create a publish-structure from an existing file hierarchy, inferring
587 * and organizing keywords and metadata as much as possible. This
588 * function primarily performs the recursive build and re-organizes
589 * keywords and metadata; for automatically getting metadata
590 * extraction, scanning of directories and creation of the respective
591 * GNUNET_FS_FileInformation entries the default scanner should be
592 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
593 * convenience function.
595 * @param h handle to the file sharing subsystem
596 * @param client_info initial value for the client-info value for this entry
597 * @param filename name of the top-level file or directory
598 * @param scanner function used to get a list of files in a directory
599 * @param scanner_cls closure for scanner
600 * @param do_index should files in the hierarchy be indexed?
601 * @param anonymity what is the desired anonymity level for sharing?
602 * @param priority what is the priority for OUR node to
603 * keep this file available? Use 0 for maximum anonymity and
604 * minimum reliability...
605 * @param expirationTime when should this content expire?
606 * @param emsg where to store an error message
607 * @return publish structure entry for the directory, NULL on error
/* Builds a directory entry: runs `scanner` to collect the children, creates
   an empty directory entry, attaches the children to it and records the
   filename.
   NOTE(review): parameter lines, call arguments and some statements are
   missing from this listing (gaps in the embedded line numbers). */
609 struct GNUNET_FS_FileInformation *
610 GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
612 const char *filename,
613 GNUNET_FS_DirectoryScanner scanner,
618 struct GNUNET_TIME_Absolute expirationTime,
621 struct GNUNET_FS_FileInformation *ret;
622 struct EntryProcCls dc;
623 struct GNUNET_FS_Uri *ksk;
624 struct GNUNET_CONTAINER_MetaData *meta;
/* Start from fresh metadata marked as describing a directory. */
629 meta = GNUNET_CONTAINER_meta_data_create ();
630 GNUNET_FS_meta_data_make_directory (meta);
/* NOTE(review): the call below starts a statement of its own, so the
   scanner's return value is discarded; a failed scan does not visibly stop
   the construction -- confirm this is intended. */
631 scanner (scanner_cls,
/* NOTE(review): `ksk` stays NULL. If it is what gets passed as `keywords`
   to create_empty_directory (arguments not visible), that function calls
   GNUNET_FS_uri_dup on it without a NULL guard -- verify that
   GNUNET_FS_uri_dup accepts NULL. */
641 ksk = NULL; // FIXME...
642 // FIXME: create meta!
643 ret = GNUNET_FS_file_information_create_empty_directory (h,
/* The local metadata is no longer needed once the entry has been created. */
650 GNUNET_CONTAINER_meta_data_destroy (meta);
/* Attach the collected children and point each one back at its parent. */
651 ret->data.dir.entries = dc.entries;
652 while (dc.entries != NULL)
654 dc.entries->dir = ret;
655 dc.entries = dc.entries->next;
/* Walk `fn` with strstr; presumably this strips leading path components --
   confirm, the search string is not visible. */
658 while (NULL != (ss = strstr (fn,
/* Record a filename item (C-string format) in the entry's metadata. */
661 GNUNET_CONTAINER_meta_data_insert (ret->meta,
663 EXTRACTOR_METATYPE_FILENAME,
664 EXTRACTOR_METAFORMAT_C_STRING,
668 ret->filename = GNUNET_strdup (filename);
674 * Create an entry for an empty directory in a publish-structure.
675 * This function should be used by applications for which the
676 * use of "GNUNET_FS_file_information_create_from_directory"
677 * is not appropriate.
679 * @param h handle to the file sharing subsystem
680 * @param client_info initial value for the client-info value for this entry
681 * @param meta metadata for the directory
682 * @param keywords under which keywords should this directory be available
683 * directly; can be NULL
684 * @param anonymity what is the desired anonymity level for sharing?
685 * @param priority what is the priority for OUR node to
686 * keep this file available? Use 0 for maximum anonymity and
687 * minimum reliability...
688 * @param expirationTime when should this content expire?
689 * @return publish structure entry for the directory, NULL on error
/* Allocates a directory entry without children and fills it from the
   arguments.
   NOTE(review): several parameter lines and the final return are missing
   from this listing. */
691 struct GNUNET_FS_FileInformation *
692 GNUNET_FS_file_information_create_empty_directory (struct GNUNET_FS_Handle *h,
694 const struct GNUNET_FS_Uri *keywords,
695 const struct GNUNET_CONTAINER_MetaData *meta,
698 struct GNUNET_TIME_Absolute expirationTime)
700 struct GNUNET_FS_FileInformation *ret;
702 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
704 ret->client_info = client_info;
/* NOTE(review): unlike GNUNET_FS_file_information_create_from_reader, a NULL
   result of the duplication is not replaced by an empty container here,
   although create_from_directory later inserts into ret->meta. */
705 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
/* NOTE(review): `keywords` is documented as "can be NULL", yet it is
   duplicated unconditionally; create_from_reader guards the same call with
   a NULL test. Verify GNUNET_FS_uri_dup tolerates NULL or add the guard. */
706 ret->keywords = GNUNET_FS_uri_dup (keywords);
707 ret->expirationTime = expirationTime;
/* Marks the entry as a directory so the data.dir members apply. */
708 ret->is_directory = GNUNET_YES;
709 ret->anonymity = anonymity;
710 ret->priority = priority;
716 * Add an entry to a directory in a publish-structure. Clients
717 * should never modify publish structures that were passed to
718 * "GNUNET_FS_publish_start" already.
720 * @param dir the directory
721 * @param ent the entry to add; the entry must not have been
722 * added to any other directory at this point and
723 * must not include "dir" in its structure
724 * @return GNUNET_OK on success, GNUNET_SYSERR on error
/* Links `ent` in as the first child of directory `dir`.
   NOTE(review): some lines are missing from this listing (gaps in the
   embedded line numbers); presumably ent->dir is set to `dir` on one of
   them -- confirm. */
727 GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir,
728 struct GNUNET_FS_FileInformation *ent)
/* Refuse entries that already belong to a directory or a list, and refuse
   targets that are not directories. */
730 if ( (ent->dir != NULL) ||
731 (ent->next != NULL) ||
732 (! dir->is_directory) )
735 return GNUNET_SYSERR;
/* Prepend to the directory's entry list. */
738 ent->next = dir->data.dir.entries;
739 dir->data.dir.entries = ent;
/* The stored directory size is reset to 0 after the content changed;
   presumably it is recomputed later -- confirm. */
740 dir->data.dir.dir_size = 0;
746 * Inspect a file or directory in a publish-structure. Clients
747 * should never modify publish structures that were passed to
748 * "GNUNET_FS_publish_start" already. When called on a directory,
749 * this function will FIRST call "proc" with information about
750 * the directory itself and then for each of the files in the
751 * directory (but not for files in subdirectories). When called
752 * on a file, "proc" will be called exactly once (with information
753 * about the specific file).
755 * @param dir the directory
756 * @param proc function to call on each entry
757 * @param proc_cls closure for proc
/* Reports `dir` itself and then, if it is a directory, its direct entries
   to `proc`.
   NOTE(review): the `proc` call sites are only partly present in this
   listing; the visible lines are individual arguments of those calls. */
760 GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir,
761 GNUNET_FS_FileInformationProcessor proc,
764 struct GNUNET_FS_FileInformation *pos;
/* Size argument: directory size for directories, file size otherwise. */
769 (dir->is_directory) ? dir->data.dir.dir_size : dir->data.file.file_size,
/* Passed by address, so the callee has access to the stored field itself. */
774 &dir->expirationTime,
/* The child walk below applies to directories only. */
777 if (! dir->is_directory)
779 pos = dir->data.dir.entries;
/* Same size selection, applied to the child entry. */
785 (pos->is_directory) ? pos->data.dir.dir_size : pos->data.file.file_size,
790 &pos->expirationTime,
799 * Destroy publish-structure. Clients should never destroy publish
800 * structures that were passed to "GNUNET_FS_publish_start" already.
802 * @param fi structure to destroy
803 * @param cleaner function to call on each entry in the structure
804 * (useful to clean up client_info); can be NULL; return
806 * @param cleaner_cls closure for cleaner
/* Destroys `fi`. For a directory, every child is unlinked from
   data.dir.entries and destroyed recursively first; for a file, the reader
   is invoked with offset 0 and size 0 as its clean-up call. `cleaner` is
   invoked on the entry, after which the members of `fi` are released.
   NOTE(review): some lines are missing from this listing (gaps in the
   embedded line numbers), e.g. the guards around the `cleaner` calls and
   the end of the function. */
809 GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi,
810 GNUNET_FS_FileInformationProcessor cleaner,
813 struct GNUNET_FS_FileInformation *pos;
815 if (fi->is_directory)
817 /* clean up directory */
/* Detach each child from the list before destroying it. */
818 while (NULL != (pos = fi->data.dir.entries))
820 fi->data.dir.entries = pos->next;
821 GNUNET_FS_file_information_destroy (pos, cleaner, cleaner_cls);
823 /* clean up client-info */
825 cleaner (cleaner_cls,
827 fi->data.dir.dir_size,
834 GNUNET_free_non_null (fi->data.dir.dir_data);
838 /* call clean-up function of the reader */
839 if (fi->data.file.reader != NULL)
840 fi->data.file.reader (fi->data.file.reader_cls, 0, 0,
842 /* clean up client-info */
844 cleaner (cleaner_cls,
846 fi->data.file.file_size,
854 GNUNET_free_non_null (fi->filename);
/* NOTE(review): use-after-free. fi->serialization is released here, yet
   lines 859-860 below still test the pointer and pass it to UNLINK (and
   presumably to the warning log). The unlink step has to run before this
   free, or this free has to be removed in favour of the one at line 868. */
855 GNUNET_free_non_null (fi->serialization);
856 GNUNET_free_non_null (fi->emsg);
/* NOTE(review): chk_uri is released with a plain free while fi->keywords
   goes through GNUNET_FS_uri_destroy; if chk_uri is also a URI object this
   may leak its contents -- its type is not visible here, confirm. */
857 GNUNET_free_non_null (fi->chk_uri);
858 /* clean up serialization */
859 if ( (NULL != fi->serialization) &&
860 (0 != UNLINK (fi->serialization)) )
861 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
864 if (NULL != fi->keywords)
865 GNUNET_FS_uri_destroy (fi->keywords);
866 if (NULL != fi->meta)
867 GNUNET_CONTAINER_meta_data_destroy (fi->meta);
/* NOTE(review): double free. fi->serialization was already released at
   line 855 and is not reset in between (assuming GNUNET_free_non_null does
   not clear the pointer). Exactly one of the two frees must remain, placed
   after the UNLINK. */
868 GNUNET_free_non_null (fi->serialization);
/* Finish the tree encoder still attached to this entry. */
871 GNUNET_FS_tree_encoder_finish (fi->te,
879 /* end of fs_file_information.c */