2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file fs/fs_file_information.c
23 * @brief Manage information for publishing directory hierarchies
24 * @author Christian Grothoff
27 * - serialization/deserialization (& deserialization API)
28 * - metadata filename clean up code
29 * - metadata/ksk generation for directories from contained files
32 #include <extractor.h>
33 #include "gnunet_fs_service.h"
38 * Create a temporary file on disk to store the current
41 * @param fi file information to sync with disk
44 GNUNET_FS_file_information_sync (struct GNUNET_FS_FileInformation * fi)
/* Tests whether this entry still lacks an on-disk serialization name. */
46 if (NULL == fi->serialization)
/* NOTE(review): this writes NULL into a field that was just tested to be NULL,
   so no file name is produced here.  The FIXME indicates that a configuration
   handle is required before a real temporary name can be created. */
48 fi->serialization = NULL; // FIXME -- need cfg!
55 * Load file information from the file to which
58 * @param fn name of the file to use
59 * @return NULL on error
61 struct GNUNET_FS_FileInformation *
62 GNUNET_FS_file_information_recover (const char *fn)
/* Holds the structure to be returned.  The code that reads fn and fills in
   this pointer is not visible in this excerpt -- TODO confirm whether the
   function is implemented yet. */
64 struct GNUNET_FS_FileInformation *ret;
72 * Obtain the name under which this file information
73 * structure is stored on disk. Only works for top-level
74 * file information structures.
76 * @param s structure to get the filename for
77 * @return NULL on error, otherwise filename that
78 * can be passed to "GNUNET_FS_file_information_recover"
79 * to read this fi-struct from disk.
82 GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s)
/* Returns the stored serialization pointer itself; no copy is made on this
   line, so the result refers to memory that is released by
   GNUNET_FS_file_information_destroy. */
86 return s->serialization;
91 * Closure for "data_reader_file".
96 * Name of the file to read.
101 * File descriptor, NULL if it has not yet been opened.
/* data_reader_file stores the result of GNUNET_DISK_file_open in this field
   and releases it with GNUNET_DISK_file_close. */
103 struct GNUNET_DISK_FileHandle *fd;
108 * Function that provides data by reading from a file.
110 * @param cls closure (points to the file information)
111 * @param offset offset to read from; it is possible
112 * that the caller might need to go backwards
114 * @param max maximum number of bytes that should be
115 * copied to buf; readers are not allowed
116 * to provide less data unless there is an error;
117 * a value of "0" will be used at the end to allow
118 * the reader to clean up its internal state
119 * @param buf where the reader should write the data
120 * @param emsg location for the reader to store an error message
121 * @return number of bytes written, usually "max", 0 on error
124 data_reader_file(void *cls,
/* The closure is a struct FileInfo (presumably the one allocated by
   GNUNET_FS_file_information_create_from_file -- TODO confirm). */
130 struct FileInfo *fi = cls;
/* Clean-up path (presumably taken for max == 0, as described in the function
   documentation): closes the handle and releases the file name.
   NOTE(review): fi->fd is documented as NULL until the file has been opened;
   confirm GNUNET_DISK_file_close is never reached with a NULL handle.
   NOTE(review): no release of the FileInfo structure itself is visible on
   these lines -- verify it is freed somewhere. */
136 GNUNET_DISK_file_close (fi->fd);
137 GNUNET_free (fi->filename);
/* Opens the file for reading and remembers the handle in the closure. */
143 fi->fd = GNUNET_DISK_file_open (fi->filename,
144 GNUNET_DISK_OPEN_READ,
145 GNUNET_DISK_PERM_NONE);
/* Open failure: a formatted message is stored through emsg. */
148 GNUNET_asprintf (emsg,
149 _("Could not open file `%s': %s"),
/* Positions the handle at the absolute offset, then reads up to max bytes
   into buf.  NOTE(review): the result of the seek call is not checked. */
155 GNUNET_DISK_file_seek (fi->fd, offset, GNUNET_DISK_SEEK_SET);
156 ret = GNUNET_DISK_file_read (fi->fd, buf, max);
/* Read failure: a formatted message is stored through emsg. */
159 GNUNET_asprintf (emsg,
160 _("Could not read file `%s': %s"),
/* Short read: a formatted message is stored through emsg. */
167 GNUNET_asprintf (emsg,
168 _("Short read reading from file `%s'!"),
177 * Create an entry for a file in a publish-structure.
179 * @param client_info initial value for the client-info value for this entry
180 * @param filename name of the file or directory to publish
181 * @param keywords under which keywords should this file be available
182 * directly; can be NULL
183 * @param meta metadata for the file
184 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
185 * GNUNET_SYSERR for simulation
186 * @param anonymity what is the desired anonymity level for sharing?
187 * @param priority what is the priority for OUR node to
188 * keep this file available? Use 0 for maximum anonymity and
189 * minimum reliability...
190 * @param expirationTime when should this content expire?
191 * @return publish structure entry for the file
193 struct GNUNET_FS_FileInformation *
194 GNUNET_FS_file_information_create_from_file (void *client_info,
195 const char *filename,
196 const struct GNUNET_FS_Uri *keywords,
197 const struct GNUNET_CONTAINER_MetaData *meta,
201 struct GNUNET_TIME_Absolute expirationTime)
/* Checks that the file can be stat-ed before an entry is built for it. */
206 if (0 != STAT (filename, &sbuf))
/* The stat failure is logged at warning level. */
208 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
/* Builds the reader closure; it keeps its own copy of the file name, which
   data_reader_file later releases with GNUNET_free. */
213 fi = GNUNET_malloc (sizeof(struct FileInfo));
214 fi->filename = GNUNET_strdup (filename);
/* Delegates construction of the entry to the reader-based constructor (the
   remaining arguments are not visible in this excerpt). */
215 return GNUNET_FS_file_information_create_from_reader (client_info,
229 * Function that provides data by copying from a buffer.
231 * @param cls closure (points to the buffer)
232 * @param offset offset to read from; it is possible
233 * that the caller might need to go backwards
235 * @param max maximum number of bytes that should be
236 * copied to buf; readers are not allowed
237 * to provide less data unless there is an error;
238 * a value of "0" will be used at the end to allow
239 * the reader to clean up its internal state
240 * @param buf where the reader should write the data
241 * @param emsg location for the reader to store an error message
242 * @return number of bytes written, usually "max", 0 on error
245 data_reader_copy(void *cls,
/* Copies max bytes starting at data[offset] into buf.
   NOTE(review): no bounds check against the length of the source buffer is
   visible on this line -- confirm callers never request bytes past its end. */
258 memcpy (buf, &data[offset], max);
264 * Create an entry for a file in a publish-structure.
266 * @param client_info initial value for the client-info value for this entry
267 * @param length length of the file
268 * @param data data for the file (should not be used afterwards by
269 * the caller; callee will "free")
270 * @param keywords under which keywords should this file be available
271 * directly; can be NULL
272 * @param meta metadata for the file
273 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
274 * GNUNET_SYSERR for simulation
275 * @param anonymity what is the desired anonymity level for sharing?
276 * @param priority what is the priority for OUR node to
277 * keep this file available? Use 0 for maximum anonymity and
278 * minimum reliability...
279 * @param expirationTime when should this content expire?
280 * @return publish structure entry for the file
282 struct GNUNET_FS_FileInformation *
283 GNUNET_FS_file_information_create_from_data (void *client_info,
286 const struct GNUNET_FS_Uri *keywords,
287 const struct GNUNET_CONTAINER_MetaData *meta,
291 struct GNUNET_TIME_Absolute expirationTime)
/* Delegates to the reader-based constructor.  The remaining arguments are not
   visible in this excerpt; presumably data_reader_copy is installed as the
   reader with the data buffer as its closure -- TODO confirm. */
293 return GNUNET_FS_file_information_create_from_reader (client_info,
307 * Create an entry for a file in a publish-structure.
309 * @param client_info initial value for the client-info value for this entry
310 * @param length length of the file
311 * @param reader function that can be used to obtain the data for the file
312 * @param reader_cls closure for "reader"
313 * @param keywords under which keywords should this file be available
314 * directly; can be NULL
315 * @param meta metadata for the file
316 * @param do_index GNUNET_YES for index, GNUNET_NO for insertion,
317 * GNUNET_SYSERR for simulation
318 * @param anonymity what is the desired anonymity level for sharing?
319 * @param priority what is the priority for OUR node to
320 * keep this file available? Use 0 for maximum anonymity and
321 * minimum reliability...
322 * @param expirationTime when should this content expire?
323 * @return publish structure entry for the file
325 struct GNUNET_FS_FileInformation *
326 GNUNET_FS_file_information_create_from_reader (void *client_info,
328 GNUNET_FS_DataReader reader,
330 const struct GNUNET_FS_Uri *keywords,
331 const struct GNUNET_CONTAINER_MetaData *meta,
335 struct GNUNET_TIME_Absolute expirationTime)
/* The entry that is being built. */
337 struct GNUNET_FS_FileInformation *ret;
339 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
340 ret->client_info = client_info;
/* The entry keeps its own duplicate of the metadata and, when keywords were
   supplied, of the keyword URI; a NULL keyword argument is stored as NULL. */
341 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
342 ret->keywords = (keywords == NULL) ? NULL : GNUNET_FS_uri_dup (keywords);
343 ret->expirationTime = expirationTime;
/* File-specific fields: the reader callback, its closure, the indexing mode
   and the file length. */
344 ret->data.file.reader = reader;
345 ret->data.file.reader_cls = reader_cls;
346 ret->data.file.do_index = do_index;
347 ret->data.file.file_size = length;
348 ret->anonymity = anonymity;
349 ret->priority = priority;
/* Hands the freshly built entry to the on-disk synchronization routine. */
350 GNUNET_FS_file_information_sync (ret);
356 * Closure for "dir_scan_cb".
361 * Metadata extractors to use.
363 struct EXTRACTOR_Extractor *extractors;
366 * Function to call on each directory entry.
/* dir_scan_cb invokes this callback with proc_cls as its first argument. */
368 GNUNET_FS_FileProcessor proc;
376 * Scanner to use for subdirectories.
/* GNUNET_FS_directory_scanner_default stores its own address here. */
378 GNUNET_FS_DirectoryScanner scanner;
381 * Closure for scanner.
386 * Set to an error message (if any).
391 * Should files be indexed?
396 * Desired anonymity level.
401 * Desired publishing priority.
406 * Expiration time for publication.
/* Filled from the expirationTime argument of the default scanner. */
408 struct GNUNET_TIME_Absolute expiration;
413 * Function called on each entry in a file to
414 * cause default-publishing.
415 * @param cls closure (struct DirScanCls)
416 * @param filename name of the file to be published
417 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
420 dir_scan_cb (void *cls,
421 const char *filename)
/* The closure carries the scan settings and the error message slot. */
423 struct DirScanCls *dsc = cls;
425 struct GNUNET_FS_FileInformation *fi;
426 struct GNUNET_FS_Uri *ksk_uri;
427 struct GNUNET_FS_Uri *keywords;
428 struct GNUNET_CONTAINER_MetaData *meta;
/* A stat failure is recorded in dsc->emsg and aborts the scan. */
430 if (0 != STAT (filename, &sbuf))
432 GNUNET_asprintf (&dsc->emsg,
433 _("`%s' failed on file `%s': %s"),
437 return GNUNET_SYSERR;
/* Directories are handled by building a nested publish structure. */
439 if (S_ISDIR (sbuf.st_mode))
441 fi = GNUNET_FS_file_information_create_from_directory (NULL,
/* Failure path of the nested build (the guarding condition is not visible
   here): an error message is expected to have been set before aborting. */
452 GNUNET_assert (NULL != dsc->emsg);
453 return GNUNET_SYSERR;
/* Regular files: metadata is extracted, keywords are derived from it and
   canonicalized, and a file entry is created. */
458 meta = GNUNET_CONTAINER_meta_data_create ();
459 GNUNET_CONTAINER_meta_data_extract_from_file (meta,
462 // FIXME: remove path from filename in metadata!
463 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
464 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
465 fi = GNUNET_FS_file_information_create_from_file (NULL,
/* The temporaries are released here; the reader-based constructor stores
   duplicates of the metadata and keywords it is given, so the entry
   presumably does not reference these objects -- TODO confirm which of them
   are actually passed in the call above. */
473 GNUNET_CONTAINER_meta_data_destroy (meta);
474 GNUNET_FS_uri_destroy (keywords);
475 GNUNET_FS_uri_destroy (ksk_uri);
/* Hands the result to the processor callback supplied by the caller. */
477 dsc->proc (dsc->proc_cls,
485 * Simple, useful default implementation of a directory scanner
486 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
487 * UNIX filename, will publish all files in the directory except hidden
488 * files (those starting with a "."). Metadata will be extracted
489 * using GNU libextractor; the specific list of plugins should be
490 * specified in "cls", passing NULL will disable (!) metadata
491 * extraction. Keywords will be derived from the metadata and be
492 * subject to default canonicalization. This is strictly a
493 * convenience function.
495 * @param cls must be of type "struct EXTRACTOR_Extractor*"
496 * @param dirname name of the directory to scan
497 * @param do_index should files be indexed or inserted
498 * @param anonymity desired anonymity level
499 * @param priority priority for publishing
500 * @param expirationTime expiration for publication
501 * @param proc function called on each entry
502 * @param proc_cls closure for proc
503 * @param emsg where to store an error message (on errors)
504 * @return GNUNET_OK on success
507 GNUNET_FS_directory_scanner_default (void *cls,
512 struct GNUNET_TIME_Absolute expirationTime,
513 GNUNET_FS_FileProcessor proc,
/* The closure is interpreted as the list of metadata extractors. */
517 struct EXTRACTOR_Extractor *ex = cls;
/* Per-scan state that is handed to the directory iteration. */
518 struct DirScanCls dsc;
522 dsc.proc_cls = proc_cls;
/* Subdirectories are scanned with this same function and the same closure. */
523 dsc.scanner = &GNUNET_FS_directory_scanner_default;
524 dsc.scanner_cls = cls;
525 dsc.do_index = do_index;
526 dsc.anonymity = anonymity;
527 dsc.priority = priority;
528 dsc.expiration = expirationTime;
/* A result of -1 from the directory iteration is treated as failure. */
529 if (-1 == GNUNET_DISK_directory_scan (dirname,
/* NOTE(review): this assertion requires that an error message was set
   whenever the iteration reports -1; the initialization of dsc.emsg is not
   visible in this excerpt -- confirm the iteration cannot fail without the
   callback having stored a message. */
533 GNUNET_assert (NULL != dsc.emsg);
535 return GNUNET_SYSERR;
542 * Closure for dirproc function.
547 * Linked list of directory entries that is being
/* dirproc links each new entry in front of the current head of this list;
   GNUNET_FS_file_information_create_from_directory later installs the list
   as the entries of the directory. */
550 struct GNUNET_FS_FileInformation *entries;
556 * Function that processes a directory entry that
557 * was obtained from the scanner.
558 * @param cls our closure
559 * @param filename name of the file (unused; TODO: check whether this parameter is needed)
560 * @param fi information for publishing the file
564 const char *filename,
565 struct GNUNET_FS_FileInformation *fi)
/* The closure holds the head of the entry list that is being collected. */
567 struct EntryProcCls *dc = cls;
/* The entry must not yet be linked into a list or attached to a directory. */
569 GNUNET_assert (fi->next == NULL);
570 GNUNET_assert (fi->dir == NULL);
/* Links the entry in front of the current list head (the update of the head
   pointer itself is not visible in this excerpt). */
571 fi->next = dc->entries;
577 * Create a publish-structure from an existing file hierarchy, inferring
578 * and organizing keywords and metadata as much as possible. This
579 * function primarily performs the recursive build and re-organizes
580 * keywords and metadata; for automatically getting metadata
581 * extraction, scanning of directories and creation of the respective
582 * GNUNET_FS_FileInformation entries the default scanner should be
583 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
584 * convenience function.
586 * @param client_info initial value for the client-info value for this entry
587 * @param filename name of the top-level file or directory
588 * @param scanner function used to get a list of files in a directory
589 * @param scanner_cls closure for scanner
590 * @param do_index should files in the hierarchy be indexed?
591 * @param anonymity what is the desired anonymity level for sharing?
592 * @param priority what is the priority for OUR node to
593 * keep this file available? Use 0 for maximum anonymity and
594 * minimum reliability...
595 * @param expirationTime when should this content expire?
596 * @param emsg where to store an error message
597 * @return publish structure entry for the directory, NULL on error
599 struct GNUNET_FS_FileInformation *
600 GNUNET_FS_file_information_create_from_directory (void *client_info,
601 const char *filename,
602 GNUNET_FS_DirectoryScanner scanner,
607 struct GNUNET_TIME_Absolute expirationTime,
610 struct GNUNET_FS_FileInformation *ret;
/* Collects the entries reported by the scanner. */
611 struct EntryProcCls dc;
612 struct GNUNET_FS_Uri *ksk;
613 struct GNUNET_CONTAINER_MetaData *meta;
/* Fresh metadata that is marked as describing a directory.
   NOTE(review): no matching destroy call for meta is visible in this excerpt,
   and the empty-directory constructor stores a duplicate rather than this
   object -- confirm meta is released on every path. */
616 meta = GNUNET_CONTAINER_meta_data_create ();
617 GNUNET_FS_meta_data_make_directory (meta);
/* Runs the caller-supplied scanner (its arguments are not visible here). */
619 scanner (scanner_cls,
/* No keywords are generated for the directory yet.
   NOTE(review): if this NULL is passed on as the keywords argument, the
   empty-directory constructor calls GNUNET_FS_uri_dup on it without a NULL
   check -- confirm that function accepts NULL. */
628 ksk = NULL; // FIXME...
629 // FIXME: create meta!
630 ret = GNUNET_FS_file_information_create_empty_directory (client_info,
/* Installs the collected list as the entries of the directory, then walks it
   to set the parent pointer of each entry and sync it.  The walk advances
   dc.entries to NULL; the list head stays reachable via ret. */
636 ret->data.dir.entries = dc.entries;
637 while (dc.entries != NULL)
639 dc.entries->dir = ret;
640 GNUNET_FS_file_information_sync (dc.entries);
641 dc.entries = dc.entries->next;
/* Finally the directory entry itself is synced. */
643 GNUNET_FS_file_information_sync (ret);
649 * Create an entry for an empty directory in a publish-structure.
650 * This function should be used by applications for which the
651 * use of "GNUNET_FS_file_information_create_from_directory"
652 * is not appropriate.
654 * @param client_info initial value for the client-info value for this entry
655 * @param meta metadata for the directory
656 * @param keywords under which keywords should this directory be available
657 * directly; can be NULL
658 * @param anonymity what is the desired anonymity level for sharing?
659 * @param priority what is the priority for OUR node to
660 * keep this file available? Use 0 for maximum anonymity and
661 * minimum reliability...
662 * @param expirationTime when should this content expire?
663 * @return publish structure entry for the directory, NULL on error
665 struct GNUNET_FS_FileInformation *
666 GNUNET_FS_file_information_create_empty_directory (void *client_info,
667 const struct GNUNET_CONTAINER_MetaData *meta,
668 const struct GNUNET_FS_Uri *keywords,
671 struct GNUNET_TIME_Absolute expirationTime)
/* The directory entry that is being built. */
673 struct GNUNET_FS_FileInformation *ret;
675 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_FileInformation));
676 ret->client_info = client_info;
/* The entry keeps its own duplicates of the metadata and keywords.
   NOTE(review): keywords is documented as possibly NULL, yet it is passed to
   GNUNET_FS_uri_dup without the NULL check that
   GNUNET_FS_file_information_create_from_reader performs -- confirm that
   GNUNET_FS_uri_dup accepts NULL. */
677 ret->meta = GNUNET_CONTAINER_meta_data_duplicate (meta);
678 ret->keywords = GNUNET_FS_uri_dup (keywords);
679 ret->expirationTime = expirationTime;
/* Marks the entry as a directory; no entries are attached on these lines. */
680 ret->is_directory = GNUNET_YES;
681 ret->anonymity = anonymity;
682 ret->priority = priority;
/* Hands the freshly built entry to the on-disk synchronization routine. */
683 GNUNET_FS_file_information_sync (ret);
689 * Add an entry to a directory in a publish-structure. Clients
690 * should never modify publish structures that were passed to
691 * "GNUNET_FS_publish_start" already.
693 * @param dir the directory
694 * @param ent the entry to add; the entry must not have been
695 * added to any other directory at this point and
696 * must not include "dir" in its structure
697 * @return GNUNET_OK on success, GNUNET_SYSERR on error
700 GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir,
701 struct GNUNET_FS_FileInformation *ent)
/* Rejects the request when the entry already has a parent or a successor, or
   when the target is not a directory. */
703 if ( (ent->dir != NULL) ||
704 (ent->next != NULL) ||
705 (! dir->is_directory) )
708 return GNUNET_SYSERR;
/* Links the entry in as the new head of the directory entry list. */
711 ent->next = dir->data.dir.entries;
712 dir->data.dir.entries = ent;
/* Resets the recorded directory size (presumably because it is stale once
   the contents changed -- TODO confirm where it is recomputed). */
713 dir->data.dir.dir_size = 0;
/* Both the new entry and the directory are synced after the change. */
714 GNUNET_FS_file_information_sync (ent);
715 GNUNET_FS_file_information_sync (dir);
721 * Inspect a file or directory in a publish-structure. Clients
722 * should never modify publish structures that were passed to
723 * "GNUNET_FS_publish_start" already. When called on a directory,
724 * this function will FIRST call "proc" with information about
725 * the directory itself and then for each of the files in the
726 * directory (but not for files in subdirectories). When called
727 * on a file, "proc" will be called exactly once (with information
728 * about the specific file).
730 * @param dir the directory
731 * @param proc function to call on each entry
732 * @param proc_cls closure for proc
735 GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir,
736 GNUNET_FS_FileInformationProcessor proc,
/* Cursor used to walk the entries of a directory. */
739 struct GNUNET_FS_FileInformation *pos;
/* Directory case: the size and expiration of the directory itself are passed
   on first (presumably to proc), then the entry list is walked. */
741 if (dir->is_directory)
745 dir->data.dir.dir_size,
750 &dir->expirationTime,
752 pos = dir->data.dir.entries;
/* NOTE(review): every entry is reported through data.dir.dir_size, although
   entries created as files store their length in data.file.file_size --
   confirm this reads the intended member for non-directory entries. */
757 pos->data.dir.dir_size,
762 &pos->expirationTime,
/* Non-directory case: the file size and expiration are passed on. */
771 dir->data.file.file_size,
776 &dir->expirationTime,
783 * Destroy publish-structure. Clients should never destroy publish
784 * structures that were passed to "GNUNET_FS_publish_start" already.
786 * @param fi structure to destroy
787 * @param cleaner function to call on each entry in the structure
788 * (useful to clean up client_info); can be NULL; return
790 * @param cleaner_cls closure for cleaner
793 GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi,
794 GNUNET_FS_FileInformationProcessor cleaner,
/* Cursor used to detach the entries of a directory one at a time. */
797 struct GNUNET_FS_FileInformation *pos;
799 if (fi->is_directory)
801 /* clean up directory */
/* Each entry is unlinked from the head of the list and destroyed
   recursively with the same cleaner. */
802 while (NULL != (pos = fi->data.dir.entries))
804 fi->data.dir.entries = pos->next;
805 GNUNET_FS_file_information_destroy (pos, cleaner, cleaner_cls);
807 /* clean up client-info */
809 cleaner (cleaner_cls,
811 fi->data.dir.dir_size,
/* NOTE(review): dirname is released with the strict GNUNET_free while
   dir_data uses the NULL-tolerant variant; no assignment of dirname is
   visible in this excerpt -- confirm it is always set for directories. */
818 GNUNET_free_non_null (fi->data.dir.dir_data);
819 GNUNET_free (fi->data.dir.dirname);
823 /* call clean-up function of the reader */
/* The reader is invoked with max set to 0 and no buffer, which is the
   documented signal for it to release its internal state. */
824 fi->data.file.reader (fi->data.file.reader_cls, 0, 0, NULL, NULL);
825 /* clean up client-info */
827 cleaner (cleaner_cls,
829 fi->data.file.file_size,
/* NOTE(review): chk_uri is released with a plain free, whereas keywords is
   released with GNUNET_FS_uri_destroy further down; the type of chk_uri is
   not visible here -- confirm a plain free is sufficient for it. */
837 GNUNET_free_non_null (fi->emsg);
838 GNUNET_free_non_null (fi->chk_uri);
839 /* clean up serialization */
/* The serialization file is removed when a name is set; a failing unlink is
   only logged as a warning. */
840 if ( (NULL != fi->serialization) &&
841 (0 != UNLINK (fi->serialization)) )
842 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
/* NOTE(review): keywords can be NULL for entries built by
   GNUNET_FS_file_information_create_from_reader without keywords -- confirm
   GNUNET_FS_uri_destroy accepts NULL. */
845 GNUNET_FS_uri_destroy (fi->keywords);
846 GNUNET_CONTAINER_meta_data_destroy (fi->meta);
847 GNUNET_free_non_null (fi->serialization);
852 /* end of fs_file_information.c */