2 This file is part of GNUnet.
3 (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file util/container_meta_data.c
23 * @brief Storing of meta data
24 * @author Christian Grothoff
28 #include "gnunet_common.h"
29 #include "gnunet_container_lib.h"
30 #include "gnunet_strings_lib.h"
31 #include "gnunet_time_lib.h"
32 #include <extractor.h>
/* Members of one meta data list entry (struct MetaItem, judging by the
   self-referential next pointer; the line that opens the struct is not
   visible in this view).  Only next, type and format are declared on visible
   lines.  The members described as plugin name, data and data size have no
   visible declaration here; the rest of the file reads them as plugin_name,
   data and data_size, and also reads a mime_type member. */
41 * This is a linked list.
43 struct MetaItem *next;
46 * Name of the extracting plugin.
56 * The actual meta data.
61 * Number of bytes in 'data'.
66 * Type of the meta data.
68 enum EXTRACTOR_MetaType type;
71 * Format of the meta data.
73 enum EXTRACTOR_MetaFormat format;
78 * Meta data to associate with a file, directory or namespace.
/* State of one container: the head of the item list, a cached serialization
   and the item count.  The declarations of the cache buffer and its length
   are not visible in this view; the rest of the file accesses them as
   md->sbuf and md->sbuf_size.  The cache is discarded through
   invalidate_sbuf when an item is inserted, deleted or has its mime type
   filled in. */
80 struct GNUNET_CONTAINER_MetaData
83 * Linked list of the meta data items.
85 struct MetaItem *items;
88 * Complete serialized and compressed buffer of the items.
89 * NULL if we have not computed that buffer yet.
94 * Number of bytes in 'sbuf'. 0 if the buffer is stale.
99 * Number of items in the linked list.
101 unsigned int item_count;
107 * Create a fresh struct GNUNET_CONTAINER_MetaData instance.
109 * @return empty meta-data container
/* Allocate a new container with GNUNET_malloc and return it.
   NOTE(review): nothing is initialised explicitly, yet other functions in
   this file expect a fresh container to have items == NULL, sbuf == NULL
   and item_count == 0.  This relies on GNUNET_malloc returning zeroed
   memory -- confirm against the GNUNET_malloc contract.
   NOTE(review): the empty parameter list () leaves the parameters
   unspecified in C; (void) would state the intent, provided the public
   header is changed to match. */
111 struct GNUNET_CONTAINER_MetaData *
112 GNUNET_CONTAINER_meta_data_create ()
114 return GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData));
119 * Free meta data item.
121 * @param item item to free
/* Release the heap members of one list entry.  plugin_name and mime_type
   can be NULL (the insert function stores NULL when the caller passes
   none), which is presumably why the _non_null variant of the free macro is
   used.
   NOTE(review): no statement releasing the item struct itself is visible in
   this view.  Confirm that item is freed after its members: both callers
   (destroy and delete) unlink the item before calling this function and
   keep no other reference to it. */
124 meta_item_free (struct MetaItem *item)
126 GNUNET_free_non_null (item->plugin_name);
127 GNUNET_free_non_null (item->mime_type);
128 GNUNET_free_non_null (item->data);
134 * The meta data has changed, invalidate its serialization
137 * @param md meta data that changed
/* Discard the cached serialization of md after md was modified.  When no
   cache exists the NULL test presumably returns early (the return line is
   not visible); otherwise the cached buffer is released.
   NOTE(review): the statements following the free are not visible in this
   view.  Confirm that md->sbuf is reset to NULL and md->sbuf_size to 0 (the
   field documentation says 0 means stale).  Without the reset, destroy and
   serialize would later use a dangling pointer. */
140 invalidate_sbuf (struct GNUNET_CONTAINER_MetaData *md)
142 if (md->sbuf == NULL)
144 GNUNET_free (md->sbuf);
153 * @param md what to free
/* Tear down a container: entries are unlinked from the head of the list one
   at a time and released with meta_item_free, then the cached serialization
   is released if there is one.
   NOTE(review): neither a guard for md == NULL nor the release of md itself
   is visible in this view; confirm both exist. */
156 GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md)
158 struct MetaItem *item;
/* Pop the head entry; advance the head before the entry is released. */
162 while (NULL != (item = md->items))
164 md->items = item->next;
165 meta_item_free (item);
167 GNUNET_free_non_null (md->sbuf);
173 * Test if two MDs are equal. We consider them equal if
174 * the meta types, formats and content match (we do not
175 * include the mime types and plugins names in this
178 * @param md1 first value to check
179 * @param md2 other value to check
180 * @return GNUNET_YES if they are equal
/* Compare two containers.  The item counts are compared first.  Entries are
   then matched on type, format, data_size and the data bytes; plugin_name
   and mime_type take no part in the comparison.  A found flag is tested
   after the search.  The loops and the return statements are not visible in
   this view.
   NOTE(review): presumably every entry of one container is searched for in
   the other.  With equal counts that one-directional search is sufficient
   only because the insert function refuses a second entry with the same
   type, format and data, so each list holds distinct entries -- confirm no
   other code path can add duplicates.  A nested search is quadratic in the
   item count. */
183 GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData
185 const struct GNUNET_CONTAINER_MetaData
194 if (md1->item_count != md2->item_count)
204 if ( (i->type == j->type) &&
205 (i->format == j->format) &&
206 (i->data_size == j->data_size) &&
207 (0 == memcmp (i->data,
216 if (found == GNUNET_NO)
225 * Extend metadata. Note that the list of meta data items is
226 * sorted by size (largest first).
228 * @param md metadata to extend
229 * @param plugin_name name of the plugin that produced this value;
230 * special values can be used (e.g. '<zlib>' for zlib being
231 * used in the main libextractor library and yielding
233 * @param type libextractor-type describing the meta data
234 * @param format basic format information about data
235 * @param data_mime_type mime-type of data (not of the original file);
236 * can be NULL (if mime-type is not known)
237 * @param data actual meta-data found
238 * @param data_len number of bytes in data
239 * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists
240 * data_mime_type and plugin_name are not considered for "exists" checks
/* Add one entry to md.
   Visible behaviour:
   - Existing entries are inspected through pos.  The comparison of
     pos->data_size with data_len relates to the list being kept sorted by
     size, largest first (the statement it guards is not visible).
   - An entry with the same type, format, length and bytes counts as already
     present and GNUNET_SYSERR is returned.  If that entry had no mime type
     and the caller supplies one, the mime type is adopted and the
     serialization cache is invalidated first, but the return value is
     still GNUNET_SYSERR.
   - Otherwise a new item is allocated and plugin_name, data_mime_type and
     data are copied, so the caller keeps ownership of its arguments.
   - For EXTRACTOR_METATYPE_FILENAME the copied data is rewritten in place.
   - The serialization cache is invalidated.
   Not visible in this view: the data and data_len parameter lines, the
   linking of the new item into the list and the update of item_count.
   NOTE(review): exactly data_len bytes are copied and no terminator is
   appended, while get_by_type later hands string-format data to
   GNUNET_strdup.  Callers presumably must include the terminator in
   data_len -- confirm. */
243 GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
244 const char *plugin_name,
245 enum EXTRACTOR_MetaType type,
246 enum EXTRACTOR_MetaFormat format,
247 const char *data_mime_type,
251 struct MetaItem *prev;
252 struct MetaItem *pos;
260 if (pos->data_size < data_len)
/* Duplicate test: plugin_name and mime type are deliberately not compared. */
262 if ( (pos->type == type) &&
263 (pos->format == format) &&
264 (pos->data_size == data_len) &&
265 (0 == memcmp (pos->data,
269 if ( (pos->mime_type == NULL) &&
270 (data_mime_type != NULL) )
272 pos->mime_type = GNUNET_strdup (data_mime_type);
273 invalidate_sbuf (md);
275 return GNUNET_SYSERR;
281 i = GNUNET_malloc (sizeof (struct MetaItem));
284 i->data_size = data_len;
290 i->mime_type = (data_mime_type == NULL) ? NULL : GNUNET_strdup (data_mime_type);
291 i->plugin_name = (plugin_name == NULL) ? NULL : GNUNET_strdup (plugin_name);
292 i->data = GNUNET_malloc (data_len);
293 memcpy (i->data, data, data_len);
294 /* change OS native dir separators to unix '/' and others to '_' */
295 if (type == EXTRACTOR_METATYPE_FILENAME)
/* NOTE(review): the byte at p is read before p is checked against the end
   of the buffer.  Assuming p walks the copied data from its start, data
   without a NUL byte makes this read one byte past the allocation; testing
   the bound first would avoid that. */
298 while ( (*p != '\0') &&
299 (p < i->data + data_len) )
301 if (*p == DIR_SEPARATOR)
308 invalidate_sbuf (md);
316 * @param md metadata to manipulate
317 * @param type type of the item to remove
318 * @param data specific value to remove, NULL to remove all
319 * entries of the given type
320 * @param data_len number of bytes in data
321 * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md
/* Remove an entry of the given type from md.  A matching entry is unlinked
   either from the list head or from its predecessor prev, released with
   meta_item_free, and the serialization cache is invalidated.  The final
   GNUNET_SYSERR is returned when the search found nothing.
   Not visible in this view: the data and data_len parameter lines, the
   part of the condition that handles data == NULL, the update of
   item_count and the success return.
   NOTE(review): the documentation promises that data == NULL removes all
   entries of the type.  Whether the loop continues after the first removal
   cannot be told from the visible lines -- confirm. */
324 GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md,
325 enum EXTRACTOR_MetaType type,
329 struct MetaItem *pos;
330 struct MetaItem *prev;
336 if ( (pos->type == type) &&
338 ( (pos->data_size == data_len) &&
339 (0 == memcmp (pos->data,
344 md->items = pos->next;
346 prev->next = pos->next;
347 meta_item_free (pos);
349 invalidate_sbuf (md);
355 return GNUNET_SYSERR;
360 * Add the current time as the publication date
363 * @param md metadata to modify
/* Replace the publication date of md with the current time: take the
   current absolute time, delete entries of type
   EXTRACTOR_METATYPE_PUBLICATION_DATE, convert the time to a string and
   insert that string as a UTF8 entry of the same type.
   The return values of delete and insert are not used on the visible
   lines; for delete this looks deliberate, since a missing old entry is not
   an error here.
   NOTE(review): dat is returned by GNUNET_STRINGS_absolute_time_to_string
   and insert copies its data argument.  A release of dat is not visible in
   this view -- confirm who owns the string. */
366 GNUNET_CONTAINER_meta_data_add_publication_date (struct
367 GNUNET_CONTAINER_MetaData
371 struct GNUNET_TIME_Absolute t;
373 t = GNUNET_TIME_absolute_get ();
374 GNUNET_CONTAINER_meta_data_delete (md,
375 EXTRACTOR_METATYPE_PUBLICATION_DATE,
378 dat = GNUNET_STRINGS_absolute_time_to_string (t);
379 GNUNET_CONTAINER_meta_data_insert (md,
381 EXTRACTOR_METATYPE_PUBLICATION_DATE,
382 EXTRACTOR_METAFORMAT_UTF8,
391 * Iterate over MD entries.
393 * @param md metadata to inspect
394 * @param iter function to call on each entry
395 * @param iter_cls closure for iterator
396 * @return number of entries
/* Call iter on the entries of md.  All three visible return statements
   yield md->item_count, so the result is the total number of entries
   whether or not the walk ran to the end.  A non-zero result from iter ends
   the walk early.
   The condition guarding the first return is not visible; presumably it
   covers iter == NULL, which would make the function usable as a plain
   count query -- confirm.
   NOTE(review): no guard for md == NULL is visible in this view. */
399 GNUNET_CONTAINER_meta_data_iterate (const struct
400 GNUNET_CONTAINER_MetaData *md,
401 EXTRACTOR_MetaDataProcessor
402 iter, void *iter_cls)
404 struct MetaItem *pos;
407 return md->item_count;
411 if (0 != iter (iter_cls,
418 return md->item_count;
421 return md->item_count;
426 * Get the first MD entry of the given type. Caller
427 * is responsible for freeing the return value.
428 * Also, only meta data items that are strings (0-terminated)
429 * are returned by this function.
431 * @param md metadata to inspect
432 * @param type type to look for
433 * @return NULL if no entry was found
/* Return a newly allocated copy of the data of an entry with the requested
   type whose format is UTF8 or C_STRING; entries in other formats are
   passed over.  The caller owns the returned string.
   NOTE(review): GNUNET_strdup needs pos->data to be terminated.  The insert
   function stores exactly data_len bytes without adding a terminator, and
   only the deserializer is seen checking for one.  Data inserted directly
   with a string format and no terminator would be read past its end here
   -- confirm the contract with callers of insert. */
436 GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData
437 *md, enum EXTRACTOR_MetaType type)
439 struct MetaItem *pos;
444 if ( (type == pos->type) &&
445 ( (pos->format == EXTRACTOR_METAFORMAT_UTF8) ||
446 (pos->format == EXTRACTOR_METAFORMAT_C_STRING) ) )
447 return GNUNET_strdup (pos->data);
455 * Get the first matching MD entry of the given types. Caller is
456 * responsible for freeing the return value. Also, only meta data
457 * items that are strings (0-terminated) are returned by this
460 * @param md metadata to inspect
461 * @param ... -1-terminated list of types
462 * @return NULL if we do not have any such entry,
463 * otherwise client is responsible for freeing the value!
/* Variadic front end for get_by_type: types are read from the argument list
   one at a time and looked up in md.  The loop, its termination on -1 and
   the final return are not visible in this view.
   NOTE(review): variadic arguments undergo the default argument
   promotions, and the terminator -1 is an int.  Reading them with va_arg
   as enum EXTRACTOR_MetaType is only well defined if that enum is
   compatible with the promoted type; reading an int and converting is the
   portable form -- confirm.
   NOTE(review): the matching va_end is not visible; confirm it is reached
   on every return path. */
466 GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
467 GNUNET_CONTAINER_MetaData *md,
472 enum EXTRACTOR_MetaType type;
478 type = va_arg (args, enum EXTRACTOR_MetaType);
481 ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type);
491 * Get a thumbnail from the meta-data (if present).
493 * @param md metadata to get the thumbnail from
494 * @param thumb will be set to the thumbnail data. Must be
495 * freed by the caller!
496 * @return number of bytes in thumbnail, 0 if not available
/* Copy a thumbnail out of md.  Candidates are entries in BINARY format
   whose mime type starts with image/ (compared without regard to case).  An
   entry of type EXTRACTOR_METATYPE_THUMBNAIL replaces a previously chosen
   match that is not of that type.  If there is no match, or the match is
   empty, nothing is copied; otherwise *thumb receives a newly allocated
   copy that the caller must free, and the number of bytes is returned.
   NOTE(review): pos->mime_type is passed to strncasecmp, but the insert
   function stores NULL when no mime type is given.  The condition opens on
   the same line as the call, so no NULL test precedes it inside this
   condition; unless a guard exists on a line not visible here, an entry
   without a mime type leads to undefined behaviour.
   NOTE(review): match->type is read in the else-if branch; the if it
   belongs to is not visible, presumably it tests match == NULL. */
499 GNUNET_CONTAINER_meta_data_get_thumbnail (const struct
500 GNUNET_CONTAINER_MetaData * md,
501 unsigned char **thumb)
503 struct MetaItem *pos;
504 struct MetaItem *match;
510 if ( (0 == strncasecmp ("image/", pos->mime_type,
511 strlen("image/"))) &&
512 (pos->format == EXTRACTOR_METAFORMAT_BINARY) )
516 else if ( (match->type != EXTRACTOR_METATYPE_THUMBNAIL) &&
517 (pos->type == EXTRACTOR_METATYPE_THUMBNAIL) )
522 if ( (match == NULL) ||
523 (match->data_size == 0) )
525 *thumb = GNUNET_malloc (match->data_size);
526 memcpy (*thumb, match->data, match->data_size);
527 return match->data_size;
532 * Duplicate struct GNUNET_CONTAINER_MetaData.
534 * @param md what to duplicate
535 * @return duplicate meta-data container
/* Build a copy of md: a new container is created and entries are added to
   it through the public insert function, which copies the strings and data
   it is given.  The copy therefore shares no memory with md.  The loop over
   the entries and the arguments of the insert call are not visible in this
   view.
   NOTE(review): no guard for md == NULL is visible; confirm whether callers
   may pass NULL. */
537 struct GNUNET_CONTAINER_MetaData *
538 GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData
541 struct GNUNET_CONTAINER_MetaData *ret;
542 struct MetaItem *pos;
546 ret = GNUNET_CONTAINER_meta_data_create ();
550 GNUNET_CONTAINER_meta_data_insert (ret,
564 * Add meta data that libextractor finds to our meta data
567 * @param cls closure, our meta data container
568 * @param plugin_name name of the plugin that produced this value;
569 * special values can be used (e.g. '<zlib>' for zlib being
570 * used in the main libextractor library and yielding
572 * @param type libextractor-type describing the meta data
573 * @param format basic format information about data
574 * @param data_mime_type mime-type of data (not of the original file);
575 * can be NULL (if mime-type is not known)
576 * @param data actual meta-data found
577 * @param data_len number of bytes in data
578 * @return always 0 to continue extracting
/* Callback that stores one extracted value in a container (the line with
   the function name and the cls parameter is not visible in this view).
   cls is taken to be the struct GNUNET_CONTAINER_MetaData to fill.  The
   result of insert is discarded with an explicit void cast, so a value
   that is already present does not stop the caller. */
582 const char *plugin_name,
583 enum EXTRACTOR_MetaType type,
584 enum EXTRACTOR_MetaFormat format,
585 const char *data_mime_type,
589 struct GNUNET_CONTAINER_MetaData *md = cls;
590 (void) GNUNET_CONTAINER_meta_data_insert (md,
602 * Extract meta-data from a file.
604 * @return GNUNET_SYSERR on error, otherwise the number
605 * of meta-data items obtained
/* Run the given libextractor plugins on filename and add what they find to
   md.  A NULL filename is an error (GNUNET_SYSERR).  A NULL plugin list is
   handled by an early exit whose return value is not visible in this view.
   Otherwise the item count is recorded, EXTRACTOR_extract is run, and the
   growth of the item count is returned.
   NOTE(review): item_count is unsigned and the difference is cast to int.
   The result is only meaningful while the callback can add but never remove
   entries, which holds for the insert-only callback seen in this file. */
608 GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData
609 *md, const char *filename,
610 struct EXTRACTOR_PluginList *
615 if (filename == NULL)
616 return GNUNET_SYSERR;
617 if (extractors == NULL)
619 old = md->item_count;
620 EXTRACTOR_extract (extractors,
625 return (int) (md->item_count - old);
630 * Try to compress the given block of data.
632 * @param data block to compress; if compression
633 * resulted in a smaller block, the first
634 * bytes of data are updated to the compressed
636 * @param oldSize number of bytes in data
637 * @param result set to the compressed data
638 * @param newSize set to size of result
639 * @return GNUNET_YES if compression reduced the size,
640 * GNUNET_NO if compression did not help
/* Compress data with zlib at level 9 into a temporary buffer.
   Two assignments to dlen are visible back to back: one asks zlib for the
   bound via compressBound, the other uses a manual estimate.  Presumably a
   preprocessor conditional that is not visible in this view selects one of
   them.  What happens with the compressed bytes on success, and the release
   of tmp, are not visible either. */
643 try_compression (const char *data,
652 dlen = compressBound (oldSize);
654 dlen = oldSize + (oldSize / 100) + 20;
655 /* documentation says 100.1% oldSize + 12 bytes, but we
656 should be able to overshoot by more to be safe */
658 tmp = GNUNET_malloc (dlen);
659 if (Z_OK == compress2 ((Bytef *) tmp,
660 &dlen, (const Bytef *) data, oldSize, 9))
675 * Flag in 'version' that indicates compressed meta-data.
/* The two masks split the 32-bit version word of the serialized header:
   the top bit marks a compressed payload, the remaining 31 bits hold the
   version number.  Together they cover all 32 bits without overlap. */
677 #define HEADER_COMPRESSED 0x80000000
681 * Bits in 'version' that give the version number.
683 #define HEADER_VERSION_MASK 0x7FFFFFFF
687 * Header for serialized meta data.
689 struct MetaDataHeader
692 * The version of the MD serialization. The highest bit is used to
693 * indicate compression.
695 * Version 0 is traditional (pre-0.9) meta data (unsupported)
696 * Version 1 is for a NULL pointer
697 * Version 2 is for 0.9.x (and possibly higher)
698 * Other version numbers are not yet defined.
703 * How many MD entries are there?
708 * Size of the decompressed meta data.
713 * This is followed by 'entries' values of type 'struct MetaDataEntry'
714 * and then by 'entry' plugin names, mime-types and data blocks
715 * as specified in those meta data entries.
/* Members of one serialized entry (struct MetaDataEntry; the line that
   opens the struct and the type, format and data_size declarations are not
   visible in this view).  The serializer stores every member with htonl
   and the deserializer reads them back with ntohl, so the members are in
   network byte order inside a serialized buffer. */
721 * Entry of serialized meta data.
726 * Meta data type. Corresponds to an 'enum EXTRACTOR_MetaType'
731 * Meta data format. Corresponds to an 'enum EXTRACTOR_MetaFormat'
736 * Number of bytes of meta data.
741 * Number of bytes in the plugin name including 0-terminator. 0 for NULL.
743 uint32_t plugin_name_len;
746 * Number of bytes in the mime type including 0-terminator. 0 for NULL.
748 uint32_t mime_type_len;
754 * Serialize meta-data to target.
756 * @param md metadata to serialize
757 * @param target where to write the serialized metadata;
758 * *target can be NULL, in which case memory is allocated
759 * @param max maximum number of bytes available in target
760 * @param opt is it ok to just write SOME of the
761 * meta-data to match the size constraint,
762 * possibly discarding some data?
763 * @return number of bytes written on success,
764 * GNUNET_SYSERR on error (typically: not enough
/* Serialize md into *target, using at most max bytes.
   Visible flow:
   1. A max smaller than the header is rejected.
   2. If a cached serialization exists and fits, it is reused (a buffer of
      sbuf_size bytes is allocated for *target on a path whose condition is
      not visible, presumably when *target is NULL).  If the cache does not
      fit and partial output was not requested, the call fails at once.
   3. The total size is summed per item: one struct MetaDataEntry, the data
      bytes, and plugin name and mime type including their terminators.
      The sum is narrowed to size_t next to a check labelled integer
      overflow, and sizes of GNUNET_MAX_MALLOC_CHECKED or more are refused.
   4. One buffer holds the entry array followed by the payload area mdata.
      off starts at the payload size and is only ever decreased on the
      visible lines, so payloads are written from the end of the area
      towards its start; the assertion checks that the area was filled
      exactly.  All entry members are stored with htonl.
   5. The loop over i tries to emit the entries from index i onwards,
      optionally compressed through try_compression.  If the result fits, a
      header carrying version 2 (plus HEADER_COMPRESSED when compressed),
      the entry count and the uncompressed size is written and the number
      of bytes is returned.  Otherwise, unless partial output is not
      allowed, the size of the current item is subtracted from left and the
      next iteration retries with one entry fewer.
   6. If nothing fits, a header with zero entries is written.
   A result is also stored in md->sbuf as a cache when md->sbuf is NULL and
   a second condition holds that is not visible here (presumably i == 0,
   since the cached header records md->item_count entries).
   NOTE(review): hdr is obtained by casting the caller supplied char buffer
   to a struct MetaDataHeader pointer.  That assumes the buffer is suitably
   aligned for 32-bit members -- confirm with callers.  The deserializer in
   this file works on a local struct MetaDataHeader instead.
   NOTE(review): the release of the ent buffer is not visible on any return
   path in this view; confirm every return after its allocation frees it. */
768 GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData
769 *md, char **target, size_t max,
771 GNUNET_CONTAINER_MetaDataSerializationOptions
774 struct GNUNET_CONTAINER_MetaData *vmd;
775 struct MetaItem *pos;
776 struct MetaDataHeader *hdr;
777 struct MetaDataEntry *ent;
790 if (max < sizeof (struct MetaDataHeader))
791 return GNUNET_SYSERR; /* far too small */
795 if (md->sbuf != NULL)
797 /* try to use serialization cache */
798 if (md->sbuf_size <= max)
801 *target = GNUNET_malloc (md->sbuf_size);
805 return md->sbuf_size;
807 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
808 return GNUNET_SYSERR; /* can say that this will fail */
809 /* need to compute a partial serialization, sbuf useless ... */
817 msize += sizeof (struct MetaDataEntry);
818 msize += pos->data_size;
819 if (pos->plugin_name != NULL)
820 msize += strlen (pos->plugin_name) + 1;
821 if (pos->mime_type != NULL)
822 msize += strlen (pos->mime_type) + 1;
825 size = (size_t) msize;
828 GNUNET_break (0); /* integer overflow */
829 return GNUNET_SYSERR;
831 if (size >= GNUNET_MAX_MALLOC_CHECKED)
833 /* too large to be processed */
834 return GNUNET_SYSERR;
/* Single allocation: the entry array first, the payload area right behind. */
836 ent = GNUNET_malloc (size);
837 mdata = (char *) &ent[md->item_count];
838 off = size - (md->item_count * sizeof(struct MetaDataEntry));
843 ent[i].type = htonl ((uint32_t) pos->type);
844 ent[i].format = htonl ((uint32_t) pos->format);
845 ent[i].data_size = htonl ((uint32_t) pos->data_size);
846 if (pos->plugin_name == NULL)
849 plen = strlen (pos->plugin_name) + 1;
850 ent[i].plugin_name_len = htonl ( (uint32_t) plen);
851 if (pos->mime_type == NULL)
854 mlen = strlen (pos->mime_type) + 1;
855 ent[i].mime_type_len = htonl ((uint32_t) mlen);
856 off -= pos->data_size;
857 memcpy (&mdata[off], pos->data, pos->data_size);
859 if (pos->plugin_name != NULL)
860 memcpy (&mdata[off], pos->plugin_name, plen);
862 if (pos->mime_type != NULL)
863 memcpy (&mdata[off], pos->mime_type, mlen);
867 GNUNET_assert (off == 0);
873 for (i=0;i<md->item_count;i++)
876 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS))
877 comp = try_compression ((const char*) &ent[i],
882 if ( (md->sbuf == NULL) &&
885 /* fill 'sbuf'; this "modifies" md, but since this is only
886 an internal cache we will cast away the 'const' instead
887 of making the API look strange. */
888 vmd = (struct GNUNET_CONTAINER_MetaData*) md;
889 hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
890 hdr->size = htonl (left);
891 hdr->entries = htonl (md->item_count);
892 if (GNUNET_YES == comp)
894 GNUNET_assert (clen < left);
895 hdr->version = htonl (2 | HEADER_COMPRESSED);
899 vmd->sbuf_size = clen + sizeof (struct MetaDataHeader);
903 hdr->version = htonl (2);
907 vmd->sbuf_size = left + sizeof (struct MetaDataHeader);
909 vmd->sbuf = (char*) hdr;
912 if ( ( (left + sizeof (struct MetaDataHeader)) <= max) ||
913 ( (comp == GNUNET_YES) &&
916 /* success, this now fits! */
917 if (GNUNET_YES == comp)
919 hdr = (struct MetaDataHeader*) *target;
922 hdr = GNUNET_malloc (clen + sizeof (struct MetaDataHeader));
923 *target = (char*) hdr;
925 hdr->version = htonl (2 | HEADER_COMPRESSED);
926 hdr->size = htonl (left);
927 hdr->entries = htonl (md->item_count - i);
928 memcpy (&(*target)[sizeof(struct MetaDataHeader)],
933 return clen + sizeof (struct MetaDataHeader);
937 hdr = (struct MetaDataHeader*) *target;
940 hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
941 *target = (char*) hdr;
943 hdr->version = htonl (2);
944 hdr->entries = htonl (md->item_count - i);
945 hdr->size = htonl (left);
946 memcpy (&(*target)[sizeof(struct MetaDataHeader)],
950 return left + sizeof (struct MetaDataHeader);
954 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
958 return GNUNET_SYSERR;
961 /* next iteration: ignore the corresponding meta data at the
962 end and try again without it */
963 left -= sizeof (struct MetaDataEntry);
964 left -= pos->data_size;
965 if (pos->plugin_name != NULL)
966 left -= strlen (pos->plugin_name) + 1;
967 if (pos->mime_type != NULL)
968 left -= strlen (pos->mime_type) + 1;
973 /* nothing fit, only write header! */
974 hdr = (struct MetaDataHeader*) *target;
977 hdr = GNUNET_malloc (sizeof (struct MetaDataHeader));
978 *target = (char*) hdr;
980 hdr->version = htonl (2);
981 hdr->entries = htonl (0);
982 hdr->size = htonl (0);
983 return sizeof (struct MetaDataHeader);
988 * Get the size of the full meta-data in serialized form.
990 * @param md metadata to inspect
991 * @return number of bytes needed for serialization, -1 on error
/* Report the size of the full serialization of md.  If a cached
   serialization exists its size is returned directly.  Otherwise the
   serializer is run with GNUNET_MAX_MALLOC_CHECKED as the limit and the
   FULL option.
   NOTE(review): the serializer allocates the output buffer when the caller
   supplies none.  The target argument and any release of that buffer are
   not visible in this view; confirm the temporary buffer is freed here. */
994 GNUNET_CONTAINER_meta_data_get_serialized_size (const struct GNUNET_CONTAINER_MetaData *md)
999 if (md->sbuf != NULL)
1000 return md->sbuf_size;
1002 ret = GNUNET_CONTAINER_meta_data_serialize (md,
1004 GNUNET_MAX_MALLOC_CHECKED,
1005 GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL);
1013 * Decompress input, return the decompressed data
1014 * as output, set outputSize to the number of bytes
1017 * @param input compressed data
1018 * @param inputSize number of bytes in input
1019 * @param outputSize expected size of the output
1020 * @return NULL on error
/* Inflate input with zlib uncompress into a newly allocated buffer of olen
   bytes.  The output buffer is released on a path after the Z_OK test; given
   the documented NULL on error this is presumably the failure path, while
   on success the buffer is handed to the caller, who must free it.  The
   remaining parameter lines, the initialisation of olen and the return
   statements are not visible in this view.
   NOTE(review): uncompress updates olen to the number of bytes actually
   produced.  Whether that is compared with the expected size cannot be told
   from the visible lines -- confirm, since the caller goes on to index the
   buffer using the size taken from the header. */
1023 decompress (const char *input,
1031 output = GNUNET_malloc (olen);
1032 if (Z_OK == uncompress ((Bytef *) output,
1033 &olen, (const Bytef *) input, inputSize))
1039 GNUNET_free (output);
1046 * Deserialize meta-data. Initializes md.
1048 * @param input buffer with the serialized metadata
1049 * @param size number of bytes available in input
1050 * @return MD on success, NULL on error (i.e.
/* Rebuild a container from a serialized buffer that is not trusted.
   Visible flow:
   - Inputs shorter than the header are rejected.  The header is held in a
     local struct (hdr) rather than accessed through a cast of input, and
     each entry is likewise read into the local ent, presumably by memcpy,
     which sidesteps alignment requirements on input.
   - The version word is split with HEADER_VERSION_MASK and
     HEADER_COMPRESSED.  One version value returns NULL as the encoding of a
     null pointer, another is reported as unsupported.
   - The entry count and payload size come from the header; the entry array
     must fit into the payload size.
   - Compressed payloads are bounded by GNUNET_MAX_MALLOC_CHECKED and then
     inflated with decompress; for uncompressed payloads the size in the
     header must equal the number of bytes after the header.
   - left starts at the payload size minus the entry array and payloads are
     addressed as mdata[left], mirroring the serializer which writes
     payloads from the end towards the start.
   - Only the UTF8, C_STRING and BINARY formats are accepted.  String data,
     plugin names and mime types must end in a NUL byte.
   - Each entry is added with the public insert function; its return value
     is ignored, so duplicate entries in the input are dropped silently.
   - The buffer named data is released at the end with the _non_null
     variant, so it is presumably NULL for uncompressed input.
   NOTE(review): what happens to md on the error paths that follow its
   creation is not visible in this view; confirm that malformed entries do
   not leak the partially filled container. */
1053 struct GNUNET_CONTAINER_MetaData *
1054 GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size)
1056 struct GNUNET_CONTAINER_MetaData *md;
1057 struct MetaDataHeader hdr;
1058 struct MetaDataEntry ent;
1071 const char *meta_data;
1072 const char *plugin_name;
1073 const char *mime_type;
1074 enum EXTRACTOR_MetaFormat format;
1076 if (size < sizeof (struct MetaDataHeader))
1080 sizeof (struct MetaDataHeader));
1081 version = ntohl (hdr.version) & HEADER_VERSION_MASK;
1082 compressed = (ntohl (hdr.version) & HEADER_COMPRESSED) != 0;
1085 return NULL; /* null pointer */
1088 GNUNET_break_op (0); /* unsupported version */
1092 ic = ntohl (hdr.entries);
1093 dataSize = ntohl (hdr.size);
/* NOTE(review): ic comes straight from the input.  The declarations of ic
   and dataSize are not visible; if the product is computed in a 32-bit type
   it can wrap for a large ic and pass this test.  Comparing ic with
   dataSize divided by the entry size would not wrap -- confirm the types. */
1094 if ((sizeof (struct MetaDataEntry) * ic) > dataSize)
1096 GNUNET_break_op (0);
1102 if (dataSize >= GNUNET_MAX_MALLOC_CHECKED)
1104 /* make sure we don't blow our memory limit because of a mal-formed
1106 GNUNET_break_op (0);
1110 decompress ((const char *) &input[sizeof (struct MetaDataHeader)],
1111 size - sizeof (struct MetaDataHeader), dataSize);
1114 GNUNET_break_op (0);
1122 cdata = (const char *) &input[sizeof (struct MetaDataHeader)];
1123 if (dataSize != size - sizeof (struct MetaDataHeader))
1125 GNUNET_break_op (0);
1130 md = GNUNET_CONTAINER_meta_data_create ();
1131 left = dataSize - ic * sizeof (struct MetaDataEntry);
1132 mdata = &cdata[ic * sizeof (struct MetaDataEntry)];
1136 &cdata[i * sizeof(struct MetaDataEntry)],
1137 sizeof (struct MetaDataEntry));
1138 format = (enum EXTRACTOR_MetaFormat) ntohl (ent.format);
1139 if ( (format != EXTRACTOR_METAFORMAT_UTF8) &&
1140 (format != EXTRACTOR_METAFORMAT_C_STRING) &&
1141 (format != EXTRACTOR_METAFORMAT_BINARY) )
1143 GNUNET_break_op (0);
1146 dlen = ntohl (ent.data_size);
1147 plen = ntohl (ent.plugin_name_len);
1148 mlen = ntohl (ent.mime_type_len);
1151 GNUNET_break_op (0);
1155 meta_data = &mdata[left];
1156 if ( (format == EXTRACTOR_METAFORMAT_UTF8) ||
1157 (format == EXTRACTOR_METAFORMAT_C_STRING) )
1160 (mdata[left + dlen - 1] != '\0') )
1162 GNUNET_break_op (0);
1168 GNUNET_break_op (0);
1173 (mdata[left + plen - 1] != '\0') )
1175 GNUNET_break_op (0);
1181 plugin_name = &mdata[left];
1185 GNUNET_break_op (0);
1190 (mdata[left + mlen - 1] != '\0') )
1192 GNUNET_break_op (0);
1198 mime_type = &mdata[left];
1199 GNUNET_CONTAINER_meta_data_insert (md,
1201 (enum EXTRACTOR_MetaType) ntohl (ent.type),
1207 GNUNET_free_non_null (data);
1212 /* end of container_meta_data.c */