2 This file is part of GNUnet.
3 (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file util/container_meta_data.c
23 * @brief Storing of meta data
24 * @author Christian Grothoff
28 #include "gnunet_common.h"
29 #include "gnunet_container_lib.h"
30 #include "gnunet_strings_lib.h"
31 #include "gnunet_time_lib.h"
32 #include <extractor.h>
41 * This is a linked list.
43 struct MetaItem *next;
46 * Name of the extracting plugin.
56 * The actual meta data.
61 * Number of bytes in 'data'.
66 * Type of the meta data.
68 enum EXTRACTOR_MetaType type;
71 * Format of the meta data.
73 enum EXTRACTOR_MetaFormat format;
78 * Meta data to associate with a file, directory or namespace.
80 struct GNUNET_CONTAINER_MetaData
83 * Linked list of the meta data items.
85 struct MetaItem *items;
88 * Complete serialized and compressed buffer of the items.
89 * NULL if we have not computed that buffer yet.
94 * Number of bytes in 'sbuf'. 0 if the buffer is stale.
99 * Number of items in the linked list.
101 unsigned int item_count;
107 * Create a fresh, empty struct GNUNET_CONTAINER_MetaData.
109 * @return empty meta-data container
111 struct GNUNET_CONTAINER_MetaData *
112 GNUNET_CONTAINER_meta_data_create ()
114 return GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_MetaData));
119 * Free meta data item.
121 * @param item item to free
124 meta_item_free (struct MetaItem *item)
126 GNUNET_free_non_null (item->plugin_name);
127 GNUNET_free_non_null (item->mime_type);
128 GNUNET_free_non_null (item->data);
134 * The meta data has changed, invalidate its serialization
137 * @param md meta data that changed
140 invalidate_sbuf (struct GNUNET_CONTAINER_MetaData *md)
142 if (md->sbuf == NULL)
144 GNUNET_free (md->sbuf);
153 * @param md what to free
156 GNUNET_CONTAINER_meta_data_destroy (struct GNUNET_CONTAINER_MetaData *md)
158 struct MetaItem *item;
162 while (NULL != (item = md->items))
164 md->items = item->next;
165 meta_item_free (item);
167 GNUNET_free_non_null (md->sbuf);
173 * Test if two MDs are equal. We consider them equal if
174 * the meta types, formats and content match (we do not
175 * include the mime types and plugin names in this
178 * @param md1 first value to check
179 * @param md2 other value to check
180 * @return GNUNET_YES if they are equal
183 GNUNET_CONTAINER_meta_data_test_equal (const struct GNUNET_CONTAINER_MetaData
185 const struct GNUNET_CONTAINER_MetaData
194 if (md1->item_count != md2->item_count)
204 if ( (i->type == j->type) &&
205 (i->format == j->format) &&
206 (i->data_size == j->data_size) &&
207 (0 == memcmp (i->data,
216 if (found == GNUNET_NO)
225 * Extend metadata. Note that the list of meta data items is
226 * sorted by size (largest first).
228 * @param md metadata to extend
229 * @param plugin_name name of the plugin that produced this value;
230 * special values can be used (e.g. '<zlib>' for zlib being
231 * used in the main libextractor library and yielding
233 * @param type libextractor-type describing the meta data
234 * @param format basic format information about data
235 * @param data_mime_type mime-type of data (not of the original file);
236 * can be NULL (if mime-type is not known)
237 * @param data actual meta-data found
238 * @param data_len number of bytes in data
239 * @return GNUNET_OK on success, GNUNET_SYSERR if this entry already exists;
240 * data_mime_type and plugin_name are not considered for "exists" checks
243 GNUNET_CONTAINER_meta_data_insert (struct GNUNET_CONTAINER_MetaData *md,
244 const char *plugin_name,
245 enum EXTRACTOR_MetaType type,
246 enum EXTRACTOR_MetaFormat format,
247 const char *data_mime_type,
251 struct MetaItem *prev;
252 struct MetaItem *pos;
260 if (pos->data_size < data_len)
262 if ( (pos->type == type) &&
263 (pos->format == format) &&
264 (pos->data_size == data_len) &&
265 (0 == memcmp (pos->data,
269 if ( (pos->mime_type == NULL) &&
270 (data_mime_type != NULL) )
272 pos->mime_type = GNUNET_strdup (data_mime_type);
273 invalidate_sbuf (md);
275 return GNUNET_SYSERR;
281 i = GNUNET_malloc (sizeof (struct MetaItem));
284 i->data_size = data_len;
290 i->mime_type = (data_mime_type == NULL) ? NULL : GNUNET_strdup (data_mime_type);
291 i->plugin_name = (plugin_name == NULL) ? NULL : GNUNET_strdup (plugin_name);
292 i->data = GNUNET_malloc (data_len);
293 memcpy (i->data, data, data_len);
294 /* change OS native dir separators to unix '/' and others to '_' */
295 if (type == EXTRACTOR_METATYPE_FILENAME)
298 while ( (*p != '\0') &&
299 (p < i->data + data_len) )
301 if (*p == DIR_SEPARATOR)
308 invalidate_sbuf (md);
316 * @param md metadata to manipulate
317 * @param type type of the item to remove
318 * @param data specific value to remove, NULL to remove all
319 * entries of the given type
320 * @param data_len number of bytes in data
321 * @return GNUNET_OK on success, GNUNET_SYSERR if the item does not exist in md
324 GNUNET_CONTAINER_meta_data_delete (struct GNUNET_CONTAINER_MetaData *md,
325 enum EXTRACTOR_MetaType type,
329 struct MetaItem *pos;
330 struct MetaItem *prev;
336 if ( (pos->type == type) &&
338 ( (pos->data_size == data_len) &&
339 (0 == memcmp (pos->data,
344 md->items = pos->next;
346 prev->next = pos->next;
347 meta_item_free (pos);
349 invalidate_sbuf (md);
355 return GNUNET_SYSERR;
360 * Add the current time as the publication date
363 * @param md metadata to modify
366 GNUNET_CONTAINER_meta_data_add_publication_date (struct
367 GNUNET_CONTAINER_MetaData
371 struct GNUNET_TIME_Absolute t;
373 t = GNUNET_TIME_absolute_get ();
374 GNUNET_CONTAINER_meta_data_delete (md,
375 EXTRACTOR_METATYPE_PUBLICATION_DATE,
378 dat = GNUNET_STRINGS_absolute_time_to_string (t);
379 GNUNET_CONTAINER_meta_data_insert (md,
381 EXTRACTOR_METATYPE_PUBLICATION_DATE,
382 EXTRACTOR_METAFORMAT_UTF8,
391 * Iterate over MD entries.
393 * @param md metadata to inspect
394 * @param iter function to call on each entry
395 * @param iter_cls closure for iterator
396 * @return number of entries
399 GNUNET_CONTAINER_meta_data_iterate (const struct
400 GNUNET_CONTAINER_MetaData *md,
401 EXTRACTOR_MetaDataProcessor
402 iter, void *iter_cls)
404 struct MetaItem *pos;
407 return md->item_count;
411 if (0 != iter (iter_cls,
418 return md->item_count;
421 return md->item_count;
426 * Get the first MD entry of the given type. Caller
427 * is responsible for freeing the return value.
428 * Also, only meta data items that are strings (0-terminated)
429 * are returned by this function.
431 * @param md metadata to inspect
432 * @param type type to look for
433 * @return NULL if no entry was found
436 GNUNET_CONTAINER_meta_data_get_by_type (const struct GNUNET_CONTAINER_MetaData
437 *md, enum EXTRACTOR_MetaType type)
439 struct MetaItem *pos;
444 if ( (type == pos->type) &&
445 ( (pos->format == EXTRACTOR_METAFORMAT_UTF8) ||
446 (pos->format == EXTRACTOR_METAFORMAT_C_STRING) ) )
447 return GNUNET_strdup (pos->data);
455 * Get the first matching MD entry of the given types. Caller is
456 * responsible for freeing the return value. Also, only meta data
457 * items that are strings (0-terminated) are returned by this
460 * @param md metadata to inspect
461 * @param ... -1-terminated list of types
462 * @return NULL if we do not have any such entry,
463 * otherwise client is responsible for freeing the value!
466 GNUNET_CONTAINER_meta_data_get_first_by_types (const struct
467 GNUNET_CONTAINER_MetaData *md,
472 enum EXTRACTOR_MetaType type;
478 type = va_arg (args, enum EXTRACTOR_MetaType);
481 ret = GNUNET_CONTAINER_meta_data_get_by_type (md, type);
491 * Get a thumbnail from the meta-data (if present).
493 * @param md metadata to get the thumbnail from
494 * @param thumb will be set to the thumbnail data. Must be
495 * freed by the caller!
496 * @return number of bytes in thumbnail, 0 if not available
499 GNUNET_CONTAINER_meta_data_get_thumbnail (const struct
500 GNUNET_CONTAINER_MetaData * md,
501 unsigned char **thumb)
503 struct MetaItem *pos;
504 struct MetaItem *match;
510 if ( (0 == strncasecmp ("image/", pos->mime_type,
511 strlen("image/"))) &&
512 (pos->format == EXTRACTOR_METAFORMAT_BINARY) )
516 else if ( (match->type != EXTRACTOR_METATYPE_THUMBNAIL) &&
517 (pos->type == EXTRACTOR_METATYPE_THUMBNAIL) )
524 *thumb = GNUNET_malloc (match->data_size);
525 memcpy (*thumb, match->data, match->data_size);
526 return match->data_size;
531 * Duplicate struct GNUNET_CONTAINER_MetaData.
533 * @param md what to duplicate
534 * @return duplicate meta-data container
536 struct GNUNET_CONTAINER_MetaData *
537 GNUNET_CONTAINER_meta_data_duplicate (const struct GNUNET_CONTAINER_MetaData
540 struct GNUNET_CONTAINER_MetaData *ret;
541 struct MetaItem *pos;
545 ret = GNUNET_CONTAINER_meta_data_create ();
549 GNUNET_CONTAINER_meta_data_insert (ret,
563 * Add meta data that libextractor finds to our meta data
566 * @param cls closure, our meta data container
567 * @param plugin_name name of the plugin that produced this value;
568 * special values can be used (e.g. '<zlib>' for zlib being
569 * used in the main libextractor library and yielding
571 * @param type libextractor-type describing the meta data
572 * @param format basic format information about data
573 * @param data_mime_type mime-type of data (not of the original file);
574 * can be NULL (if mime-type is not known)
575 * @param data actual meta-data found
576 * @param data_len number of bytes in data
577 * @return always 0 to continue extracting
581 const char *plugin_name,
582 enum EXTRACTOR_MetaType type,
583 enum EXTRACTOR_MetaFormat format,
584 const char *data_mime_type,
588 struct GNUNET_CONTAINER_MetaData *md = cls;
589 (void) GNUNET_CONTAINER_meta_data_insert (md,
601 * Extract meta-data from a file.
603 * @return GNUNET_SYSERR on error, otherwise the number
604 * of meta-data items obtained
607 GNUNET_CONTAINER_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData
608 *md, const char *filename,
609 struct EXTRACTOR_PluginList *
614 if (filename == NULL)
615 return GNUNET_SYSERR;
616 if (extractors == NULL)
618 old = md->item_count;
619 EXTRACTOR_extract (extractors,
624 return (int) (md->item_count - old);
629 * Try to compress the given block of data.
631 * @param data block to compress; if compression
632 * resulted in a smaller block, the first
633 * bytes of data are updated to the compressed
635 * @param oldSize number of bytes in data
636 * @param result set to the compressed data
637 * @param newSize set to size of result
638 * @return GNUNET_YES if compression reduced the size,
639 * GNUNET_NO if compression did not help
642 try_compression (const char *data,
651 dlen = compressBound (oldSize);
653 dlen = oldSize + (oldSize / 100) + 20;
654 /* documentation says 100.1% oldSize + 12 bytes, but we
655 should be able to overshoot by more to be safe */
657 tmp = GNUNET_malloc (dlen);
658 if (Z_OK == compress2 ((Bytef *) tmp,
659 &dlen, (const Bytef *) data, oldSize, 9))
674 * Flag in 'version' that indicates compressed meta-data.
676 #define HEADER_COMPRESSED 0x80000000
680 * Bits in 'version' that give the version number.
682 #define HEADER_VERSION_MASK 0x7FFFFFFF
686 * Header for serialized meta data.
688 struct MetaDataHeader
691 * The version of the MD serialization. The highest bit is used to
692 * indicate compression.
694 * Version 0 is traditional (pre-0.9) meta data (unsupported)
695 * Version 1 is for a NULL pointer
696 * Version 2 is for 0.9.x (and possibly higher)
697 * Other version numbers are not yet defined.
702 * How many MD entries are there?
707 * Size of the decompressed meta data.
712 * This is followed by 'entries' values of type 'struct MetaDataEntry'
713 * and then by 'entries' plugin names, mime-types and data blocks
714 * as specified in those meta data entries.
720 * Entry of serialized meta data.
725 * Meta data type. Corresponds to an 'enum EXTRACTOR_MetaType'
730 * Meta data format. Corresponds to an 'enum EXTRACTOR_MetaFormat'
735 * Number of bytes of meta data.
740 * Number of bytes in the plugin name including 0-terminator. 0 for NULL.
742 uint32_t plugin_name_len;
745 * Number of bytes in the mime type including 0-terminator. 0 for NULL.
747 uint32_t mime_type_len;
753 * Serialize meta-data to target.
755 * @param md metadata to serialize
756 * @param target where to write the serialized metadata;
757 * *target can be NULL, in which case memory is allocated
758 * @param max maximum number of bytes available in target
759 * @param opt is it ok to just write SOME of the
760 * meta-data to match the size constraint,
761 * possibly discarding some data?
762 * @return number of bytes written on success,
763 * GNUNET_SYSERR on error (typically: not enough
767 GNUNET_CONTAINER_meta_data_serialize (const struct GNUNET_CONTAINER_MetaData
768 *md, char **target, size_t max,
770 GNUNET_CONTAINER_MetaDataSerializationOptions
773 struct GNUNET_CONTAINER_MetaData *vmd;
774 struct MetaItem *pos;
775 struct MetaDataHeader *hdr;
776 struct MetaDataEntry *ent;
789 if (max < sizeof (struct MetaDataHeader))
790 return GNUNET_SYSERR; /* far too small */
794 if (md->sbuf != NULL)
796 /* try to use serialization cache */
797 if (md->sbuf_size <= max)
800 *target = GNUNET_malloc (md->sbuf_size);
804 return md->sbuf_size;
806 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
807 return GNUNET_SYSERR; /* can say that this will fail */
808 /* need to compute a partial serialization, sbuf useless ... */
816 msize += sizeof (struct MetaDataEntry);
817 msize += pos->data_size;
818 if (pos->plugin_name != NULL)
819 msize += strlen (pos->plugin_name) + 1;
820 if (pos->mime_type != NULL)
821 msize += strlen (pos->mime_type) + 1;
824 size = (size_t) msize;
827 GNUNET_break (0); /* integer overflow */
828 return GNUNET_SYSERR;
830 if (size >= GNUNET_MAX_MALLOC_CHECKED)
832 /* too large to be processed */
833 return GNUNET_SYSERR;
835 ent = GNUNET_malloc (size);
836 mdata = (char *) &ent[md->item_count];
837 off = size - (md->item_count * sizeof(struct MetaDataEntry));
842 ent[i].type = htonl ((uint32_t) pos->type);
843 ent[i].format = htonl ((uint32_t) pos->format);
844 ent[i].data_size = htonl ((uint32_t) pos->data_size);
845 if (pos->plugin_name == NULL)
848 plen = strlen (pos->plugin_name) + 1;
849 ent[i].plugin_name_len = htonl ( (uint32_t) plen);
850 if (pos->mime_type == NULL)
853 mlen = strlen (pos->mime_type) + 1;
854 ent[i].mime_type_len = htonl ((uint32_t) mlen);
855 off -= pos->data_size;
856 memcpy (&mdata[off], pos->data, pos->data_size);
858 memcpy (&mdata[off], pos->plugin_name, plen);
860 memcpy (&mdata[off], pos->mime_type, mlen);
864 GNUNET_assert (off == 0);
869 for (i=0;i<md->item_count;i++)
872 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_NO_COMPRESS))
873 comp = try_compression ((const char*) &ent[i],
878 if ( (md->sbuf == NULL) &&
881 /* fill 'sbuf'; this "modifies" md, but since this is only
882 an internal cache we will cast away the 'const' instead
883 of making the API look strange. */
884 vmd = (struct GNUNET_CONTAINER_MetaData*) md;
885 hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
886 hdr->size = htonl (left);
887 hdr->entries = htonl (md->item_count);
888 if (GNUNET_YES == comp)
890 hdr->version = htonl (2 | HEADER_COMPRESSED);
894 vmd->sbuf_size = clen + sizeof (struct MetaDataHeader);
898 hdr->version = htonl (2);
902 vmd->sbuf_size = left + sizeof (struct MetaDataHeader);
904 vmd->sbuf = (char*) hdr;
907 if ( ( (left + sizeof (struct MetaDataHeader)) <= max) ||
908 ( (comp == GNUNET_YES) &&
911 /* success, this now fits! */
912 if (GNUNET_YES == comp)
914 hdr = (struct MetaDataHeader*) *target;
917 hdr = GNUNET_malloc (clen + sizeof (struct MetaDataHeader));
918 *target = (char*) hdr;
920 hdr->version = htonl (2 | HEADER_COMPRESSED);
921 hdr->size = htonl (left);
922 hdr->entries = htonl (md->item_count - i);
923 memcpy (&(*target)[sizeof(struct MetaDataHeader)],
928 return clen + sizeof (struct MetaDataHeader);
932 hdr = (struct MetaDataHeader*) *target;
935 hdr = GNUNET_malloc (left + sizeof (struct MetaDataHeader));
936 *target = (char*) hdr;
938 hdr->version = htonl (2);
939 hdr->entries = htonl (md->item_count - i);
940 hdr->size = htonl (left);
941 memcpy (&(*target)[sizeof(struct MetaDataHeader)],
945 return left + sizeof (struct MetaDataHeader);
949 if (0 == (opt & GNUNET_CONTAINER_META_DATA_SERIALIZE_PART))
953 return GNUNET_SYSERR;
956 /* next iteration: ignore the corresponding meta data at the
957 end and try again without it */
958 left -= sizeof (struct MetaDataEntry);
959 left -= pos->data_size;
960 if (pos->plugin_name != NULL)
961 left -= strlen (pos->plugin_name) + 1;
962 if (pos->mime_type != NULL)
963 left -= strlen (pos->mime_type) + 1;
967 /* nothing fit, only write header! */
968 hdr = (struct MetaDataHeader*) *target;
971 hdr = GNUNET_malloc (sizeof (struct MetaDataHeader));
972 *target = (char*) hdr;
974 hdr->version = htonl (2);
975 hdr->entries = htonl (0);
976 hdr->size = htonl (0);
977 return sizeof (struct MetaDataHeader);
982 * Get the size of the full meta-data in serialized form.
984 * @param md metadata to inspect
985 * @return number of bytes needed for serialization, -1 on error
988 GNUNET_CONTAINER_meta_data_get_serialized_size (const struct GNUNET_CONTAINER_MetaData *md)
993 if (md->sbuf != NULL)
994 return md->sbuf_size;
996 ret = GNUNET_CONTAINER_meta_data_serialize (md,
998 GNUNET_MAX_MALLOC_CHECKED,
999 GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL);
1007 * Decompress input, return the decompressed data
1008 * as output, set outputSize to the number of bytes
1011 * @param input compressed data
1012 * @param inputSize number of bytes in input
1013 * @param outputSize expected size of the output
1014 * @return NULL on error
1017 decompress (const char *input,
1025 output = GNUNET_malloc (olen);
1026 if (Z_OK == uncompress ((Bytef *) output,
1027 &olen, (const Bytef *) input, inputSize))
1033 GNUNET_free (output);
1040 * Deserialize meta-data. Initializes md.
1042 * @param input buffer with the serialized metadata
1043 * @param size number of bytes available in input
1044 * @return MD on success, NULL on error (e.g.
1047 struct GNUNET_CONTAINER_MetaData *
1048 GNUNET_CONTAINER_meta_data_deserialize (const char *input, size_t size)
1050 struct GNUNET_CONTAINER_MetaData *md;
1051 struct MetaDataHeader hdr;
1052 struct MetaDataEntry ent;
1065 const char *meta_data;
1066 const char *plugin_name;
1067 const char *mime_type;
1068 enum EXTRACTOR_MetaFormat format;
1070 if (size < sizeof (struct MetaDataHeader))
1074 sizeof (struct MetaDataHeader));
1075 version = ntohl (hdr.version) & HEADER_VERSION_MASK;
1076 compressed = (ntohl (hdr.version) & HEADER_COMPRESSED) != 0;
1079 return NULL; /* null pointer */
1082 GNUNET_break_op (0); /* unsupported version */
1086 ic = ntohl (hdr.entries);
1087 dataSize = ntohl (hdr.size);
1088 if ((sizeof (struct MetaDataEntry) * ic) > dataSize)
1090 GNUNET_break_op (0);
1096 if (dataSize >= GNUNET_MAX_MALLOC_CHECKED)
1098 /* make sure we don't blow our memory limit because of a malformed
1100 GNUNET_break_op (0);
1104 decompress ((const char *) &input[sizeof (struct MetaDataHeader)],
1105 size - sizeof (struct MetaDataHeader), dataSize);
1108 GNUNET_break_op (0);
1116 cdata = (const char *) &input[sizeof (struct MetaDataHeader)];
1117 if (dataSize != size - sizeof (struct MetaDataHeader))
1119 GNUNET_break_op (0);
1124 md = GNUNET_CONTAINER_meta_data_create ();
1125 left = dataSize - ic * sizeof (struct MetaDataEntry);
1126 mdata = &cdata[ic * sizeof (struct MetaDataEntry)];
1130 &cdata[i * sizeof(struct MetaDataEntry)],
1131 sizeof (struct MetaDataEntry));
1132 format = (enum EXTRACTOR_MetaFormat) ntohl (ent.format);
1133 if ( (format != EXTRACTOR_METAFORMAT_UTF8) &&
1134 (format != EXTRACTOR_METAFORMAT_C_STRING) &&
1135 (format != EXTRACTOR_METAFORMAT_BINARY) )
1137 GNUNET_break_op (0);
1140 dlen = ntohl (ent.data_size);
1141 plen = ntohl (ent.plugin_name_len);
1142 mlen = ntohl (ent.mime_type_len);
1145 GNUNET_break_op (0);
1149 meta_data = &mdata[left];
1150 if ( (format == EXTRACTOR_METAFORMAT_UTF8) ||
1151 (format == EXTRACTOR_METAFORMAT_C_STRING) )
1154 (mdata[left + dlen - 1] != '\0') )
1156 GNUNET_break_op (0);
1162 GNUNET_break_op (0);
1167 (mdata[left + plen - 1] != '\0') )
1169 GNUNET_break_op (0);
1175 plugin_name = &mdata[left];
1179 GNUNET_break_op (0);
1184 (mdata[left + mlen - 1] != '\0') )
1186 GNUNET_break_op (0);
1192 mime_type = &mdata[left];
1193 GNUNET_CONTAINER_meta_data_insert (md,
1195 (enum EXTRACTOR_MetaType) ntohl (ent.type),
1201 GNUNET_free_non_null (data);
1206 /* end of container_meta_data.c */