From a054f68e16155f955655d827c1b8d8af9c1b9cbd Mon Sep 17 00:00:00 2001 From: Florian Dold Date: Thu, 20 Dec 2012 00:39:17 +0000 Subject: [PATCH] implemented the invertible bloom filter --- src/consensus/Makefile.am | 14 +- src/consensus/gnunet-consensus-ibf.c | 192 +++++++++++++++++++++++ src/consensus/ibf.c | 218 +++++++++++++-------------- src/consensus/ibf.h | 59 ++++++-- 4 files changed, 356 insertions(+), 127 deletions(-) create mode 100644 src/consensus/gnunet-consensus-ibf.c diff --git a/src/consensus/Makefile.am b/src/consensus/Makefile.am index 29c466901..7c28b4869 100644 --- a/src/consensus/Makefile.am +++ b/src/consensus/Makefile.am @@ -17,7 +17,8 @@ endif bin_PROGRAMS = \ gnunet-consensus \ - gnunet-consensus-start-peers + gnunet-consensus-start-peers \ + gnunet-consensus-ibf libexec_PROGRAMS = \ gnunet-service-consensus @@ -26,7 +27,7 @@ lib_LTLIBRARIES = \ libgnunetconsensus.la gnunet_consensus_SOURCES = \ - gnunet-consensus.c + gnunet-consensus.c gnunet_consensus_LDADD = \ $(top_builddir)/src/util/libgnunetutil.la \ $(top_builddir)/src/consensus/libgnunetconsensus.la \ @@ -40,6 +41,13 @@ gnunet_consensus_start_peers_LDADD = \ $(top_builddir)/src/consensus/libgnunetconsensus.la \ $(GN_LIBINTL) +gnunet_consensus_ibf_SOURCES = \ + gnunet-consensus-ibf.c \ + ibf.c +gnunet_consensus_ibf_LDADD = \ + $(top_builddir)/src/util/libgnunetutil.la \ + $(GN_LIBINTL) + gnunet_service_consensus_SOURCES = \ gnunet-service-consensus.c gnunet_service_consensus_LDADD = \ @@ -69,6 +77,6 @@ test_consensus_api_LDADD = \ $(top_builddir)/src/testing/libgnunettesting.la \ $(top_builddir)/src/consensus/libgnunetconsensus.la - EXTRA_DIST = \ test_consensus.conf + diff --git a/src/consensus/gnunet-consensus-ibf.c b/src/consensus/gnunet-consensus-ibf.c new file mode 100644 index 000000000..f29b91704 --- /dev/null +++ b/src/consensus/gnunet-consensus-ibf.c @@ -0,0 +1,192 @@ +/* + This file is part of GNUnet. + (C) 2012 Christian Grothoff (and other contributing authors) + + GNUnet is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GNUnet is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNUnet; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ + +/** + * @file consensus/gnunet-consensus-ibf + * @brief tool for reconciling data with invertible bloom filters + * @author Florian Dold + */ + + +#include "platform.h" +#include "gnunet_common.h" +#include "gnunet_container_lib.h" +#include "gnunet_util_lib.h" + +#include "ibf.h" + +static unsigned int asize = 10; +static unsigned int bsize = 10; +static unsigned int csize = 10; +static unsigned int hash_num = 3; +static unsigned int ibf_size = 80; + + +static struct GNUNET_CONTAINER_MultiHashMap *set_a; +static struct GNUNET_CONTAINER_MultiHashMap *set_b; +/* common elements in a and b */ +static struct GNUNET_CONTAINER_MultiHashMap *set_c; + +static struct InvertibleBloomFilter *ibf_a; +static struct InvertibleBloomFilter *ibf_b; + + + +static int +insert_iterator (void *cls, + const struct GNUNET_HashCode *key, + void *value) +{ + struct InvertibleBloomFilter *ibf = (struct InvertibleBloomFilter *) cls; + ibf_insert (ibf, key); + return GNUNET_YES; +} + + +static void +run (void *cls, char *const *args, const char *cfgfile, + const struct GNUNET_CONFIGURATION_Handle *cfg) +{ + struct GNUNET_HashCode id; + int i; + int side; + int res; + + set_a = GNUNET_CONTAINER_multihashmap_create (((asize == 0) ? 1 : (asize + csize)), + GNUNET_NO); + set_b = GNUNET_CONTAINER_multihashmap_create (((bsize == 0) ? 1 : (bsize + csize)), + GNUNET_NO); + set_c = GNUNET_CONTAINER_multihashmap_create (((csize == 0) ? 1 : csize), + GNUNET_NO); + + printf ("hash-num=%u, size=%u, #(A-B)=%u, #(B-A)=%u, #(A&B)=%u\n", + hash_num, ibf_size, asize, bsize, csize); + + i = 0; + while (i < asize) + { + GNUNET_CRYPTO_hash_create_random (GNUNET_CRYPTO_QUALITY_WEAK, &id); + if (GNUNET_YES == GNUNET_CONTAINER_multihashmap_contains (set_a, &id)) + continue; + printf("A: %s\n", GNUNET_h2s (&id)); + GNUNET_CONTAINER_multihashmap_put ( + set_a, &id, NULL, GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); + i++; + } + i = 0; + while (i < bsize) + { + GNUNET_CRYPTO_hash_create_random (GNUNET_CRYPTO_QUALITY_WEAK, &id); + if (GNUNET_YES == GNUNET_CONTAINER_multihashmap_contains (set_a, &id)) + continue; + if (GNUNET_YES == GNUNET_CONTAINER_multihashmap_contains (set_b, &id)) + continue; + printf("B: %s\n", GNUNET_h2s (&id)); + GNUNET_CONTAINER_multihashmap_put ( + set_b, &id, NULL, GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); + i++; + } + i = 0; + while (i < csize) + { + GNUNET_CRYPTO_hash_create_random (GNUNET_CRYPTO_QUALITY_WEAK, &id); + if (GNUNET_YES == GNUNET_CONTAINER_multihashmap_contains (set_a, &id)) + continue; + if (GNUNET_YES == GNUNET_CONTAINER_multihashmap_contains (set_b, &id)) + continue; + printf("C: %s\n", GNUNET_h2s (&id)); + GNUNET_CONTAINER_multihashmap_put ( + set_c, &id, NULL, GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); + i++; + } + + + ibf_a = ibf_create (ibf_size, hash_num, 0); + ibf_b = ibf_create (ibf_size, hash_num, 0); + + GNUNET_CONTAINER_multihashmap_iterate (set_a, &insert_iterator, ibf_a); + GNUNET_CONTAINER_multihashmap_iterate (set_b, &insert_iterator, ibf_b); + GNUNET_CONTAINER_multihashmap_iterate (set_c, &insert_iterator, ibf_a); + GNUNET_CONTAINER_multihashmap_iterate (set_c, &insert_iterator, ibf_b); + + + printf ("-----------------\n"); + ibf_subtract (ibf_a, ibf_b); + + for (;;) + { + res = ibf_decode (ibf_a, &side, &id); + if (GNUNET_SYSERR == res) + { + printf ("decode failed\n"); + return; + } + if (GNUNET_NO == res) + { + if ((0 == GNUNET_CONTAINER_multihashmap_size (set_b)) && + (0 == GNUNET_CONTAINER_multihashmap_size (set_a))) + printf ("decode succeeded\n"); + else + printf ("decode missed elements\n"); + return; + } + + printf("R: %s\n", GNUNET_h2s (&id)); + printf("s: %d\n", side); + + if (side == 1) + res = GNUNET_CONTAINER_multihashmap_remove (set_a, &id, NULL); + if (side == -1) + res = GNUNET_CONTAINER_multihashmap_remove (set_b, &id, NULL); + if (GNUNET_YES != res) + { + printf ("decoded wrong element\n"); + return; + } + } +} + +int +main (int argc, char **argv) +{ + static const struct GNUNET_GETOPT_CommandLineOption options[] = { + {'A', "asize", NULL, + gettext_noop ("number of element in set A-B"), 1, + &GNUNET_GETOPT_set_uint, &asize}, + {'B', "bsize", NULL, + gettext_noop ("number of element in set B-A"), 1, + &GNUNET_GETOPT_set_uint, &bsize}, + {'C', "csize", NULL, + gettext_noop ("number of common elements in A and B"), 1, + &GNUNET_GETOPT_set_uint, &csize}, + {'k', "hash-num", NULL, + gettext_noop ("hash num"), 1, + &GNUNET_GETOPT_set_uint, &hash_num}, + {'s', "ibf-size", NULL, + gettext_noop ("ibf size"), 1, + &GNUNET_GETOPT_set_uint, &ibf_size}, + GNUNET_GETOPT_OPTION_END + }; + GNUNET_PROGRAM_run2 (argc, argv, "gnunet-consensus-ibf", + "help", + options, &run, NULL, GNUNET_YES); + return 0; +} + diff --git a/src/consensus/ibf.c b/src/consensus/ibf.c index d0cb218cc..12d0fd320 100644 --- a/src/consensus/ibf.c +++ b/src/consensus/ibf.c @@ -16,7 +16,7 @@ along with GNUnet; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ +*/ /** @@ -25,35 +25,25 @@ * @author Florian Dold */ -#include "platform.h" -#include "gnunet_common.h" #include "ibf.h" - -struct PureCells -{ - int index; - struct PureCells *next; - struct PureCells *prev; -}; - struct InvertibleBloomFilter { /** * How many cells does this IBF have? */ - int size; + unsigned int size; /** * In how many cells do we hash one element? * Usually 4 or 3. */ - int hash_num; + unsigned int hash_num; /** * Salt for mingling hashes */ - int salt; + uint32_t salt; /** * How many times has a bucket been hit? @@ -64,21 +54,13 @@ struct InvertibleBloomFilter /** * xor sums of the elements' hash codes, used to identify the elements. */ - GNUNET_HashCode *id_sum; + struct GNUNET_HashCode *id_sum; /** * xor sums of the "hash of the hash". */ - GNUNET_HashCode *hash_sum; - - struct PureCells *pure_head; - struct PureCells *pure_tail; + struct GNUNET_HashCode *hash_sum; - /** - * GNUNET_YES: fresh list is deprecated - * GNUNET_NO: fresh list is up to date - */ - int pure_fresh; }; @@ -86,145 +68,141 @@ struct InvertibleBloomFilter * Create an invertible bloom filter. */ struct InvertibleBloomFilter * -ibf_create(int size, int hash_num) +ibf_create (unsigned int size, unsigned int hash_num, uint32_t salt) { struct InvertibleBloomFilter *ibf; ibf = GNUNET_malloc (sizeof (struct InvertibleBloomFilter)); - ibf->count = GNUNET_malloc (size * sizeof uint8_t); + ibf->count = GNUNET_malloc (size * sizeof (uint8_t)); ibf->id_sum = GNUNET_malloc (size * sizeof (struct GNUNET_HashCode)); ibf->hash_sum = GNUNET_malloc (size * sizeof (struct GNUNET_HashCode)); ibf->size = size; ibf->hash_num = hash_num; + + return ibf; } + /** * Insert an element into an IBF. */ void -ibf_insert (struct InvertibleBloomFilter *ibf, struct GNUNET_HashCode *id) +ibf_insert_on_side (struct InvertibleBloomFilter *ibf, + const struct GNUNET_HashCode *key, + int side) { - struct GNUNET_HashCode key; - struct GNUNET_HashCode id_hash; + struct GNUNET_HashCode bucket_indices; + struct GNUNET_HashCode remove_key; + struct GNUNET_HashCode key_hash; int i; - key = *id; - GNUNET_hash (id, sizeof (struct GNUNET_HashCode), &id_hash); + /* copy the key, if key and an entry in the IBF alias */ + remove_key = *key; - for (i = 0; i < ibf->hash_num; i++) - { - int bucket; - int j; - if ((i != 0) && (i % 16) == 0) - { - GNUNET_hash (&key, sizeof (struct GNUNET_HashCode), &key); - } - bucket = hash.bits[i%16] % ibf->size; - - /* count<0 can happen after ibf subtraction, but then no insert should be done */ - GNUNET_assert (ibf->count[bucket] >= 0); + GNUNET_assert ((1 == side) || (-1 == side)); - ibf->count[bucket]++; - - for (j=0; j < 16; j++) - { - ibf->id_sum.bits[j] ^= &id; - ibf->hash_sum.bits[j] ^= &id_hash; - } + bucket_indices = remove_key; + GNUNET_CRYPTO_hash (key, sizeof (struct GNUNET_HashCode), &key_hash); + for (i = 0; i < ibf->hash_num; i++) + { + unsigned int bucket; + if ((i % 16) == 0) + GNUNET_CRYPTO_hash (&bucket_indices, sizeof (struct GNUNET_HashCode), + &bucket_indices); + + bucket = bucket_indices.bits[i%16] % ibf->size; + + printf ("inserting %s in bucket %u side %d\n", + GNUNET_h2s (&remove_key), bucket, side); + + ibf->count[bucket] += side; + + GNUNET_CRYPTO_hash_xor (&remove_key, &ibf->id_sum[bucket], + &ibf->id_sum[bucket]); + GNUNET_CRYPTO_hash_xor (&key_hash, &ibf->hash_sum[bucket], + &ibf->hash_sum[bucket]); } } - /** - * Update the linked list of pure cells, if not fresh anymore + * Insert an element into an IBF. */ void -update_pure (struct InvertibleBloomFilter *ibf) +ibf_insert (struct InvertibleBloomFilter *ibf, const struct GNUNET_HashCode *key) { - if (GNUNET_YES == ibf->pure_fresh) + ibf_insert_on_side (ibf, key, 1); +} + +static int +ibf_is_empty (struct InvertibleBloomFilter *ibf) +{ + int i; + for (i = 0; i < ibf->size; i++) { - return; + int j; + if (0 != ibf->count[i]) + return GNUNET_NO; + for (j = 0; j < 16; ++j) + { + if (0 != ibf->hash_sum[i].bits[j]) + return GNUNET_NO; + if (0 != ibf->id_sum[i].bits[j]) + return GNUNET_NO; + } } - - ibf->pure_fresh = GNUNET_YES; + return GNUNET_YES; } + /** * Decode and remove an element from the IBF, if possible. * * @param ibf the invertible bloom filter to decode * @param ret_id the hash code of the decoded element, if successful * @param side sign of the cell's count where the decoded element came from. - * A negative sign indicates that the element was recovered resides in an IBF - * that was previously subtracted from. - * @return GNUNET_YES if decoding an element was successful, GNUNET_NO if the IBF is empty, - * GNUNET_SYSERR if the decoding has faile + * A negative sign indicates that the element was recovered + * resides in an IBF that was previously subtracted from. + * @return GNUNET_YES if decoding an element was successful, + * GNUNET_NO if the IBF is empty, + * GNUNET_SYSERR if the decoding has failed */ int -ibf_decode (struct InvertibleBloomFilter *ibf, int *ret_side, struct GNUNET_HashCode *ret_id) +ibf_decode (struct InvertibleBloomFilter *ibf, + int *ret_side, struct GNUNET_HashCode *ret_id) { struct GNUNET_HashCode hash; - struct PureCells *pure; - int count; + int i; GNUNET_assert (NULL != ibf); - GNUNET_assert (NULL != red_id); - GNUNET_assert (NULL != side); + GNUNET_assert (NULL != ret_id); + GNUNET_assert (NULL != ret_side); - update_pure (ibf); - - pure = ibf->pure_head; - ibf->pure_head = pure->next; - - if (NULL == pure) + for (i = 0; i < ibf->size; i++) { - int i; - for (i = 0; i < ibf->size; i++) - { - int j; - if (0 != ibf->count[i]) - return GNUNET_SYSERR; - for (j = 0; j < 16; ++j) - if ((0 != ibf->hash_sum[i].bits[j]) || (0 != ibf->id_sum[i].bits[j])) - return GNUNET_SYSERR; - return GNUNET_NO; - } - } + if ((1 != ibf->count[i]) && (-1 != ibf->count[i])) + continue; - GNUNET_CRYPTO_hash (ibf->id_sum[pure->idx], sizeof (struct GNUNET_HashCode), &hash); + GNUNET_CRYPTO_hash (&ibf->id_sum[i], sizeof (struct GNUNET_HashCode), &hash); - if (0 == memcmp (&hash, ibf->hash_sum[pure->idx])) - { - struct GNUNET_HashCode key; - int i; + if (0 != memcmp (&hash, &ibf->hash_sum[i], sizeof (struct GNUNET_HashCode))) + continue; - *ret_side = ibf->count[pure->index]; - *ret_id = ibf->id_sum[pure->index]; + printf ("%d pure\n", i); + printf ("%d count\n", ibf->count[i]); - key = *ibf->id_sum[pure->index]; + *ret_side = ibf->count[i]; + *ret_id = ibf->id_sum[i]; - /* delete the item from all buckets */ - for (i = 0; i < ibf->hash_num; i++) - { - int bucket; - int j; - if ((i != 0) && (i % 16) == 0) - { - GNUNET_hash (&key, sizeof (struct GNUNET_HashCode), &key); - } - bucket = hash.bits[i%16] % ibf->size; - - ibf->count[bucket] -= count; - - for (j=0; j < 16; j++) - { - ibf->id_sum.bits[j] ^= &id; - ibf->hash_sum.bits[j] ^= &id_hash; - } - return GNUNET_YES; + /* insert on the opposite side, effectively removing the element */ + ibf_insert_on_side (ibf, &ibf->id_sum[i], -ibf->count[i]); + + return GNUNET_YES; } + + if (GNUNET_YES == ibf_is_empty (ibf)) + return GNUNET_NO; return GNUNET_SYSERR; } @@ -239,6 +217,22 @@ ibf_decode (struct InvertibleBloomFilter *ibf, int *ret_side, struct GNUNET_Hash void ibf_subtract (struct InvertibleBloomFilter *ibf1, struct InvertibleBloomFilter *ibf2) { - /* FIXME */ + int i; + + GNUNET_assert (ibf1->size == ibf2->size); + GNUNET_assert (ibf1->hash_num == ibf2->hash_num); + GNUNET_assert (ibf1->salt == ibf2->salt); + + for (i = 0; i < ibf1->size; i++) + { + printf ("ibf1->count[%d]=%d, ibf2->count[%d]=%d\n", i, ibf1->count[i], i, + ibf2->count[i]); + ibf1->count[i] -= ibf2->count[i]; + printf("diff: %d\n", ibf1->count[i]); + GNUNET_CRYPTO_hash_xor (&ibf1->id_sum[i], &ibf2->id_sum[i], + &ibf1->id_sum[i]); + GNUNET_CRYPTO_hash_xor (&ibf1->hash_sum[i], &ibf2->hash_sum[i], + &ibf1->hash_sum[i]); + } } diff --git a/src/consensus/ibf.h b/src/consensus/ibf.h index 2c9931e69..12aef5d2f 100644 --- a/src/consensus/ibf.h +++ b/src/consensus/ibf.h @@ -16,8 +16,7 @@ along with GNUnet; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - +*/ /** * @file consensus/ibf.h @@ -25,6 +24,21 @@ * @author Florian Dold */ +#ifndef GNUNET_CONSENSUS_IBF_H +#define GNUNET_CONSENSUS_IBF_H + +#include "platform.h" +#include "gnunet_common.h" +#include "gnunet_util_lib.h" + +#ifdef __cplusplus +extern "C" +{ +#if 0 /* keep Emacsens' auto-indent happy */ +} +#endif +#endif + /** * Opaque handle to an invertible bloom filter (IBF). @@ -32,19 +46,21 @@ * An IBF is a counting bloom filter that has the ability to restore * the hashes of its stored elements with high probability. */ -struct InvertibleBloomFilter +struct InvertibleBloomFilter; + /** * Create an invertible bloom filter. * * @param size number of IBF buckets + * @param hash_num number of buckets one element is hashed in * @param salt salt for mingling hashes, different salt may * result in less (or more) collisions - * @param hash_num number of buckets one element is hashed in * @return the newly created invertible bloom filter */ struct InvertibleBloomFilter * -ibf_create(int size, int salt, int hash_num); +ibf_create(unsigned int size, unsigned int hash_num, uint32_t salt); + /** * Insert an element into an IBF. @@ -53,7 +69,8 @@ ibf_create(int size, int salt, int hash_num); * @param id the element's hash code */ void -ibf_insert (struct InvertibleBloomFilter *ibf, GNUNET_HashCode *id); +ibf_insert (struct InvertibleBloomFilter *ibf, const struct GNUNET_HashCode *id); + /** * Subtract ibf2 from ibf1, storing the result in ibf1. @@ -62,15 +79,20 @@ ibf_insert (struct InvertibleBloomFilter *ibf, GNUNET_HashCode *id); void ibf_subtract (struct InvertibleBloomFilter *ibf1, struct InvertibleBloomFilter *ibf2); + /** * Decode and remove an element from the IBF, if possible. * - * @param ibf the invertible bloom filter - * @param the id of the element is written to this hash code - * @return GNUNET_YES if decoding an element was successful, GNUNET_NO if it failed to decode + * @param ibf the invertible bloom filter to decode + * @param ret_id the hash code of the decoded element, if successful + * @param side sign of the cell's count where the decoded element came from. + * A negative sign indicates that the element was recovered resides in an IBF + * that was previously subtracted from. + * @return GNUNET_YES if decoding an element was successful, GNUNET_NO if the IBF is empty, + * GNUNET_SYSERR if the decoding has faile */ int -ibf_decode (struct InvertibleBloomFilter *ibf, struct GNUNET_HashCode *ret_id); +ibf_decode (struct InvertibleBloomFilter *ibf, int *side, struct GNUNET_HashCode *ret_id); /** @@ -81,10 +103,13 @@ ibf_decode (struct InvertibleBloomFilter *ibf, struct GNUNET_HashCode *ret_id); struct InvertibleBloomFilter * ibf_dup (struct InvertibleBloomFilter *ibf); + /* -ibf_hton(); +ibf_hton (); -ibf_ntoh(); +ibf_ntoh (); + +ibf_get_nbo_size (); */ /** @@ -96,3 +121,13 @@ ibf_ntoh(); void ibf_destroy (struct InvertibleBloomFilter *ibf); + +#if 0 /* keep Emacsens' auto-indent happy */ +{ +#endif +#ifdef __cplusplus +} +#endif + +#endif + -- 2.25.1