2 This file is part of GNUnet
3 Copyright (C) 2017 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
17 * @brief implementation of a block group using a Bloom filter
18 * to drop duplicate blocks
19 * @author Christian Grothoff
22 #include "gnunet_util_lib.h"
23 #include "gnunet_block_group_lib.h"
24 #include "gnunet_block_plugin.h"
28 * Internal data structure for a block group.
30 struct BfGroupInternals
33 * A Bloom filter to weed out duplicate replies probabilistically.
35 struct GNUNET_CONTAINER_BloomFilter *bf;
38 * Set from the nonce to mingle the hashes before going into the @e bf.
51 * Serialize state of a block group.
53 * @param bg group to serialize
54 * @param[out] nonce set to the nonce of the @a bg
55 * @param[out] raw_data set to the serialized state
56 * @param[out] raw_data_size set to the number of bytes in @a raw_data
57 * @return #GNUNET_OK on success, #GNUNET_NO if serialization is not
58 * supported, #GNUNET_SYSERR on error
61 bf_group_serialize_cb (struct GNUNET_BLOCK_Group *bg,
64 size_t *raw_data_size)
66 struct BfGroupInternals *gi = bg->internal_cls;
69 raw = GNUNET_malloc (gi->bf_size);
71 GNUNET_CONTAINER_bloomfilter_get_raw_data (gi->bf,
79 *nonce = gi->bf_mutator;
81 *raw_data_size = gi->bf_size;
87 * Mark elements as "seen" using a hash of the element. Not supported
88 * by all block plugins.
90 * @param bg group to update
91 * @param seen_results results already seen
92 * @param seen_results_count number of entries in @a seen_results
95 bf_group_mark_seen_cb (struct GNUNET_BLOCK_Group *bg,
96 const struct GNUNET_HashCode *seen_results,
97 unsigned int seen_results_count)
99 struct BfGroupInternals *gi = bg->internal_cls;
101 for (unsigned int i=0;i<seen_results_count;i++)
103 struct GNUNET_HashCode mhash;
105 GNUNET_BLOCK_mingle_hash (&seen_results[i],
108 GNUNET_CONTAINER_bloomfilter_add (gi->bf,
115 * Merge two groups, if possible. Not supported by all block plugins,
116 * can also fail if the nonces were different.
118 * @param bg1 group to update
119 * @param bg2 group to merge into @a bg1
120 * @return #GNUNET_OK on success, #GNUNET_NO if the nonces were different and thus
124 bf_group_merge_cb (struct GNUNET_BLOCK_Group *bg1,
125 const struct GNUNET_BLOCK_Group *bg2)
127 struct BfGroupInternals *gi1 = bg1->internal_cls;
128 struct BfGroupInternals *gi2 = bg2->internal_cls;
130 if (gi1->bf_mutator != gi2->bf_mutator)
132 if (gi1->bf_size != gi2->bf_size)
134 GNUNET_CONTAINER_bloomfilter_or2 (gi1->bf,
141 * Destroy resources used by a block group.
143 * @param bg group to destroy; must not be NULL (it is dereferenced unconditionally)
146 bf_group_destroy_cb (struct GNUNET_BLOCK_Group *bg)
148 struct BfGroupInternals *gi = bg->internal_cls;
150 GNUNET_CONTAINER_bloomfilter_free (gi->bf);
157 * Create a new block group that filters duplicates using a Bloom filter.
159 * @param cls closure; the block context in which the block group is created
160 * @param bf_size size of the Bloom filter
161 * @param bf_k K-value for the Bloom filter
162 * @param type block type
163 * @param nonce random value used to seed the group creation
164 * @param raw_data optional serialized prior state of the group, NULL if unavailable/fresh
165 * @param raw_data_size number of bytes in @a raw_data, 0 if unavailable/fresh
166 * @return block group handle, NULL if block groups are not supported
167 * by this @a type of block (this is not an error)
169 struct GNUNET_BLOCK_Group *
170 GNUNET_BLOCK_GROUP_bf_create (void *cls,
173 enum GNUNET_BLOCK_Type type,
175 const void *raw_data,
176 size_t raw_data_size)
178 struct BfGroupInternals *gi;
179 struct GNUNET_BLOCK_Group *bg;
181 gi = GNUNET_new (struct BfGroupInternals);
182 gi->bf = GNUNET_CONTAINER_bloomfilter_init ((bf_size != raw_data_size) ? NULL : raw_data,
185 gi->bf_mutator = nonce;
186 gi->bf_size = bf_size;
187 bg = GNUNET_new (struct GNUNET_BLOCK_Group);
189 bg->serialize_cb = &bf_group_serialize_cb;
190 bg->mark_seen_cb = &bf_group_mark_seen_cb;
191 bg->merge_cb = &bf_group_merge_cb;
192 bg->destroy_cb = &bf_group_destroy_cb;
193 bg->internal_cls = gi;
199 * Test if @a hc is contained in the Bloom filter of @a bg. If so,
200 * return #GNUNET_YES. If not, add @a hc to the Bloom filter and
203 * @param bg block group to use for testing
204 * @param hc hash of element to evaluate
205 * @return #GNUNET_YES if @a hc is (likely) a duplicate
206 * #GNUNET_NO if @a hc was definitively not in @a bg (but now is)
209 GNUNET_BLOCK_GROUP_bf_test_and_set (struct GNUNET_BLOCK_Group *bg,
210 const struct GNUNET_HashCode *hc)
212 struct BfGroupInternals *gi;
213 struct GNUNET_HashCode mhash;
217 gi = bg->internal_cls;
218 GNUNET_BLOCK_mingle_hash (hc,
222 GNUNET_CONTAINER_bloomfilter_test (gi->bf,
225 GNUNET_CONTAINER_bloomfilter_add (gi->bf,
232 * How many bytes should a bloomfilter be if we have already seen
233 * entry_count responses? Sized so that we do not have to
234 * re-size the filter too often (to keep it cheap).
236 * Since other peers will also add entries but not resize the filter,
237 * we should generally pick a slightly larger size than what the
238 * strict math would suggest.
240 * @param entry_count expected number of entries in the Bloom filter
241 * @param k number of bits set per entry
242 * @return size in bytes; must be a power of two and smaller than or equal to 2^15.
245 GNUNET_BLOCK_GROUP_compute_bloomfilter_size (unsigned int entry_count,
249 unsigned int ideal = (entry_count * k) / 4;
250 uint16_t max = 1 << 15;
252 if (entry_count > max)
255 while ((size < max) && (size < ideal))