2 This file is part of GNUnet.
3 (C) 2001, 2002, 2003, 2004, 2006, 2008 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_common.h"
44 #include "gnunet_container_lib.h"
45 #include "gnunet_disk_lib.h"
47 struct GNUNET_CONTAINER_BloomFilter
51 * The actual bloomfilter bit array
56 * Filename of the filter
61 * The bit counter file on disk
63 struct GNUNET_DISK_FileHandle *fh;
66 * How many bits we set for each stored element
68 unsigned int addressesPerElement;
71 * Size of bitArray in bytes
73 unsigned int bitArraySize;
79 * Sets a bit active in the bitArray. Increment bit-specific
80 * usage counter on disk only if below 4bit max (==15).
82 * @param bitArray memory area to set the bit in
83 * @param bitIdx which bit to set
86 setBit (char *bitArray, unsigned int bitIdx)
88 unsigned int arraySlot;
89 unsigned int targetBit;
91 arraySlot = bitIdx / 8;
92 targetBit = (1L << (bitIdx % 8));
93 bitArray[arraySlot] |= targetBit;
97 * Clears a bit from bitArray. Bit is cleared from the array
98 * only if the respective usage counter on the disk hits/is zero.
100 * @param bitArray memory area to set the bit in
101 * @param bitIdx which bit to unset
104 clearBit (char *bitArray, unsigned int bitIdx)
107 unsigned int targetBit;
110 targetBit = (1L << (bitIdx % 8));
111 bitArray[slot] = bitArray[slot] & (~targetBit);
115 * Checks if a bit is active in the bitArray
117 * @param bitArray memory area to set the bit in
118 * @param bitIdx which bit to test
119 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
122 testBit (char *bitArray, unsigned int bitIdx)
125 unsigned int targetBit;
128 targetBit = (1L << (bitIdx % 8));
129 if (bitArray[slot] & targetBit)
136 * Sets a bit active in the bitArray and increments
137 * bit-specific usage counter on disk (but only if
138 * the counter was below 4 bit max (==15)).
140 * @param bitArray memory area to set the bit in
141 * @param bitIdx which bit to test
142 * @param fh A file to keep the 4 bit address usage counters in
145 incrementBit (char *bitArray, unsigned int bitIdx, const struct GNUNET_DISK_FileHandle *fh)
147 unsigned int fileSlot;
151 unsigned int targetLoc;
153 setBit (bitArray, bitIdx);
154 if (GNUNET_DISK_handle_invalid (fh))
156 /* Update the counter file on disk */
157 fileSlot = bitIdx / 2;
158 targetLoc = bitIdx % 2;
160 GNUNET_assert (fileSlot == (unsigned int) GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
161 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
164 high = (value & (~0xF)) >> 4;
176 value = ((high << 4) | low);
177 GNUNET_assert (fileSlot == (unsigned int) GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
178 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
182 * Clears a bit from bitArray if the respective usage
183 * counter on the disk hits/is zero.
185 * @param bitArray memory area to set the bit in
186 * @param bitIdx which bit to test
187 * @param fh A file to keep the 4bit address usage counters in
190 decrementBit (char *bitArray, unsigned int bitIdx, const struct GNUNET_DISK_FileHandle *fh)
192 unsigned int fileSlot;
196 unsigned int targetLoc;
198 if (GNUNET_DISK_handle_invalid (fh))
199 return; /* cannot decrement! */
200 /* Each char slot in the counter file holds two 4 bit counters */
201 fileSlot = bitIdx / 2;
202 targetLoc = bitIdx % 2;
203 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
204 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
207 high = (value & 0xF0) >> 4;
209 /* decrement, but once we have reached the max, never go back! */
212 if ((low > 0) && (low < 0xF))
216 clearBit (bitArray, bitIdx);
221 if ((high > 0) && (high < 0xF))
225 clearBit (bitArray, bitIdx);
228 value = ((high << 4) | low);
229 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
230 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
233 #define BUFFSIZE 65536
236 * Creates a file filled with zeroes
238 * @param fh the file handle
239 * @param size the size of the file
240 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
243 makeEmptyFile (const struct GNUNET_DISK_FileHandle *fh, unsigned int size)
246 unsigned int bytesleft = size;
249 if (GNUNET_DISK_handle_invalid (fh))
250 return GNUNET_SYSERR;
251 buffer = GNUNET_malloc (BUFFSIZE);
252 memset (buffer, 0, BUFFSIZE);
253 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
255 while (bytesleft > 0)
257 if (bytesleft > BUFFSIZE)
259 res = GNUNET_DISK_file_write (fh, buffer, BUFFSIZE);
260 bytesleft -= BUFFSIZE;
264 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
267 GNUNET_assert (res != GNUNET_SYSERR);
269 GNUNET_free (buffer);
273 /* ************** GNUNET_CONTAINER_BloomFilter GNUNET_CRYPTO_hash iterator ********* */
276 * Iterator (callback) method to be called by the
277 * bloomfilter iterator on each bit that is to be
278 * set or tested for the key.
280 * @param bf the filter to manipulate
281 * @param bit the current bit
282 * @param additional context specific argument
284 typedef void (*BitIterator) (struct GNUNET_CONTAINER_BloomFilter * bf,
285 unsigned int bit, void *arg);
288 * Call an iterator for each bit that the bloomfilter
289 * must test or set for this element.
291 * @param bf the filter
292 * @param callback the method to call
293 * @param arg extra argument to callback
294 * @param key the key for which we iterate over the BF bits
297 iterateBits (struct GNUNET_CONTAINER_BloomFilter *bf,
298 BitIterator callback, void *arg, const GNUNET_HashCode * key)
300 GNUNET_HashCode tmp[2];
303 unsigned int slot = 0;
305 bitCount = bf->addressesPerElement;
306 memcpy (&tmp[0], key, sizeof (GNUNET_HashCode));
310 while (slot < (sizeof (GNUNET_HashCode) / sizeof (unsigned int)))
313 (((unsigned int *) &tmp[round & 1])[slot]) &
314 ((bf->bitArraySize * 8) - 1), arg);
322 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (GNUNET_HashCode),
323 &tmp[(round + 1) & 1]);
331 * Callback: increment bit
333 * @param bf the filter to manipulate
334 * @param bit the bit to increment
335 * @param arg not used
338 incrementBitCallback (struct GNUNET_CONTAINER_BloomFilter *bf,
339 unsigned int bit, void *arg)
341 incrementBit (bf->bitArray, bit, bf->fh);
345 * Callback: decrement bit
347 * @param bf the filter to manipulate
348 * @param bit the bit to decrement
349 * @param arg not used
352 decrementBitCallback (struct GNUNET_CONTAINER_BloomFilter *bf,
353 unsigned int bit, void *arg)
355 decrementBit (bf->bitArray, bit, bf->fh);
359 * Callback: test if all bits are set
361 * @param bf the filter
362 * @param bit the bit to test
363 * @param arg pointer set to GNUNET_NO if bit is not set
366 testBitCallback (struct GNUNET_CONTAINER_BloomFilter *bf, unsigned int bit,
370 if (GNUNET_NO == testBit (bf->bitArray, bit))
374 /* *********************** INTERFACE **************** */
377 * Load a bloom-filter from a file.
379 * @param filename the name of the file (or the prefix)
380 * @param size the size of the bloom-filter (number of
381 * bytes of storage space to use)
382 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
383 * element (number of bits set per element in the set)
384 * @return the bloomfilter
386 struct GNUNET_CONTAINER_BloomFilter *
387 GNUNET_CONTAINER_bloomfilter_load (const char *filename, unsigned int size,
390 struct GNUNET_CONTAINER_BloomFilter *bf;
396 if ((k == 0) || (size == 0))
403 size = ui; /* make sure it's a power of 2 */
405 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
406 /* Try to open a bloomfilter file */
407 if (filename != NULL)
409 bf->fh = GNUNET_DISK_file_open (filename, GNUNET_DISK_OPEN_READWRITE
410 | GNUNET_DISK_OPEN_CREATE,
411 GNUNET_DISK_PERM_USER_READ | GNUNET_DISK_PERM_USER_WRITE);
417 bf->filename = GNUNET_strdup (filename);
425 bf->bitArray = GNUNET_malloc_large (size);
426 bf->bitArraySize = size;
427 bf->addressesPerElement = k;
428 memset (bf->bitArray, 0, bf->bitArraySize);
430 if (bf->filename != NULL)
432 /* Read from the file what bits we can */
433 rbuff = GNUNET_malloc (BUFFSIZE);
435 while (pos < size * 8)
439 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
442 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
447 break; /* is ok! we just did not use that many bits yet */
448 for (i = 0; i < res; i++)
450 if ((rbuff[i] & 0x0F) != 0)
451 setBit (bf->bitArray, pos + i * 2);
452 if ((rbuff[i] & 0xF0) != 0)
453 setBit (bf->bitArray, pos + i * 2 + 1);
457 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
466 * Create a bloom filter from raw bits.
468 * @param data the raw bits in memory (maybe NULL,
469 * in which case all bits should be considered
471 * @param size the size of the bloom-filter (number of
472 * bytes of storage space to use); also size of data
473 * -- unless data is NULL
474 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
475 * element (number of bits set per element in the set)
476 * @return the bloomfilter
478 struct GNUNET_CONTAINER_BloomFilter *
479 GNUNET_CONTAINER_bloomfilter_init (const char *data, unsigned int size,
482 struct GNUNET_CONTAINER_BloomFilter *bf;
485 if ((k == 0) || (size == 0))
495 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
498 bf->bitArray = GNUNET_malloc_large (size);
499 bf->bitArraySize = size;
500 bf->addressesPerElement = k;
502 memcpy (bf->bitArray, data, size);
504 memset (bf->bitArray, 0, bf->bitArraySize);
510 * Copy the raw data of this bloomfilter into
511 * the given data array.
513 * @param data where to write the data
514 * @param size the size of the given data array
515 * @return GNUNET_SYSERR if the data array is not big enough
518 GNUNET_CONTAINER_bloomfilter_get_raw_data (struct GNUNET_CONTAINER_BloomFilter
519 *bf, char *data, unsigned int size)
522 return GNUNET_SYSERR;
524 if (bf->bitArraySize != size)
525 return GNUNET_SYSERR;
526 memcpy (data, bf->bitArray, size);
531 * Free the space associated with a filter
532 * in memory, flush to drive if needed (do not
533 * free the space on the drive)
535 * @param bf the filter
538 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
542 if (bf->filename != NULL)
544 GNUNET_DISK_file_close (bf->fh);
545 GNUNET_free (bf->filename);
547 GNUNET_free (bf->bitArray);
552 * Reset a bloom filter to empty. Clears the file on disk.
554 * @param bf the filter
557 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
562 memset (bf->bitArray, 0, bf->bitArraySize);
563 if (bf->filename != NULL)
564 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
569 * Test if an element is in the filter.
571 * @param e the element
572 * @param bf the filter
573 * @return GNUNET_YES if the element is in the filter, GNUNET_NO if not
576 GNUNET_CONTAINER_bloomfilter_test (struct GNUNET_CONTAINER_BloomFilter *bf,
577 const GNUNET_HashCode * e)
584 iterateBits (bf, &testBitCallback, &res, e);
589 * Add an element to the filter
591 * @param bf the filter
592 * @param e the element
595 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
596 const GNUNET_HashCode * e)
601 iterateBits (bf, &incrementBitCallback, NULL, e);
606 * Or the entries of the given raw data array with the
607 * data of the given bloom filter. Assumes that
608 * the size of the data array and the current filter
610 * @param bf the filter
613 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
614 const char *data, unsigned int size)
620 if (bf->bitArraySize != size)
621 return GNUNET_SYSERR;
622 /* FIXME: we could do this 4-8x faster by
623 going over int/long arrays */
624 for (i = 0; i < size; i++)
625 bf->bitArray[i] |= data[i];
630 * Remove an element from the filter.
632 * @param bf the filter
633 * @param e the element to remove
636 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
637 const GNUNET_HashCode * e)
641 if (bf->filename == NULL)
643 iterateBits (bf, &decrementBitCallback, NULL, e);
647 * Resize a bloom filter. Note that this operation
648 * is pretty costly. Essentially, the bloom filter
649 * needs to be completely re-build.
651 * @param bf the filter
652 * @param iterator an iterator over all elements stored in the BF
653 * @param iterator_arg argument to the iterator function
654 * @param size the new size for the filter
655 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
658 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
659 GNUNET_HashCodeIterator iterator,
660 void *iterator_arg, unsigned int size,
666 GNUNET_free (bf->bitArray);
670 size = i; /* make sure it's a power of 2 */
672 bf->bitArraySize = size;
673 bf->bitArray = GNUNET_malloc (size);
674 memset (bf->bitArray, 0, bf->bitArraySize);
675 if (bf->filename != NULL)
676 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
677 while (GNUNET_YES == iterator (&hc, iterator_arg))
678 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
681 /* end of container_bloomfilter.c */