2 This file is part of GNUnet.
3 (C) 2001, 2002, 2003, 2004, 2006, 2008 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_common.h"
44 #include "gnunet_container_lib.h"
45 #include "gnunet_disk_lib.h"
47 struct GNUNET_CONTAINER_BloomFilter
51 * The actual bloomfilter bit array
56 * Filename of the filter
61 * The bit counter file on disk
63 struct GNUNET_DISK_FileHandle *fh;
66 * How many bits we set for each stored element
68 unsigned int addressesPerElement;
71 * Size of bitArray in bytes
80 * Get size of the bloom filter.
82 * @param bf the filter
83 * @return number of bytes used for the data of the bloom filter
86 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
91 return bf->bitArraySize;
96 * Copy an existing memory. Any association with a file
97 * on-disk will be lost in the process.
98 * @param bf the filter to copy
99 * @return copy of the bf
101 struct GNUNET_CONTAINER_BloomFilter *
102 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
105 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
106 bf->addressesPerElement);
111 * Sets a bit active in the bitArray. Increment bit-specific
112 * usage counter on disk only if below 4bit max (==15).
114 * @param bitArray memory area to set the bit in
115 * @param bitIdx which bit to set
118 setBit (char *bitArray, unsigned int bitIdx)
121 unsigned int targetBit;
123 arraySlot = bitIdx / 8;
124 targetBit = (1L << (bitIdx % 8));
125 bitArray[arraySlot] |= targetBit;
129 * Clears a bit from bitArray. Bit is cleared from the array
130 * only if the respective usage counter on the disk hits/is zero.
132 * @param bitArray memory area to set the bit in
133 * @param bitIdx which bit to unset
136 clearBit (char *bitArray, unsigned int bitIdx)
139 unsigned int targetBit;
142 targetBit = (1L << (bitIdx % 8));
143 bitArray[slot] = bitArray[slot] & (~targetBit);
147 * Checks if a bit is active in the bitArray
149 * @param bitArray memory area to set the bit in
150 * @param bitIdx which bit to test
151 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
154 testBit (char *bitArray, unsigned int bitIdx)
157 unsigned int targetBit;
160 targetBit = (1L << (bitIdx % 8));
161 if (bitArray[slot] & targetBit)
168 * Sets a bit active in the bitArray and increments
169 * bit-specific usage counter on disk (but only if
170 * the counter was below 4 bit max (==15)).
172 * @param bitArray memory area to set the bit in
173 * @param bitIdx which bit to test
174 * @param fh A file to keep the 4 bit address usage counters in
177 incrementBit (char *bitArray, unsigned int bitIdx,
178 const struct GNUNET_DISK_FileHandle *fh)
184 unsigned int targetLoc;
186 setBit (bitArray, bitIdx);
187 if (GNUNET_DISK_handle_invalid (fh))
189 /* Update the counter file on disk */
190 fileSlot = bitIdx / 2;
191 targetLoc = bitIdx % 2;
193 GNUNET_assert (fileSlot ==
194 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
195 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
198 high = (value & (~0xF)) >> 4;
210 value = ((high << 4) | low);
211 GNUNET_assert (fileSlot ==
212 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
213 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
217 * Clears a bit from bitArray if the respective usage
218 * counter on the disk hits/is zero.
220 * @param bitArray memory area to set the bit in
221 * @param bitIdx which bit to test
222 * @param fh A file to keep the 4bit address usage counters in
225 decrementBit (char *bitArray, unsigned int bitIdx,
226 const struct GNUNET_DISK_FileHandle *fh)
232 unsigned int targetLoc;
234 if (GNUNET_DISK_handle_invalid (fh))
235 return; /* cannot decrement! */
236 /* Each char slot in the counter file holds two 4 bit counters */
237 fileSlot = bitIdx / 2;
238 targetLoc = bitIdx % 2;
239 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
240 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
243 high = (value & 0xF0) >> 4;
245 /* decrement, but once we have reached the max, never go back! */
248 if ((low > 0) && (low < 0xF))
252 clearBit (bitArray, bitIdx);
257 if ((high > 0) && (high < 0xF))
261 clearBit (bitArray, bitIdx);
264 value = ((high << 4) | low);
265 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
266 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
269 #define BUFFSIZE 65536
272 * Creates a file filled with zeroes
274 * @param fh the file handle
275 * @param size the size of the file
276 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
279 makeEmptyFile (const struct GNUNET_DISK_FileHandle *fh, size_t size)
282 size_t bytesleft = size;
285 if (GNUNET_DISK_handle_invalid (fh))
286 return GNUNET_SYSERR;
287 buffer = GNUNET_malloc (BUFFSIZE);
288 memset (buffer, 0, BUFFSIZE);
289 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
291 while (bytesleft > 0)
293 if (bytesleft > BUFFSIZE)
295 res = GNUNET_DISK_file_write (fh, buffer, BUFFSIZE);
296 bytesleft -= BUFFSIZE;
300 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
303 GNUNET_assert (res != GNUNET_SYSERR);
305 GNUNET_free (buffer);
309 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
312 * Iterator (callback) method to be called by the
313 * bloomfilter iterator on each bit that is to be
314 * set or tested for the key.
317 * @param bf the filter to manipulate
318 * @param bit the current bit
319 * @return GNUNET_YES to continue, GNUNET_NO to stop early
321 typedef int (*BitIterator) (void *cls,
322 const struct GNUNET_CONTAINER_BloomFilter * bf,
327 * Call an iterator for each bit that the bloomfilter
328 * must test or set for this element.
330 * @param bf the filter
331 * @param callback the method to call
332 * @param arg extra argument to callback
333 * @param key the key for which we iterate over the BF bits
336 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
337 BitIterator callback, void *arg, const GNUNET_HashCode * key)
339 GNUNET_HashCode tmp[2];
342 unsigned int slot = 0;
344 bitCount = bf->addressesPerElement;
349 while (slot < (sizeof (GNUNET_HashCode) / sizeof (uint32_t)))
353 (((uint32_t *) & tmp[round & 1])[slot]) &
354 ((bf->bitArraySize * 8) - 1)))
363 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (GNUNET_HashCode),
364 &tmp[(round + 1) & 1]);
373 * Callback: increment bit
375 * @param cls pointer to writeable form of bf
376 * @param bf the filter to manipulate
377 * @param bit the bit to increment
381 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
384 struct GNUNET_CONTAINER_BloomFilter *b = cls;
386 incrementBit (b->bitArray, bit, bf->fh);
392 * Callback: decrement bit
394 * @param cls pointer to writeable form of bf
395 * @param bf the filter to manipulate
396 * @param bit the bit to decrement
400 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
403 struct GNUNET_CONTAINER_BloomFilter *b = cls;
405 decrementBit (b->bitArray, bit, bf->fh);
411 * Callback: test if all bits are set
413 * @param cls pointer set to GNUNET_NO if bit is not set
414 * @param bf the filter
415 * @param bit the bit to test
416 * @return YES if the bit is set, NO if not
419 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
424 if (GNUNET_NO == testBit (bf->bitArray, bit))
432 /* *********************** INTERFACE **************** */
435 * Load a bloom-filter from a file.
437 * @param filename the name of the file (or the prefix)
438 * @param size the size of the bloom-filter (number of
439 * bytes of storage space to use)
440 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
441 * element (number of bits set per element in the set)
442 * @return the bloomfilter
444 struct GNUNET_CONTAINER_BloomFilter *
445 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
448 struct GNUNET_CONTAINER_BloomFilter *bf;
454 GNUNET_assert (NULL != filename);
455 if ((k == 0) || (size == 0))
462 size = ui; /* make sure it's a power of 2 */
464 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
465 /* Try to open a bloomfilter file */
467 GNUNET_DISK_file_open (filename,
468 GNUNET_DISK_OPEN_READWRITE |
469 GNUNET_DISK_OPEN_CREATE,
470 GNUNET_DISK_PERM_USER_READ |
471 GNUNET_DISK_PERM_USER_WRITE);
477 bf->filename = GNUNET_strdup (filename);
479 bf->bitArray = GNUNET_malloc_large (size);
480 if (bf->bitArray == NULL)
483 GNUNET_DISK_file_close (bf->fh);
484 GNUNET_free (bf->filename);
488 bf->bitArraySize = size;
489 bf->addressesPerElement = k;
490 memset (bf->bitArray, 0, bf->bitArraySize);
492 /* Read from the file what bits we can */
493 rbuff = GNUNET_malloc (BUFFSIZE);
495 while (pos < size * 8)
499 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
502 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING, "read",
506 break; /* is ok! we just did not use that many bits yet */
507 for (i = 0; i < res; i++)
509 if ((rbuff[i] & 0x0F) != 0)
510 setBit (bf->bitArray, pos + i * 2);
511 if ((rbuff[i] & 0xF0) != 0)
512 setBit (bf->bitArray, pos + i * 2 + 1);
516 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
524 * Create a bloom filter from raw bits.
526 * @param data the raw bits in memory (maybe NULL,
527 * in which case all bits should be considered
529 * @param size the size of the bloom-filter (number of
530 * bytes of storage space to use); also size of data
531 * -- unless data is NULL
532 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
533 * element (number of bits set per element in the set)
534 * @return the bloomfilter
536 struct GNUNET_CONTAINER_BloomFilter *
537 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
540 struct GNUNET_CONTAINER_BloomFilter *bf;
543 if ((k == 0) || (size == 0))
553 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
556 bf->bitArray = GNUNET_malloc_large (size);
557 if (bf->bitArray == NULL)
562 bf->bitArraySize = size;
563 bf->addressesPerElement = k;
565 memcpy (bf->bitArray, data, size);
567 memset (bf->bitArray, 0, bf->bitArraySize);
573 * Copy the raw data of this bloomfilter into
574 * the given data array.
576 * @param bf bloomfilter to take the raw data from
577 * @param data where to write the data
578 * @param size the size of the given data array
579 * @return GNUNET_SYSERR if the data array is not big enough
582 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
583 GNUNET_CONTAINER_BloomFilter *bf,
584 char *data, size_t size)
587 return GNUNET_SYSERR;
588 if (bf->bitArraySize != size)
589 return GNUNET_SYSERR;
590 memcpy (data, bf->bitArray, size);
596 * Free the space associated with a filter
597 * in memory, flush to drive if needed (do not
598 * free the space on the drive)
600 * @param bf the filter
603 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
608 GNUNET_DISK_file_close (bf->fh);
609 GNUNET_free_non_null (bf->filename);
610 GNUNET_free (bf->bitArray);
616 * Reset a bloom filter to empty. Clears the file on disk.
618 * @param bf the filter
621 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
626 memset (bf->bitArray, 0, bf->bitArraySize);
627 if (bf->filename != NULL)
628 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
633 * Test if an element is in the filter.
635 * @param e the element
636 * @param bf the filter
637 * @return GNUNET_YES if the element is in the filter, GNUNET_NO if not
640 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter
641 *bf, const GNUNET_HashCode * e)
648 iterateBits (bf, &testBitCallback, &res, e);
654 * Add an element to the filter
656 * @param bf the filter
657 * @param e the element
660 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
661 const GNUNET_HashCode * e)
665 iterateBits (bf, &incrementBitCallback, bf, e);
670 * Or the entries of the given raw data array with the
671 * data of the given bloom filter. Assumes that
672 * the size of the data array and the current filter
675 * @param bf the filter
676 * @param data the data to or-in
677 * @param size number of bytes in data
680 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
681 const char *data, size_t size)
685 unsigned long long *fc;
686 const unsigned long long *dc;
690 if (bf->bitArraySize != size)
691 return GNUNET_SYSERR;
692 fc = (unsigned long long *) bf->bitArray;
693 dc = (const unsigned long long *) data;
694 n = size / sizeof (unsigned long long);
696 for (i = 0; i < n; i++)
698 for (i = n * sizeof (unsigned long long); i < size; i++)
699 bf->bitArray[i] |= data[i];
704 * Or the entries of the given raw data array with the
705 * data of the given bloom filter. Assumes that
706 * the size of the data array and the current filter
709 * @param bf the filter
710 * @param to_or the bloomfilter to or-in
711 * @param size number of bytes in data
714 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
715 const struct GNUNET_CONTAINER_BloomFilter
720 unsigned long long *fc;
721 const unsigned long long *dc;
725 if (bf->bitArraySize != size)
726 return GNUNET_SYSERR;
727 fc = (unsigned long long *) bf->bitArray;
728 dc = (const unsigned long long *) to_or->bitArray;
729 n = size / sizeof (unsigned long long);
731 for (i = 0; i < n; i++)
733 for (i = n * sizeof (unsigned long long); i < size; i++)
734 bf->bitArray[i] |= to_or->bitArray[i];
739 * Remove an element from the filter.
741 * @param bf the filter
742 * @param e the element to remove
745 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
746 const GNUNET_HashCode * e)
750 if (bf->filename == NULL)
752 iterateBits (bf, &decrementBitCallback, bf, e);
756 * Resize a bloom filter. Note that this operation
757 * is pretty costly. Essentially, the bloom filter
758 * needs to be completely re-build.
760 * @param bf the filter
761 * @param iterator an iterator over all elements stored in the BF
762 * @param iterator_cls argument to the iterator function
763 * @param size the new size for the filter
764 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
767 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
768 GNUNET_HashCodeIterator iterator,
769 void *iterator_cls, size_t size,
775 GNUNET_free (bf->bitArray);
779 size = i; /* make sure it's a power of 2 */
781 bf->bitArraySize = size;
782 bf->bitArray = GNUNET_malloc (size);
783 memset (bf->bitArray, 0, bf->bitArraySize);
784 if (bf->filename != NULL)
785 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
786 while (GNUNET_YES == iterator (iterator_cls, &hc))
787 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
790 /* end of container_bloomfilter.c */