2 This file is part of GNUnet.
3 (C) 2001, 2002, 2003, 2004, 2006, 2008 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_common.h"
44 #include "gnunet_container_lib.h"
45 #include "gnunet_disk_lib.h"
47 struct GNUNET_CONTAINER_BloomFilter
51 * The actual bloomfilter bit array
56 * Filename of the filter
61 * The bit counter file on disk
63 struct GNUNET_DISK_FileHandle *fh;
66 * How many bits we set for each stored element
68 unsigned int addressesPerElement;
71 * Size of bitArray in bytes
80 * Get size of the bloom filter.
82 * @param bf the filter
83 * @return number of bytes used for the data of the bloom filter
86 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
91 return bf->bitArraySize;
96 * Sets a bit active in the bitArray. Increment bit-specific
97 * usage counter on disk only if below 4bit max (==15).
99 * @param bitArray memory area to set the bit in
100 * @param bitIdx which bit to set
103 setBit (char *bitArray, unsigned int bitIdx)
106 unsigned int targetBit;
108 arraySlot = bitIdx / 8;
109 targetBit = (1L << (bitIdx % 8));
110 bitArray[arraySlot] |= targetBit;
114 * Clears a bit from bitArray. Bit is cleared from the array
115 * only if the respective usage counter on the disk hits/is zero.
117 * @param bitArray memory area to set the bit in
118 * @param bitIdx which bit to unset
121 clearBit (char *bitArray, unsigned int bitIdx)
124 unsigned int targetBit;
127 targetBit = (1L << (bitIdx % 8));
128 bitArray[slot] = bitArray[slot] & (~targetBit);
132 * Checks if a bit is active in the bitArray
134 * @param bitArray memory area to set the bit in
135 * @param bitIdx which bit to test
136 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
139 testBit (char *bitArray, unsigned int bitIdx)
142 unsigned int targetBit;
145 targetBit = (1L << (bitIdx % 8));
146 if (bitArray[slot] & targetBit)
153 * Sets a bit active in the bitArray and increments
154 * bit-specific usage counter on disk (but only if
155 * the counter was below 4 bit max (==15)).
157 * @param bitArray memory area to set the bit in
158 * @param bitIdx which bit to test
159 * @param fh A file to keep the 4 bit address usage counters in
162 incrementBit (char *bitArray, unsigned int bitIdx,
163 const struct GNUNET_DISK_FileHandle *fh)
169 unsigned int targetLoc;
171 setBit (bitArray, bitIdx);
172 if (GNUNET_DISK_handle_invalid (fh))
174 /* Update the counter file on disk */
175 fileSlot = bitIdx / 2;
176 targetLoc = bitIdx % 2;
178 GNUNET_assert (fileSlot ==
179 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
180 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
183 high = (value & (~0xF)) >> 4;
195 value = ((high << 4) | low);
196 GNUNET_assert (fileSlot == GNUNET_DISK_file_seek (fh,
198 GNUNET_DISK_SEEK_SET));
199 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
203 * Clears a bit from bitArray if the respective usage
204 * counter on the disk hits/is zero.
206 * @param bitArray memory area to set the bit in
207 * @param bitIdx which bit to test
208 * @param fh A file to keep the 4bit address usage counters in
211 decrementBit (char *bitArray, unsigned int bitIdx,
212 const struct GNUNET_DISK_FileHandle *fh)
218 unsigned int targetLoc;
220 if (GNUNET_DISK_handle_invalid (fh))
221 return; /* cannot decrement! */
222 /* Each char slot in the counter file holds two 4 bit counters */
223 fileSlot = bitIdx / 2;
224 targetLoc = bitIdx % 2;
225 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
226 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
229 high = (value & 0xF0) >> 4;
231 /* decrement, but once we have reached the max, never go back! */
234 if ((low > 0) && (low < 0xF))
238 clearBit (bitArray, bitIdx);
243 if ((high > 0) && (high < 0xF))
247 clearBit (bitArray, bitIdx);
250 value = ((high << 4) | low);
251 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
252 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
255 #define BUFFSIZE 65536
258 * Creates a file filled with zeroes
260 * @param fh the file handle
261 * @param size the size of the file
262 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
265 makeEmptyFile (const struct GNUNET_DISK_FileHandle *fh, size_t size)
268 size_t bytesleft = size;
271 if (GNUNET_DISK_handle_invalid (fh))
272 return GNUNET_SYSERR;
273 buffer = GNUNET_malloc (BUFFSIZE);
274 memset (buffer, 0, BUFFSIZE);
275 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
277 while (bytesleft > 0)
279 if (bytesleft > BUFFSIZE)
281 res = GNUNET_DISK_file_write (fh, buffer, BUFFSIZE);
282 bytesleft -= BUFFSIZE;
286 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
289 GNUNET_assert (res != GNUNET_SYSERR);
291 GNUNET_free (buffer);
295 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
298 * Iterator (callback) method to be called by the
299 * bloomfilter iterator on each bit that is to be
300 * set or tested for the key.
303 * @param bf the filter to manipulate
304 * @param bit the current bit
306 typedef void (*BitIterator) (void *cls,
307 const struct GNUNET_CONTAINER_BloomFilter * bf,
311 * Call an iterator for each bit that the bloomfilter
312 * must test or set for this element.
314 * @param bf the filter
315 * @param callback the method to call
316 * @param arg extra argument to callback
317 * @param key the key for which we iterate over the BF bits
320 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
321 BitIterator callback, void *arg, const GNUNET_HashCode * key)
323 GNUNET_HashCode tmp[2];
326 unsigned int slot = 0;
328 bitCount = bf->addressesPerElement;
329 memcpy (&tmp[0], key, sizeof (GNUNET_HashCode));
333 while (slot < (sizeof (GNUNET_HashCode) / sizeof (uint32_t)))
337 (((uint32_t *) & tmp[round & 1])[slot]) &
338 ((bf->bitArraySize * 8) - 1));
346 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (GNUNET_HashCode),
347 &tmp[(round + 1) & 1]);
355 * Callback: increment bit
357 * @param cls pointer to writeable form of bf
358 * @param bf the filter to manipulate
359 * @param bit the bit to increment
362 incrementBitCallback (void *cls,
363 const struct GNUNET_CONTAINER_BloomFilter *bf,
366 struct GNUNET_CONTAINER_BloomFilter *b = cls;
367 incrementBit (b->bitArray, bit, bf->fh);
371 * Callback: decrement bit
373 * @param cls pointer to writeable form of bf
374 * @param bf the filter to manipulate
375 * @param bit the bit to decrement
378 decrementBitCallback (void *cls,
379 const struct GNUNET_CONTAINER_BloomFilter *bf,
382 struct GNUNET_CONTAINER_BloomFilter *b = cls;
383 decrementBit (b->bitArray, bit, bf->fh);
387 * Callback: test if all bits are set
389 * @param cls pointer set to GNUNET_NO if bit is not set
390 * @param bf the filter
391 * @param bit the bit to test
394 testBitCallback (void *cls,
395 const struct GNUNET_CONTAINER_BloomFilter *bf,
399 if (GNUNET_NO == testBit (bf->bitArray, bit))
403 /* *********************** INTERFACE **************** */
406 * Load a bloom-filter from a file.
408 * @param filename the name of the file (or the prefix)
409 * @param size the size of the bloom-filter (number of
410 * bytes of storage space to use)
411 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
412 * element (number of bits set per element in the set)
413 * @return the bloomfilter
415 struct GNUNET_CONTAINER_BloomFilter *
416 GNUNET_CONTAINER_bloomfilter_load (const char *filename,
417 size_t size, unsigned int k)
419 struct GNUNET_CONTAINER_BloomFilter *bf;
425 if ((k == 0) || (size == 0))
432 size = ui; /* make sure it's a power of 2 */
434 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
435 /* Try to open a bloomfilter file */
436 if (filename != NULL)
438 bf->fh = GNUNET_DISK_file_open (filename, GNUNET_DISK_OPEN_READWRITE
439 | GNUNET_DISK_OPEN_CREATE,
440 GNUNET_DISK_PERM_USER_READ |
441 GNUNET_DISK_PERM_USER_WRITE);
447 bf->filename = GNUNET_strdup (filename);
455 bf->bitArray = GNUNET_malloc_large (size);
456 if (bf->bitArray == NULL)
459 GNUNET_DISK_file_close (bf->fh);
460 GNUNET_free_non_null (bf->filename);
464 bf->bitArraySize = size;
465 bf->addressesPerElement = k;
466 memset (bf->bitArray, 0, bf->bitArraySize);
468 if (bf->filename != NULL)
470 /* Read from the file what bits we can */
471 rbuff = GNUNET_malloc (BUFFSIZE);
473 while (pos < size * 8)
477 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
480 GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING,
481 "read", bf->filename);
484 break; /* is ok! we just did not use that many bits yet */
485 for (i = 0; i < res; i++)
487 if ((rbuff[i] & 0x0F) != 0)
488 setBit (bf->bitArray, pos + i * 2);
489 if ((rbuff[i] & 0xF0) != 0)
490 setBit (bf->bitArray, pos + i * 2 + 1);
494 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
503 * Create a bloom filter from raw bits.
505 * @param data the raw bits in memory (maybe NULL,
506 * in which case all bits should be considered
508 * @param size the size of the bloom-filter (number of
509 * bytes of storage space to use); also size of data
510 * -- unless data is NULL
511 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
512 * element (number of bits set per element in the set)
513 * @return the bloomfilter
515 struct GNUNET_CONTAINER_BloomFilter *
516 GNUNET_CONTAINER_bloomfilter_init (const char *data,
517 size_t size, unsigned int k)
519 struct GNUNET_CONTAINER_BloomFilter *bf;
522 if ((k == 0) || (size == 0))
532 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
535 bf->bitArray = GNUNET_malloc_large (size);
536 if (bf->bitArray == NULL)
541 bf->bitArraySize = size;
542 bf->addressesPerElement = k;
544 memcpy (bf->bitArray, data, size);
546 memset (bf->bitArray, 0, bf->bitArraySize);
552 * Copy the raw data of this bloomfilter into
553 * the given data array.
555 * @param bf bloomfilter to take the raw data from
556 * @param data where to write the data
557 * @param size the size of the given data array
558 * @return GNUNET_SYSERR if the data array is not big enough
561 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct GNUNET_CONTAINER_BloomFilter
562 *bf, char *data, size_t size)
565 return GNUNET_SYSERR;
566 if (bf->bitArraySize != size)
567 return GNUNET_SYSERR;
568 memcpy (data, bf->bitArray, size);
573 * Free the space associated with a filter
574 * in memory, flush to drive if needed (do not
575 * free the space on the drive)
577 * @param bf the filter
580 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
585 GNUNET_DISK_file_close (bf->fh);
586 GNUNET_free_non_null (bf->filename);
587 GNUNET_free (bf->bitArray);
592 * Reset a bloom filter to empty. Clears the file on disk.
594 * @param bf the filter
597 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
602 memset (bf->bitArray, 0, bf->bitArraySize);
603 if (bf->filename != NULL)
604 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
609 * Test if an element is in the filter.
611 * @param e the element
612 * @param bf the filter
613 * @return GNUNET_YES if the element is in the filter, GNUNET_NO if not
616 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter *bf,
617 const GNUNET_HashCode * e)
624 iterateBits (bf, &testBitCallback, &res, e);
629 * Add an element to the filter
631 * @param bf the filter
632 * @param e the element
635 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
636 const GNUNET_HashCode * e)
641 iterateBits (bf, &incrementBitCallback, bf, e);
646 * Or the entries of the given raw data array with the
647 * data of the given bloom filter. Assumes that
648 * the size of the data array and the current filter
651 * @param bf the filter
652 * @param data the data to or-in
653 * @param size number of bytes in data
656 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
657 const char *data, size_t size)
661 unsigned long long* fc;
662 const unsigned long long* dc;
666 if (bf->bitArraySize != size)
667 return GNUNET_SYSERR;
668 fc = (unsigned long long*) bf->bitArray;
669 dc = (const unsigned long long*) data;
670 n = size / sizeof (unsigned long long);
672 for (i = 0; i < n; i++)
674 for (i = n * sizeof(unsigned long long); i < size; i++)
675 bf->bitArray[i] |= data[i];
680 * Or the entries of the given raw data array with the
681 * data of the given bloom filter. Assumes that
682 * the size of the data array and the current filter
685 * @param bf the filter
686 * @param to_or the bloomfilter to or-in
687 * @param size number of bytes in data
690 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
691 const struct GNUNET_CONTAINER_BloomFilter *to_or,
696 unsigned long long* fc;
697 const unsigned long long* dc;
701 if (bf->bitArraySize != size)
702 return GNUNET_SYSERR;
703 fc = (unsigned long long*) bf->bitArray;
704 dc = (const unsigned long long*) to_or->bitArray;
705 n = size / sizeof (unsigned long long);
707 for (i = 0; i < n; i++)
709 for (i = n * sizeof(unsigned long long); i < size; i++)
710 bf->bitArray[i] |= to_or->bitArray[i];
715 * Remove an element from the filter.
717 * @param bf the filter
718 * @param e the element to remove
721 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
722 const GNUNET_HashCode * e)
726 if (bf->filename == NULL)
728 iterateBits (bf, &decrementBitCallback, bf, e);
732 * Resize a bloom filter. Note that this operation
733 * is pretty costly. Essentially, the bloom filter
734 * needs to be completely re-build.
736 * @param bf the filter
737 * @param iterator an iterator over all elements stored in the BF
738 * @param iterator_cls argument to the iterator function
739 * @param size the new size for the filter
740 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
743 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
744 GNUNET_HashCodeIterator iterator,
746 size_t size, unsigned int k)
751 GNUNET_free (bf->bitArray);
755 size = i; /* make sure it's a power of 2 */
757 bf->bitArraySize = size;
758 bf->bitArray = GNUNET_malloc (size);
759 memset (bf->bitArray, 0, bf->bitArraySize);
760 if (bf->filename != NULL)
761 makeEmptyFile (bf->fh, bf->bitArraySize * 4);
762 while (GNUNET_YES == iterator (iterator_cls, &hc))
763 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
766 /* end of container_bloomfilter.c */