2 This file is part of GNUnet.
3 (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_common.h"
44 #include "gnunet_container_lib.h"
45 #include "gnunet_disk_lib.h"
47 #define LOG(kind,...) GNUNET_log_from (kind, "util", __VA_ARGS__)
49 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util", syscall)
51 #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util", syscall, filename)
53 struct GNUNET_CONTAINER_BloomFilter
57 * The actual bloomfilter bit array
62 * Filename of the filter
67 * The bit counter file on disk
69 struct GNUNET_DISK_FileHandle *fh;
72 * How many bits we set for each stored element
74 unsigned int addressesPerElement;
77 * Size of bitArray in bytes
86 * Get size of the bloom filter.
88 * @param bf the filter
89 * @return number of bytes used for the data of the bloom filter
92 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
97 return bf->bitArraySize;
102 * Copy an existing memory. Any association with a file
103 * on-disk will be lost in the process.
104 * @param bf the filter to copy
105 * @return copy of the bf
107 struct GNUNET_CONTAINER_BloomFilter *
108 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
111 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
112 bf->addressesPerElement);
117 * Sets a bit active in the bitArray. Increment bit-specific
118 * usage counter on disk only if below 4bit max (==15).
120 * @param bitArray memory area to set the bit in
121 * @param bitIdx which bit to set
124 setBit (char *bitArray, unsigned int bitIdx)
127 unsigned int targetBit;
129 arraySlot = bitIdx / 8;
130 targetBit = (1L << (bitIdx % 8));
131 bitArray[arraySlot] |= targetBit;
135 * Clears a bit from bitArray. Bit is cleared from the array
136 * only if the respective usage counter on the disk hits/is zero.
138 * @param bitArray memory area to set the bit in
139 * @param bitIdx which bit to unset
142 clearBit (char *bitArray, unsigned int bitIdx)
145 unsigned int targetBit;
148 targetBit = (1L << (bitIdx % 8));
149 bitArray[slot] = bitArray[slot] & (~targetBit);
153 * Checks if a bit is active in the bitArray
155 * @param bitArray memory area to set the bit in
156 * @param bitIdx which bit to test
157 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
160 testBit (char *bitArray, unsigned int bitIdx)
163 unsigned int targetBit;
166 targetBit = (1L << (bitIdx % 8));
167 if (bitArray[slot] & targetBit)
174 * Sets a bit active in the bitArray and increments
175 * bit-specific usage counter on disk (but only if
176 * the counter was below 4 bit max (==15)).
178 * @param bitArray memory area to set the bit in
179 * @param bitIdx which bit to test
180 * @param fh A file to keep the 4 bit address usage counters in
183 incrementBit (char *bitArray, unsigned int bitIdx,
184 const struct GNUNET_DISK_FileHandle *fh)
190 unsigned int targetLoc;
192 setBit (bitArray, bitIdx);
193 if (GNUNET_DISK_handle_invalid (fh))
195 /* Update the counter file on disk */
196 fileSlot = bitIdx / 2;
197 targetLoc = bitIdx % 2;
199 GNUNET_assert (fileSlot ==
200 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
201 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
204 high = (value & (~0xF)) >> 4;
216 value = ((high << 4) | low);
217 GNUNET_assert (fileSlot ==
218 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
219 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
223 * Clears a bit from bitArray if the respective usage
224 * counter on the disk hits/is zero.
226 * @param bitArray memory area to set the bit in
227 * @param bitIdx which bit to test
228 * @param fh A file to keep the 4bit address usage counters in
231 decrementBit (char *bitArray, unsigned int bitIdx,
232 const struct GNUNET_DISK_FileHandle *fh)
238 unsigned int targetLoc;
240 if (GNUNET_DISK_handle_invalid (fh))
241 return; /* cannot decrement! */
242 /* Each char slot in the counter file holds two 4 bit counters */
243 fileSlot = bitIdx / 2;
244 targetLoc = bitIdx % 2;
245 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
246 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
249 high = (value & 0xF0) >> 4;
251 /* decrement, but once we have reached the max, never go back! */
254 if ((low > 0) && (low < 0xF))
258 clearBit (bitArray, bitIdx);
263 if ((high > 0) && (high < 0xF))
267 clearBit (bitArray, bitIdx);
270 value = ((high << 4) | low);
271 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET);
272 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
275 #define BUFFSIZE 65536
278 * Creates a file filled with zeroes
280 * @param fh the file handle
281 * @param size the size of the file
282 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
285 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
287 char buffer[BUFFSIZE];
288 size_t bytesleft = size;
291 if (GNUNET_DISK_handle_invalid (fh))
292 return GNUNET_SYSERR;
293 memset (buffer, 0, sizeof (buffer));
294 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
295 while (bytesleft > 0)
297 if (bytesleft > sizeof (buffer))
299 res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
305 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
309 if (GNUNET_SYSERR == res)
310 return GNUNET_SYSERR;
315 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
318 * Iterator (callback) method to be called by the
319 * bloomfilter iterator on each bit that is to be
320 * set or tested for the key.
323 * @param bf the filter to manipulate
324 * @param bit the current bit
325 * @return GNUNET_YES to continue, GNUNET_NO to stop early
327 typedef int (*BitIterator) (void *cls,
328 const struct GNUNET_CONTAINER_BloomFilter * bf,
333 * Call an iterator for each bit that the bloomfilter
334 * must test or set for this element.
336 * @param bf the filter
337 * @param callback the method to call
338 * @param arg extra argument to callback
339 * @param key the key for which we iterate over the BF bits
342 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
343 BitIterator callback, void *arg, const GNUNET_HashCode * key)
345 GNUNET_HashCode tmp[2];
348 unsigned int slot = 0;
350 bitCount = bf->addressesPerElement;
355 while (slot < (sizeof (GNUNET_HashCode) / sizeof (uint32_t)))
359 (((uint32_t *) & tmp[round & 1])[slot]) &
360 ((bf->bitArraySize * 8) - 1)))
369 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (GNUNET_HashCode),
370 &tmp[(round + 1) & 1]);
379 * Callback: increment bit
381 * @param cls pointer to writeable form of bf
382 * @param bf the filter to manipulate
383 * @param bit the bit to increment
387 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
390 struct GNUNET_CONTAINER_BloomFilter *b = cls;
392 incrementBit (b->bitArray, bit, bf->fh);
398 * Callback: decrement bit
400 * @param cls pointer to writeable form of bf
401 * @param bf the filter to manipulate
402 * @param bit the bit to decrement
406 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
409 struct GNUNET_CONTAINER_BloomFilter *b = cls;
411 decrementBit (b->bitArray, bit, bf->fh);
417 * Callback: test if all bits are set
419 * @param cls pointer set to GNUNET_NO if bit is not set
420 * @param bf the filter
421 * @param bit the bit to test
422 * @return YES if the bit is set, NO if not
425 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
430 if (GNUNET_NO == testBit (bf->bitArray, bit))
438 /* *********************** INTERFACE **************** */
441 * Load a bloom-filter from a file.
443 * @param filename the name of the file (or the prefix)
444 * @param size the size of the bloom-filter (number of
445 * bytes of storage space to use)
446 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
447 * element (number of bits set per element in the set)
448 * @return the bloomfilter
450 struct GNUNET_CONTAINER_BloomFilter *
451 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
454 struct GNUNET_CONTAINER_BloomFilter *bf;
462 GNUNET_assert (NULL != filename);
463 if ((k == 0) || (size == 0))
468 while ( (ui < size) &&
471 size = ui; /* make sure it's a power of 2 */
473 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
474 /* Try to open a bloomfilter file */
475 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
477 GNUNET_DISK_file_open (filename,
478 GNUNET_DISK_OPEN_READWRITE,
479 GNUNET_DISK_PERM_USER_READ |
480 GNUNET_DISK_PERM_USER_WRITE);
483 /* file existed, try to read it! */
484 must_read = GNUNET_YES;
486 GNUNET_DISK_file_handle_size (bf->fh, &fsize))
488 GNUNET_DISK_file_close (bf->fh);
494 /* found existing empty file, just overwrite */
495 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
497 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
499 GNUNET_DISK_file_close (bf->fh);
504 else if (fsize != size * 4LL)
506 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
507 _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
508 (unsigned long long) (size * 4LL),
509 (unsigned long long) fsize);
510 GNUNET_DISK_file_close (bf->fh);
517 /* file did not exist, don't read, just create */
518 must_read = GNUNET_NO;
520 GNUNET_DISK_file_open (filename,
521 GNUNET_DISK_OPEN_CREATE |
522 GNUNET_DISK_OPEN_READWRITE,
523 GNUNET_DISK_PERM_USER_READ |
524 GNUNET_DISK_PERM_USER_WRITE);
530 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
532 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
534 GNUNET_DISK_file_close (bf->fh);
539 bf->filename = GNUNET_strdup (filename);
541 bf->bitArray = GNUNET_malloc_large (size);
542 if (bf->bitArray == NULL)
545 GNUNET_DISK_file_close (bf->fh);
546 GNUNET_free (bf->filename);
550 bf->bitArraySize = size;
551 bf->addressesPerElement = k;
552 memset (bf->bitArray, 0, bf->bitArraySize);
554 if (GNUNET_YES != must_read)
555 return bf; /* already done! */
556 /* Read from the file what bits we can */
557 rbuff = GNUNET_malloc (BUFFSIZE);
559 while (pos < size * 8LL)
563 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
566 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename);
568 GNUNET_free (bf->filename);
569 GNUNET_DISK_file_close (bf->fh);
574 break; /* is ok! we just did not use that many bits yet */
575 for (i = 0; i < res; i++)
577 if ((rbuff[i] & 0x0F) != 0)
578 setBit (bf->bitArray, pos + i * 2);
579 if ((rbuff[i] & 0xF0) != 0)
580 setBit (bf->bitArray, pos + i * 2 + 1);
584 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
592 * Create a bloom filter from raw bits.
594 * @param data the raw bits in memory (maybe NULL,
595 * in which case all bits should be considered
597 * @param size the size of the bloom-filter (number of
598 * bytes of storage space to use); also size of data
599 * -- unless data is NULL
600 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
601 * element (number of bits set per element in the set)
602 * @return the bloomfilter
604 struct GNUNET_CONTAINER_BloomFilter *
605 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
608 struct GNUNET_CONTAINER_BloomFilter *bf;
611 if ((k == 0) || (size == 0))
621 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
624 bf->bitArray = GNUNET_malloc_large (size);
625 if (bf->bitArray == NULL)
630 bf->bitArraySize = size;
631 bf->addressesPerElement = k;
633 memcpy (bf->bitArray, data, size);
635 memset (bf->bitArray, 0, bf->bitArraySize);
641 * Copy the raw data of this bloomfilter into
642 * the given data array.
644 * @param bf bloomfilter to take the raw data from
645 * @param data where to write the data
646 * @param size the size of the given data array
647 * @return GNUNET_SYSERR if the data array is not big enough
650 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
651 GNUNET_CONTAINER_BloomFilter *bf,
652 char *data, size_t size)
655 return GNUNET_SYSERR;
656 if (bf->bitArraySize != size)
657 return GNUNET_SYSERR;
658 memcpy (data, bf->bitArray, size);
664 * Free the space associated with a filter
665 * in memory, flush to drive if needed (do not
666 * free the space on the drive)
668 * @param bf the filter
671 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
676 GNUNET_DISK_file_close (bf->fh);
677 GNUNET_free_non_null (bf->filename);
678 GNUNET_free (bf->bitArray);
684 * Reset a bloom filter to empty. Clears the file on disk.
686 * @param bf the filter
689 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
694 memset (bf->bitArray, 0, bf->bitArraySize);
695 if (bf->filename != NULL)
696 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
701 * Test if an element is in the filter.
703 * @param e the element
704 * @param bf the filter
705 * @return GNUNET_YES if the element is in the filter, GNUNET_NO if not
708 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter
709 *bf, const GNUNET_HashCode * e)
716 iterateBits (bf, &testBitCallback, &res, e);
722 * Add an element to the filter
724 * @param bf the filter
725 * @param e the element
728 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
729 const GNUNET_HashCode * e)
733 iterateBits (bf, &incrementBitCallback, bf, e);
738 * Or the entries of the given raw data array with the
739 * data of the given bloom filter. Assumes that
740 * the size of the data array and the current filter
743 * @param bf the filter
744 * @param data the data to or-in
745 * @param size number of bytes in data
748 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
749 const char *data, size_t size)
753 unsigned long long *fc;
754 const unsigned long long *dc;
758 if (bf->bitArraySize != size)
759 return GNUNET_SYSERR;
760 fc = (unsigned long long *) bf->bitArray;
761 dc = (const unsigned long long *) data;
762 n = size / sizeof (unsigned long long);
764 for (i = 0; i < n; i++)
766 for (i = n * sizeof (unsigned long long); i < size; i++)
767 bf->bitArray[i] |= data[i];
772 * Or the entries of the given raw data array with the
773 * data of the given bloom filter. Assumes that
774 * the size of the data array and the current filter
777 * @param bf the filter
778 * @param to_or the bloomfilter to or-in
779 * @param size number of bytes in data
782 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
783 const struct GNUNET_CONTAINER_BloomFilter
788 unsigned long long *fc;
789 const unsigned long long *dc;
793 if (bf->bitArraySize != size)
794 return GNUNET_SYSERR;
795 fc = (unsigned long long *) bf->bitArray;
796 dc = (const unsigned long long *) to_or->bitArray;
797 n = size / sizeof (unsigned long long);
799 for (i = 0; i < n; i++)
801 for (i = n * sizeof (unsigned long long); i < size; i++)
802 bf->bitArray[i] |= to_or->bitArray[i];
807 * Remove an element from the filter.
809 * @param bf the filter
810 * @param e the element to remove
813 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
814 const GNUNET_HashCode * e)
818 if (bf->filename == NULL)
820 iterateBits (bf, &decrementBitCallback, bf, e);
824 * Resize a bloom filter. Note that this operation
825 * is pretty costly. Essentially, the bloom filter
826 * needs to be completely re-build.
828 * @param bf the filter
829 * @param iterator an iterator over all elements stored in the BF
830 * @param iterator_cls argument to the iterator function
831 * @param size the new size for the filter
832 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
835 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
836 GNUNET_HashCodeIterator iterator,
837 void *iterator_cls, size_t size,
843 GNUNET_free (bf->bitArray);
847 size = i; /* make sure it's a power of 2 */
849 bf->bitArraySize = size;
850 bf->bitArray = GNUNET_malloc (size);
851 memset (bf->bitArray, 0, bf->bitArraySize);
852 if (bf->filename != NULL)
853 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
854 while (GNUNET_YES == iterator (iterator_cls, &hc))
855 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
858 /* end of container_bloomfilter.c */