2 This file is part of GNUnet.
3 Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 * @file util/container_bloomfilter.c
20 * @brief data structure used to reduce disk accesses.
22 * The idea basically: Create a signature for each element in the
23 * database. Add those signatures to a bit array. When doing a lookup,
24 * check if the bit array matches the signature of the requested
25 * element. If yes, address the disk, otherwise return 'not found'.
27 * A property of the bloom filter is that sometimes we will have
28 * a match even if the element is not on the disk (then we do
29 * an unnecessary disk access), but what's most important is that
30 * we never get a single "false negative".
32 * To be able to delete entries from the bloom filter, we maintain
33 * a 4 bit counter in the file on the drive (we still use only one
36 * @author Igor Wronsky
37 * @author Christian Grothoff
41 #include "gnunet_util_lib.h"
43 #define LOG(kind,...) GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
45 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
47 #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util-container-bloomfilter", syscall, filename)
49 struct GNUNET_CONTAINER_BloomFilter
53 * The actual bloomfilter bit array
58 * Filename of the filter
63 * The bit counter file on disk
65 struct GNUNET_DISK_FileHandle *fh;
68 * How many bits we set for each stored element
70 unsigned int addressesPerElement;
73 * Size of bitArray in bytes
81 * Get the number of the addresses set per element in the bloom filter.
83 * @param bf the filter
84 * @return addresses set per element in the bf
87 GNUNET_CONTAINER_bloomfilter_get_element_addresses (const struct GNUNET_CONTAINER_BloomFilter
92 return bf->addressesPerElement;
97 * Get size of the bloom filter.
99 * @param bf the filter
100 * @return number of bytes used for the data of the bloom filter
103 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
108 return bf->bitArraySize;
113 * Copy an existing memory. Any association with a file
114 * on-disk will be lost in the process.
115 * @param bf the filter to copy
116 * @return copy of the bf
118 struct GNUNET_CONTAINER_BloomFilter *
119 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
122 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
123 bf->addressesPerElement);
128 * Sets a bit active in the bitArray. Increment bit-specific
129 * usage counter on disk only if below 4bit max (==15).
131 * @param bitArray memory area to set the bit in
132 * @param bitIdx which bit to set
135 setBit (char *bitArray, unsigned int bitIdx)
138 unsigned int targetBit;
140 arraySlot = bitIdx / 8;
141 targetBit = (1L << (bitIdx % 8));
142 bitArray[arraySlot] |= targetBit;
146 * Clears a bit from bitArray. Bit is cleared from the array
147 * only if the respective usage counter on the disk hits/is zero.
149 * @param bitArray memory area to set the bit in
150 * @param bitIdx which bit to unset
153 clearBit (char *bitArray, unsigned int bitIdx)
156 unsigned int targetBit;
159 targetBit = (1L << (bitIdx % 8));
160 bitArray[slot] = bitArray[slot] & (~targetBit);
164 * Checks if a bit is active in the bitArray
166 * @param bitArray memory area to set the bit in
167 * @param bitIdx which bit to test
168 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
171 testBit (char *bitArray, unsigned int bitIdx)
174 unsigned int targetBit;
177 targetBit = (1L << (bitIdx % 8));
178 if (bitArray[slot] & targetBit)
185 * Sets a bit active in the bitArray and increments
186 * bit-specific usage counter on disk (but only if
187 * the counter was below 4 bit max (==15)).
189 * @param bitArray memory area to set the bit in
190 * @param bitIdx which bit to test
191 * @param fh A file to keep the 4 bit address usage counters in
194 incrementBit (char *bitArray, unsigned int bitIdx,
195 const struct GNUNET_DISK_FileHandle *fh)
201 unsigned int targetLoc;
203 setBit (bitArray, bitIdx);
204 if (GNUNET_DISK_handle_invalid (fh))
206 /* Update the counter file on disk */
207 fileSlot = bitIdx / 2;
208 targetLoc = bitIdx % 2;
210 GNUNET_assert (fileSlot ==
211 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
212 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
215 high = (value & (~0xF)) >> 4;
227 value = ((high << 4) | low);
228 GNUNET_assert (fileSlot ==
229 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
230 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
234 * Clears a bit from bitArray if the respective usage
235 * counter on the disk hits/is zero.
237 * @param bitArray memory area to set the bit in
238 * @param bitIdx which bit to test
239 * @param fh A file to keep the 4bit address usage counters in
242 decrementBit (char *bitArray, unsigned int bitIdx,
243 const struct GNUNET_DISK_FileHandle *fh)
249 unsigned int targetLoc;
251 if (GNUNET_DISK_handle_invalid (fh))
252 return; /* cannot decrement! */
253 /* Each char slot in the counter file holds two 4 bit counters */
254 fileslot = bitIdx / 2;
255 targetLoc = bitIdx % 2;
256 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
258 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
261 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
264 high = (value & 0xF0) >> 4;
266 /* decrement, but once we have reached the max, never go back! */
269 if ((low > 0) && (low < 0xF))
273 clearBit (bitArray, bitIdx);
278 if ((high > 0) && (high < 0xF))
282 clearBit (bitArray, bitIdx);
285 value = ((high << 4) | low);
286 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
288 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
291 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
294 #define BUFFSIZE 65536
297 * Creates a file filled with zeroes
299 * @param fh the file handle
300 * @param size the size of the file
301 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
304 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
306 char buffer[BUFFSIZE];
307 size_t bytesleft = size;
310 if (GNUNET_DISK_handle_invalid (fh))
311 return GNUNET_SYSERR;
312 memset (buffer, 0, sizeof (buffer));
313 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
314 while (bytesleft > 0)
316 if (bytesleft > sizeof (buffer))
318 res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
324 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
328 if (GNUNET_SYSERR == res)
329 return GNUNET_SYSERR;
334 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
337 * Iterator (callback) method to be called by the
338 * bloomfilter iterator on each bit that is to be
339 * set or tested for the key.
342 * @param bf the filter to manipulate
343 * @param bit the current bit
344 * @return GNUNET_YES to continue, GNUNET_NO to stop early
346 typedef int (*BitIterator) (void *cls,
347 const struct GNUNET_CONTAINER_BloomFilter * bf,
352 * Call an iterator for each bit that the bloomfilter
353 * must test or set for this element.
355 * @param bf the filter
356 * @param callback the method to call
357 * @param arg extra argument to callback
358 * @param key the key for which we iterate over the BF bits
361 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
362 BitIterator callback, void *arg, const struct GNUNET_HashCode *key)
364 struct GNUNET_HashCode tmp[2];
367 unsigned int slot = 0;
369 bitCount = bf->addressesPerElement;
372 GNUNET_assert (bf->bitArraySize > 0);
373 GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
376 while (slot < (sizeof (struct GNUNET_HashCode) / sizeof (uint32_t)))
380 ntohl ((((uint32_t *) & tmp[round & 1])[slot])) %
381 ((bf->bitArraySize * 8LL))))
390 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (struct GNUNET_HashCode),
391 &tmp[(round + 1) & 1]);
400 * Callback: increment bit
402 * @param cls pointer to writeable form of bf
403 * @param bf the filter to manipulate
404 * @param bit the bit to increment
408 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
411 struct GNUNET_CONTAINER_BloomFilter *b = cls;
413 incrementBit (b->bitArray, bit, bf->fh);
419 * Callback: decrement bit
421 * @param cls pointer to writeable form of bf
422 * @param bf the filter to manipulate
423 * @param bit the bit to decrement
427 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
430 struct GNUNET_CONTAINER_BloomFilter *b = cls;
432 decrementBit (b->bitArray, bit, bf->fh);
438 * Callback: test if all bits are set
440 * @param cls pointer set to GNUNET_NO if bit is not set
441 * @param bf the filter
442 * @param bit the bit to test
443 * @return YES if the bit is set, NO if not
446 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
451 if (GNUNET_NO == testBit (bf->bitArray, bit))
459 /* *********************** INTERFACE **************** */
462 * Load a bloom-filter from a file.
464 * @param filename the name of the file (or the prefix)
465 * @param size the size of the bloom-filter (number of
466 * bytes of storage space to use); will be rounded up
468 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
469 * element (number of bits set per element in the set)
470 * @return the bloomfilter
472 struct GNUNET_CONTAINER_BloomFilter *
473 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
476 struct GNUNET_CONTAINER_BloomFilter *bf;
484 GNUNET_assert (NULL != filename);
485 if ((k == 0) || (size == 0))
490 while ( (ui < size) &&
493 size = ui; /* make sure it's a power of 2 */
495 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
496 /* Try to open a bloomfilter file */
497 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
499 GNUNET_DISK_file_open (filename,
500 GNUNET_DISK_OPEN_READWRITE,
501 GNUNET_DISK_PERM_USER_READ |
502 GNUNET_DISK_PERM_USER_WRITE);
505 /* file existed, try to read it! */
506 must_read = GNUNET_YES;
508 GNUNET_DISK_file_handle_size (bf->fh, &fsize))
510 GNUNET_DISK_file_close (bf->fh);
516 /* found existing empty file, just overwrite */
518 make_empty_file (bf->fh, size * 4LL))
520 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
522 GNUNET_DISK_file_close (bf->fh);
527 else if (fsize != ((off_t) size) * 4LL)
529 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
530 _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
531 (unsigned long long) (size * 4LL),
532 (unsigned long long) fsize);
533 GNUNET_DISK_file_close (bf->fh);
540 /* file did not exist, don't read, just create */
541 must_read = GNUNET_NO;
543 GNUNET_DISK_file_open (filename,
544 GNUNET_DISK_OPEN_CREATE |
545 GNUNET_DISK_OPEN_READWRITE,
546 GNUNET_DISK_PERM_USER_READ |
547 GNUNET_DISK_PERM_USER_WRITE);
553 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
555 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
557 GNUNET_DISK_file_close (bf->fh);
562 bf->filename = GNUNET_strdup (filename);
564 bf->bitArray = GNUNET_malloc_large (size);
565 if (NULL == bf->bitArray)
568 GNUNET_DISK_file_close (bf->fh);
569 GNUNET_free (bf->filename);
573 bf->bitArraySize = size;
574 bf->addressesPerElement = k;
575 if (GNUNET_YES != must_read)
576 return bf; /* already done! */
577 /* Read from the file what bits we can */
578 rbuff = GNUNET_malloc (BUFFSIZE);
580 while (pos < ((off_t) size) * 8LL)
584 res = GNUNET_DISK_file_read (bf->fh,
589 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING,
593 GNUNET_free (bf->filename);
594 GNUNET_DISK_file_close (bf->fh);
599 break; /* is ok! we just did not use that many bits yet */
600 for (i = 0; i < res; i++)
602 if ((rbuff[i] & 0x0F) != 0)
603 setBit (bf->bitArray, pos + i * 2);
604 if ((rbuff[i] & 0xF0) != 0)
605 setBit (bf->bitArray, pos + i * 2 + 1);
609 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
617 * Create a bloom filter from raw bits.
619 * @param data the raw bits in memory (maybe NULL,
620 * in which case all bits should be considered
622 * @param size the size of the bloom-filter (number of
623 * bytes of storage space to use); also size of data
624 * -- unless data is NULL
625 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
626 * element (number of bits set per element in the set)
627 * @return the bloomfilter
629 struct GNUNET_CONTAINER_BloomFilter *
630 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
633 struct GNUNET_CONTAINER_BloomFilter *bf;
635 if ((0 == k) || (0 == size))
637 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
640 bf->bitArray = GNUNET_malloc_large (size);
641 if (NULL == bf->bitArray)
646 bf->bitArraySize = size;
647 bf->addressesPerElement = k;
649 GNUNET_memcpy (bf->bitArray, data, size);
655 * Copy the raw data of this bloomfilter into
656 * the given data array.
658 * @param bf bloomfilter to take the raw data from
659 * @param data where to write the data
660 * @param size the size of the given data array
661 * @return #GNUNET_SYSERR if the data array is not big enough
664 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
665 GNUNET_CONTAINER_BloomFilter *bf,
666 char *data, size_t size)
669 return GNUNET_SYSERR;
670 if (bf->bitArraySize != size)
671 return GNUNET_SYSERR;
672 GNUNET_memcpy (data, bf->bitArray, size);
678 * Free the space associated with a filter
679 * in memory, flush to drive if needed (do not
680 * free the space on the drive)
682 * @param bf the filter
685 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
690 GNUNET_DISK_file_close (bf->fh);
691 GNUNET_free_non_null (bf->filename);
692 GNUNET_free (bf->bitArray);
698 * Reset a bloom filter to empty. Clears the file on disk.
700 * @param bf the filter
703 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
708 memset (bf->bitArray, 0, bf->bitArraySize);
709 if (bf->filename != NULL)
710 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
715 * Test if an element is in the filter.
717 * @param e the element
718 * @param bf the filter
719 * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
722 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter *bf,
723 const struct GNUNET_HashCode * e)
730 iterateBits (bf, &testBitCallback, &res, e);
736 * Add an element to the filter
738 * @param bf the filter
739 * @param e the element
742 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
743 const struct GNUNET_HashCode * e)
747 iterateBits (bf, &incrementBitCallback, bf, e);
752 * Or the entries of the given raw data array with the
753 * data of the given bloom filter. Assumes that
754 * the size of the data array and the current filter
757 * @param bf the filter
758 * @param data the data to or-in
759 * @param size number of bytes in data
762 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
768 unsigned long long *fc;
769 const unsigned long long *dc;
773 if (bf->bitArraySize != size)
774 return GNUNET_SYSERR;
775 fc = (unsigned long long *) bf->bitArray;
776 dc = (const unsigned long long *) data;
777 n = size / sizeof (unsigned long long);
779 for (i = 0; i < n; i++)
781 for (i = n * sizeof (unsigned long long); i < size; i++)
782 bf->bitArray[i] |= data[i];
787 * Or the entries of the given raw data array with the
788 * data of the given bloom filter. Assumes that
789 * the size of the data array and the current filter
792 * @param bf the filter
793 * @param to_or the bloomfilter to or-in
794 * @return #GNUNET_OK on success
797 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
798 const struct GNUNET_CONTAINER_BloomFilter *to_or)
802 unsigned long long *fc;
803 const unsigned long long *dc;
808 if (bf->bitArraySize != to_or->bitArraySize)
811 return GNUNET_SYSERR;
813 size = bf->bitArraySize;
814 fc = (unsigned long long *) bf->bitArray;
815 dc = (const unsigned long long *) to_or->bitArray;
816 n = size / sizeof (unsigned long long);
818 for (i = 0; i < n; i++)
820 for (i = n * sizeof (unsigned long long); i < size; i++)
821 bf->bitArray[i] |= to_or->bitArray[i];
827 * Remove an element from the filter.
829 * @param bf the filter
830 * @param e the element to remove
833 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
834 const struct GNUNET_HashCode *e)
838 if (NULL == bf->filename)
841 &decrementBitCallback,
847 * Resize a bloom filter. Note that this operation
848 * is pretty costly. Essentially, the bloom filter
849 * needs to be completely re-build.
851 * @param bf the filter
852 * @param iterator an iterator over all elements stored in the BF
853 * @param iterator_cls argument to the iterator function
854 * @param size the new size for the filter
855 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
858 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
859 GNUNET_CONTAINER_HashCodeIterator iterator,
864 struct GNUNET_HashCode hc;
867 GNUNET_free (bf->bitArray);
871 size = i; /* make sure it's a power of 2 */
872 bf->addressesPerElement = k;
873 bf->bitArraySize = size;
874 bf->bitArray = GNUNET_malloc (size);
875 if (NULL != bf->filename)
876 make_empty_file (bf->fh,
877 bf->bitArraySize * 4LL);
878 while (GNUNET_YES == iterator (iterator_cls,
880 GNUNET_CONTAINER_bloomfilter_add (bf,
884 /* end of container_bloomfilter.c */