2 This file is part of GNUnet.
3 Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_util_lib.h"
45 #define LOG(kind, ...) \
46 GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
48 #define LOG_STRERROR(kind, syscall) \
49 GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
51 #define LOG_STRERROR_FILE(kind, syscall, filename) \
52 GNUNET_log_from_strerror_file (kind, \
53 "util-container-bloomfilter", \
57 struct GNUNET_CONTAINER_BloomFilter
60 * The actual bloomfilter bit array
65 * Filename of the filter
70 * The bit counter file on disk
72 struct GNUNET_DISK_FileHandle *fh;
75 * How many bits we set for each stored element
77 unsigned int addressesPerElement;
80 * Size of bitArray in bytes
87 * Get the number of the addresses set per element in the bloom filter.
89 * @param bf the filter
90 * @return addresses set per element in the bf
93 GNUNET_CONTAINER_bloomfilter_get_element_addresses (
94 const struct GNUNET_CONTAINER_BloomFilter *bf)
98 return bf->addressesPerElement;
103 * Get size of the bloom filter.
105 * @param bf the filter
106 * @return number of bytes used for the data of the bloom filter
109 GNUNET_CONTAINER_bloomfilter_get_size (
110 const struct GNUNET_CONTAINER_BloomFilter *bf)
114 return bf->bitArraySize;
119 * Copy an existing memory. Any association with a file
120 * on-disk will be lost in the process.
121 * @param bf the filter to copy
122 * @return copy of the bf
124 struct GNUNET_CONTAINER_BloomFilter *
125 GNUNET_CONTAINER_bloomfilter_copy (
126 const struct GNUNET_CONTAINER_BloomFilter *bf)
128 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray,
130 bf->addressesPerElement);
135 * Sets a bit active in the bitArray. Increment bit-specific
136 * usage counter on disk only if below 4bit max (==15).
138 * @param bitArray memory area to set the bit in
139 * @param bitIdx which bit to set
142 setBit (char *bitArray, unsigned int bitIdx)
145 unsigned int targetBit;
147 arraySlot = bitIdx / 8;
148 targetBit = (1L << (bitIdx % 8));
149 bitArray[arraySlot] |= targetBit;
154 * Clears a bit from bitArray. Bit is cleared from the array
155 * only if the respective usage counter on the disk hits/is zero.
157 * @param bitArray memory area to set the bit in
158 * @param bitIdx which bit to unset
161 clearBit (char *bitArray, unsigned int bitIdx)
164 unsigned int targetBit;
167 targetBit = (1L << (bitIdx % 8));
168 bitArray[slot] = bitArray[slot] & (~targetBit);
173 * Checks if a bit is active in the bitArray
175 * @param bitArray memory area to set the bit in
176 * @param bitIdx which bit to test
177 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
180 testBit (char *bitArray, unsigned int bitIdx)
183 unsigned int targetBit;
186 targetBit = (1L << (bitIdx % 8));
187 if (bitArray[slot] & targetBit)
195 * Sets a bit active in the bitArray and increments
196 * bit-specific usage counter on disk (but only if
197 * the counter was below 4 bit max (==15)).
199 * @param bitArray memory area to set the bit in
200 * @param bitIdx which bit to test
201 * @param fh A file to keep the 4 bit address usage counters in
204 incrementBit (char *bitArray,
206 const struct GNUNET_DISK_FileHandle *fh)
212 unsigned int targetLoc;
214 setBit (bitArray, bitIdx);
215 if (GNUNET_DISK_handle_invalid (fh))
217 /* Update the counter file on disk */
218 fileSlot = bitIdx / 2;
219 targetLoc = bitIdx % 2;
221 GNUNET_assert (fileSlot ==
222 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
223 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
226 high = (value & (~0xF)) >> 4;
238 value = ((high << 4) | low);
239 GNUNET_assert (fileSlot ==
240 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
241 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
246 * Clears a bit from bitArray if the respective usage
247 * counter on the disk hits/is zero.
249 * @param bitArray memory area to set the bit in
250 * @param bitIdx which bit to test
251 * @param fh A file to keep the 4bit address usage counters in
254 decrementBit (char *bitArray,
256 const struct GNUNET_DISK_FileHandle *fh)
262 unsigned int targetLoc;
264 if (GNUNET_DISK_handle_invalid (fh))
265 return; /* cannot decrement! */
266 /* Each char slot in the counter file holds two 4 bit counters */
267 fileslot = bitIdx / 2;
268 targetLoc = bitIdx % 2;
270 GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
272 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
275 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
278 high = (value & 0xF0) >> 4;
280 /* decrement, but once we have reached the max, never go back! */
283 if ((low > 0) && (low < 0xF))
287 clearBit (bitArray, bitIdx);
292 if ((high > 0) && (high < 0xF))
296 clearBit (bitArray, bitIdx);
299 value = ((high << 4) | low);
301 GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
303 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
306 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
310 #define BUFFSIZE 65536
313 * Creates a file filled with zeroes
315 * @param fh the file handle
316 * @param size the size of the file
317 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
320 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
322 char buffer[BUFFSIZE];
323 size_t bytesleft = size;
326 if (GNUNET_DISK_handle_invalid (fh))
327 return GNUNET_SYSERR;
328 memset (buffer, 0, sizeof(buffer));
329 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
330 while (bytesleft > 0)
332 if (bytesleft > sizeof(buffer))
334 res = GNUNET_DISK_file_write (fh, buffer, sizeof(buffer));
340 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
344 if (GNUNET_SYSERR == res)
345 return GNUNET_SYSERR;
351 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
354 * Iterator (callback) method to be called by the
355 * bloomfilter iterator on each bit that is to be
356 * set or tested for the key.
359 * @param bf the filter to manipulate
360 * @param bit the current bit
361 * @return GNUNET_YES to continue, GNUNET_NO to stop early
363 typedef int (*BitIterator) (void *cls,
364 const struct GNUNET_CONTAINER_BloomFilter *bf,
369 * Call an iterator for each bit that the bloomfilter
370 * must test or set for this element.
372 * @param bf the filter
373 * @param callback the method to call
374 * @param arg extra argument to callback
375 * @param key the key for which we iterate over the BF bits
378 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
379 BitIterator callback,
381 const struct GNUNET_HashCode *key)
383 struct GNUNET_HashCode tmp[2];
386 unsigned int slot = 0;
388 bitCount = bf->addressesPerElement;
391 GNUNET_assert (bf->bitArraySize > 0);
392 GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
395 while (slot < (sizeof(struct GNUNET_HashCode) / sizeof(uint32_t)))
400 ntohl ((((uint32_t *) &tmp[round & 1])[slot]))
401 % ((bf->bitArraySize * 8LL))))
410 GNUNET_CRYPTO_hash (&tmp[round & 1],
411 sizeof(struct GNUNET_HashCode),
412 &tmp[(round + 1) & 1]);
421 * Callback: increment bit
423 * @param cls pointer to writeable form of bf
424 * @param bf the filter to manipulate
425 * @param bit the bit to increment
429 incrementBitCallback (void *cls,
430 const struct GNUNET_CONTAINER_BloomFilter *bf,
433 struct GNUNET_CONTAINER_BloomFilter *b = cls;
435 incrementBit (b->bitArray, bit, bf->fh);
441 * Callback: decrement bit
443 * @param cls pointer to writeable form of bf
444 * @param bf the filter to manipulate
445 * @param bit the bit to decrement
449 decrementBitCallback (void *cls,
450 const struct GNUNET_CONTAINER_BloomFilter *bf,
453 struct GNUNET_CONTAINER_BloomFilter *b = cls;
455 decrementBit (b->bitArray, bit, bf->fh);
461 * Callback: test if all bits are set
463 * @param cls pointer set to GNUNET_NO if bit is not set
464 * @param bf the filter
465 * @param bit the bit to test
466 * @return YES if the bit is set, NO if not
469 testBitCallback (void *cls,
470 const struct GNUNET_CONTAINER_BloomFilter *bf,
475 if (GNUNET_NO == testBit (bf->bitArray, bit))
484 /* *********************** INTERFACE **************** */
487 * Load a bloom-filter from a file.
489 * @param filename the name of the file (or the prefix)
490 * @param size the size of the bloom-filter (number of
491 * bytes of storage space to use); will be rounded up
493 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
494 * element (number of bits set per element in the set)
495 * @return the bloomfilter
497 struct GNUNET_CONTAINER_BloomFilter *
498 GNUNET_CONTAINER_bloomfilter_load (const char *filename,
502 struct GNUNET_CONTAINER_BloomFilter *bf;
510 GNUNET_assert (NULL != filename);
511 if ((k == 0) || (size == 0))
516 while ((ui < size) && (ui * 2 > ui))
518 size = ui; /* make sure it's a power of 2 */
520 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
521 /* Try to open a bloomfilter file */
522 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
523 bf->fh = GNUNET_DISK_file_open (filename,
524 GNUNET_DISK_OPEN_READWRITE,
525 GNUNET_DISK_PERM_USER_READ
526 | GNUNET_DISK_PERM_USER_WRITE);
529 /* file existed, try to read it! */
530 must_read = GNUNET_YES;
531 if (GNUNET_OK != GNUNET_DISK_file_handle_size (bf->fh, &fsize))
533 GNUNET_DISK_file_close (bf->fh);
539 /* found existing empty file, just overwrite */
540 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
542 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write");
543 GNUNET_DISK_file_close (bf->fh);
548 else if (fsize != ((off_t) size) * 4LL)
551 GNUNET_ERROR_TYPE_ERROR,
553 "Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
554 (unsigned long long) (size * 4LL),
555 (unsigned long long) fsize);
556 GNUNET_DISK_file_close (bf->fh);
563 /* file did not exist, don't read, just create */
564 must_read = GNUNET_NO;
565 bf->fh = GNUNET_DISK_file_open (filename,
566 GNUNET_DISK_OPEN_CREATE
567 | GNUNET_DISK_OPEN_READWRITE,
568 GNUNET_DISK_PERM_USER_READ
569 | GNUNET_DISK_PERM_USER_WRITE);
575 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
577 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write");
578 GNUNET_DISK_file_close (bf->fh);
583 bf->filename = GNUNET_strdup (filename);
585 bf->bitArray = GNUNET_malloc_large (size);
586 if (NULL == bf->bitArray)
589 GNUNET_DISK_file_close (bf->fh);
590 GNUNET_free (bf->filename);
594 bf->bitArraySize = size;
595 bf->addressesPerElement = k;
596 if (GNUNET_YES != must_read)
597 return bf; /* already done! */
598 /* Read from the file what bits we can */
599 rbuff = GNUNET_malloc (BUFFSIZE);
601 while (pos < ((off_t) size) * 8LL)
605 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
608 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename);
610 GNUNET_free (bf->filename);
611 GNUNET_DISK_file_close (bf->fh);
616 break; /* is ok! we just did not use that many bits yet */
617 for (i = 0; i < res; i++)
619 if ((rbuff[i] & 0x0F) != 0)
620 setBit (bf->bitArray, pos + i * 2);
621 if ((rbuff[i] & 0xF0) != 0)
622 setBit (bf->bitArray, pos + i * 2 + 1);
626 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
634 * Create a bloom filter from raw bits.
636 * @param data the raw bits in memory (maybe NULL,
637 * in which case all bits should be considered
639 * @param size the size of the bloom-filter (number of
640 * bytes of storage space to use); also size of data
641 * -- unless data is NULL
642 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
643 * element (number of bits set per element in the set)
644 * @return the bloomfilter
646 struct GNUNET_CONTAINER_BloomFilter *
647 GNUNET_CONTAINER_bloomfilter_init (const char *data,
651 struct GNUNET_CONTAINER_BloomFilter *bf;
653 if ((0 == k) || (0 == size))
655 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
658 bf->bitArray = GNUNET_malloc_large (size);
659 if (NULL == bf->bitArray)
664 bf->bitArraySize = size;
665 bf->addressesPerElement = k;
667 GNUNET_memcpy (bf->bitArray, data, size);
673 * Copy the raw data of this bloomfilter into
674 * the given data array.
676 * @param bf bloomfilter to take the raw data from
677 * @param data where to write the data
678 * @param size the size of the given data array
679 * @return #GNUNET_SYSERR if the data array is not big enough
682 GNUNET_CONTAINER_bloomfilter_get_raw_data (
683 const struct GNUNET_CONTAINER_BloomFilter *bf,
688 return GNUNET_SYSERR;
689 if (bf->bitArraySize != size)
690 return GNUNET_SYSERR;
691 GNUNET_memcpy (data, bf->bitArray, size);
697 * Free the space associated with a filter
698 * in memory, flush to drive if needed (do not
699 * free the space on the drive)
701 * @param bf the filter
704 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
709 GNUNET_DISK_file_close (bf->fh);
710 GNUNET_free_non_null (bf->filename);
711 GNUNET_free (bf->bitArray);
717 * Reset a bloom filter to empty. Clears the file on disk.
719 * @param bf the filter
722 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
727 memset (bf->bitArray, 0, bf->bitArraySize);
728 if (bf->filename != NULL)
729 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
734 * Test if an element is in the filter.
736 * @param e the element
737 * @param bf the filter
738 * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
741 GNUNET_CONTAINER_bloomfilter_test (
742 const struct GNUNET_CONTAINER_BloomFilter *bf,
743 const struct GNUNET_HashCode *e)
750 iterateBits (bf, &testBitCallback, &res, e);
756 * Add an element to the filter
758 * @param bf the filter
759 * @param e the element
762 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
763 const struct GNUNET_HashCode *e)
767 iterateBits (bf, &incrementBitCallback, bf, e);
772 * Or the entries of the given raw data array with the
773 * data of the given bloom filter. Assumes that
774 * the size of the data array and the current filter
777 * @param bf the filter
778 * @param data the data to or-in
779 * @param size number of bytes in data
782 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
788 unsigned long long *fc;
789 const unsigned long long *dc;
793 if (bf->bitArraySize != size)
794 return GNUNET_SYSERR;
795 fc = (unsigned long long *) bf->bitArray;
796 dc = (const unsigned long long *) data;
797 n = size / sizeof(unsigned long long);
799 for (i = 0; i < n; i++)
801 for (i = n * sizeof(unsigned long long); i < size; i++)
802 bf->bitArray[i] |= data[i];
808 * Or the entries of the given raw data array with the
809 * data of the given bloom filter. Assumes that
810 * the size of the data array and the current filter
813 * @param bf the filter
814 * @param to_or the bloomfilter to or-in
815 * @return #GNUNET_OK on success
818 GNUNET_CONTAINER_bloomfilter_or2 (
819 struct GNUNET_CONTAINER_BloomFilter *bf,
820 const struct GNUNET_CONTAINER_BloomFilter *to_or)
824 unsigned long long *fc;
825 const unsigned long long *dc;
830 if (bf->bitArraySize != to_or->bitArraySize)
833 return GNUNET_SYSERR;
835 size = bf->bitArraySize;
836 fc = (unsigned long long *) bf->bitArray;
837 dc = (const unsigned long long *) to_or->bitArray;
838 n = size / sizeof(unsigned long long);
840 for (i = 0; i < n; i++)
842 for (i = n * sizeof(unsigned long long); i < size; i++)
843 bf->bitArray[i] |= to_or->bitArray[i];
849 * Remove an element from the filter.
851 * @param bf the filter
852 * @param e the element to remove
855 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
856 const struct GNUNET_HashCode *e)
860 if (NULL == bf->filename)
862 iterateBits (bf, &decrementBitCallback, bf, e);
867 * Resize a bloom filter. Note that this operation
868 * is pretty costly. Essentially, the bloom filter
869 * needs to be completely re-build.
871 * @param bf the filter
872 * @param iterator an iterator over all elements stored in the BF
873 * @param iterator_cls argument to the iterator function
874 * @param size the new size for the filter
875 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
878 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
879 GNUNET_CONTAINER_HashCodeIterator iterator,
884 struct GNUNET_HashCode hc;
887 GNUNET_free (bf->bitArray);
891 size = i; /* make sure it's a power of 2 */
892 bf->addressesPerElement = k;
893 bf->bitArraySize = size;
894 bf->bitArray = GNUNET_malloc (size);
895 if (NULL != bf->filename)
896 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
897 while (GNUNET_YES == iterator (iterator_cls, &hc))
898 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
902 /* end of container_bloomfilter.c */