2 This file is part of GNUnet.
3 Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_util_lib.h"
45 #define LOG(kind, ...) \
46 GNUNET_log_from(kind, "util-container-bloomfilter", __VA_ARGS__)
48 #define LOG_STRERROR(kind, syscall) \
49 GNUNET_log_from_strerror(kind, "util-container-bloomfilter", syscall)
51 #define LOG_STRERROR_FILE(kind, syscall, filename) \
52 GNUNET_log_from_strerror_file(kind, \
53 "util-container-bloomfilter", \
57 struct GNUNET_CONTAINER_BloomFilter {
59 * The actual bloomfilter bit array
64 * Filename of the filter
69 * The bit counter file on disk
71 struct GNUNET_DISK_FileHandle *fh;
74 * How many bits we set for each stored element
76 unsigned int addressesPerElement;
79 * Size of bitArray in bytes
86 * Get the number of the addresses set per element in the bloom filter.
88 * @param bf the filter
89 * @return addresses set per element in the bf
92 GNUNET_CONTAINER_bloomfilter_get_element_addresses(
93 const struct GNUNET_CONTAINER_BloomFilter *bf)
97 return bf->addressesPerElement;
102 * Get size of the bloom filter.
104 * @param bf the filter
105 * @return number of bytes used for the data of the bloom filter
108 GNUNET_CONTAINER_bloomfilter_get_size(
109 const struct GNUNET_CONTAINER_BloomFilter *bf)
113 return bf->bitArraySize;
118 * Copy an existing memory. Any association with a file
119 * on-disk will be lost in the process.
120 * @param bf the filter to copy
121 * @return copy of the bf
123 struct GNUNET_CONTAINER_BloomFilter *
124 GNUNET_CONTAINER_bloomfilter_copy(
125 const struct GNUNET_CONTAINER_BloomFilter *bf)
127 return GNUNET_CONTAINER_bloomfilter_init(bf->bitArray,
129 bf->addressesPerElement);
134 * Sets a bit active in the bitArray. Increment bit-specific
135 * usage counter on disk only if below 4bit max (==15).
137 * @param bitArray memory area to set the bit in
138 * @param bitIdx which bit to set
141 setBit(char *bitArray, unsigned int bitIdx)
144 unsigned int targetBit;
146 arraySlot = bitIdx / 8;
147 targetBit = (1L << (bitIdx % 8));
148 bitArray[arraySlot] |= targetBit;
152 * Clears a bit from bitArray. Bit is cleared from the array
153 * only if the respective usage counter on the disk hits/is zero.
155 * @param bitArray memory area to set the bit in
156 * @param bitIdx which bit to unset
159 clearBit(char *bitArray, unsigned int bitIdx)
162 unsigned int targetBit;
165 targetBit = (1L << (bitIdx % 8));
166 bitArray[slot] = bitArray[slot] & (~targetBit);
170 * Checks if a bit is active in the bitArray
172 * @param bitArray memory area to set the bit in
173 * @param bitIdx which bit to test
174 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
177 testBit(char *bitArray, unsigned int bitIdx)
180 unsigned int targetBit;
183 targetBit = (1L << (bitIdx % 8));
184 if (bitArray[slot] & targetBit)
191 * Sets a bit active in the bitArray and increments
192 * bit-specific usage counter on disk (but only if
193 * the counter was below 4 bit max (==15)).
195 * @param bitArray memory area to set the bit in
196 * @param bitIdx which bit to test
197 * @param fh A file to keep the 4 bit address usage counters in
200 incrementBit(char *bitArray,
202 const struct GNUNET_DISK_FileHandle *fh)
208 unsigned int targetLoc;
210 setBit(bitArray, bitIdx);
211 if (GNUNET_DISK_handle_invalid(fh))
213 /* Update the counter file on disk */
214 fileSlot = bitIdx / 2;
215 targetLoc = bitIdx % 2;
217 GNUNET_assert(fileSlot ==
218 GNUNET_DISK_file_seek(fh, fileSlot, GNUNET_DISK_SEEK_SET));
219 if (1 != GNUNET_DISK_file_read(fh, &value, 1))
222 high = (value & (~0xF)) >> 4;
234 value = ((high << 4) | low);
235 GNUNET_assert(fileSlot ==
236 GNUNET_DISK_file_seek(fh, fileSlot, GNUNET_DISK_SEEK_SET));
237 GNUNET_assert(1 == GNUNET_DISK_file_write(fh, &value, 1));
241 * Clears a bit from bitArray if the respective usage
242 * counter on the disk hits/is zero.
244 * @param bitArray memory area to set the bit in
245 * @param bitIdx which bit to test
246 * @param fh A file to keep the 4bit address usage counters in
249 decrementBit(char *bitArray,
251 const struct GNUNET_DISK_FileHandle *fh)
257 unsigned int targetLoc;
259 if (GNUNET_DISK_handle_invalid(fh))
260 return; /* cannot decrement! */
261 /* Each char slot in the counter file holds two 4 bit counters */
262 fileslot = bitIdx / 2;
263 targetLoc = bitIdx % 2;
265 GNUNET_DISK_file_seek(fh, fileslot, GNUNET_DISK_SEEK_SET))
267 GNUNET_log_strerror(GNUNET_ERROR_TYPE_ERROR, "seek");
270 if (1 != GNUNET_DISK_file_read(fh, &value, 1))
273 high = (value & 0xF0) >> 4;
275 /* decrement, but once we have reached the max, never go back! */
278 if ((low > 0) && (low < 0xF))
282 clearBit(bitArray, bitIdx);
287 if ((high > 0) && (high < 0xF))
291 clearBit(bitArray, bitIdx);
294 value = ((high << 4) | low);
296 GNUNET_DISK_file_seek(fh, fileslot, GNUNET_DISK_SEEK_SET))
298 GNUNET_log_strerror(GNUNET_ERROR_TYPE_ERROR, "seek");
301 GNUNET_assert(1 == GNUNET_DISK_file_write(fh, &value, 1));
304 #define BUFFSIZE 65536
307 * Creates a file filled with zeroes
309 * @param fh the file handle
310 * @param size the size of the file
311 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
314 make_empty_file(const struct GNUNET_DISK_FileHandle *fh, size_t size)
316 char buffer[BUFFSIZE];
317 size_t bytesleft = size;
320 if (GNUNET_DISK_handle_invalid(fh))
321 return GNUNET_SYSERR;
322 memset(buffer, 0, sizeof(buffer));
323 GNUNET_DISK_file_seek(fh, 0, GNUNET_DISK_SEEK_SET);
324 while (bytesleft > 0)
326 if (bytesleft > sizeof(buffer))
328 res = GNUNET_DISK_file_write(fh, buffer, sizeof(buffer));
334 res = GNUNET_DISK_file_write(fh, buffer, bytesleft);
338 if (GNUNET_SYSERR == res)
339 return GNUNET_SYSERR;
344 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
347 * Iterator (callback) method to be called by the
348 * bloomfilter iterator on each bit that is to be
349 * set or tested for the key.
352 * @param bf the filter to manipulate
353 * @param bit the current bit
354 * @return GNUNET_YES to continue, GNUNET_NO to stop early
356 typedef int (*BitIterator) (void *cls,
357 const struct GNUNET_CONTAINER_BloomFilter *bf,
362 * Call an iterator for each bit that the bloomfilter
363 * must test or set for this element.
365 * @param bf the filter
366 * @param callback the method to call
367 * @param arg extra argument to callback
368 * @param key the key for which we iterate over the BF bits
371 iterateBits(const struct GNUNET_CONTAINER_BloomFilter *bf,
372 BitIterator callback,
374 const struct GNUNET_HashCode *key)
376 struct GNUNET_HashCode tmp[2];
379 unsigned int slot = 0;
381 bitCount = bf->addressesPerElement;
384 GNUNET_assert(bf->bitArraySize > 0);
385 GNUNET_assert(bf->bitArraySize * 8LL > bf->bitArraySize);
388 while (slot < (sizeof(struct GNUNET_HashCode) / sizeof(uint32_t)))
393 ntohl((((uint32_t *)&tmp[round & 1])[slot])) %
394 ((bf->bitArraySize * 8LL))))
403 GNUNET_CRYPTO_hash(&tmp[round & 1],
404 sizeof(struct GNUNET_HashCode),
405 &tmp[(round + 1) & 1]);
414 * Callback: increment bit
416 * @param cls pointer to writeable form of bf
417 * @param bf the filter to manipulate
418 * @param bit the bit to increment
422 incrementBitCallback(void *cls,
423 const struct GNUNET_CONTAINER_BloomFilter *bf,
426 struct GNUNET_CONTAINER_BloomFilter *b = cls;
428 incrementBit(b->bitArray, bit, bf->fh);
434 * Callback: decrement bit
436 * @param cls pointer to writeable form of bf
437 * @param bf the filter to manipulate
438 * @param bit the bit to decrement
442 decrementBitCallback(void *cls,
443 const struct GNUNET_CONTAINER_BloomFilter *bf,
446 struct GNUNET_CONTAINER_BloomFilter *b = cls;
448 decrementBit(b->bitArray, bit, bf->fh);
454 * Callback: test if all bits are set
456 * @param cls pointer set to GNUNET_NO if bit is not set
457 * @param bf the filter
458 * @param bit the bit to test
459 * @return YES if the bit is set, NO if not
462 testBitCallback(void *cls,
463 const struct GNUNET_CONTAINER_BloomFilter *bf,
468 if (GNUNET_NO == testBit(bf->bitArray, bit))
476 /* *********************** INTERFACE **************** */
479 * Load a bloom-filter from a file.
481 * @param filename the name of the file (or the prefix)
482 * @param size the size of the bloom-filter (number of
483 * bytes of storage space to use); will be rounded up
485 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
486 * element (number of bits set per element in the set)
487 * @return the bloomfilter
489 struct GNUNET_CONTAINER_BloomFilter *
490 GNUNET_CONTAINER_bloomfilter_load(const char *filename,
494 struct GNUNET_CONTAINER_BloomFilter *bf;
502 GNUNET_assert(NULL != filename);
503 if ((k == 0) || (size == 0))
508 while ((ui < size) && (ui * 2 > ui))
510 size = ui; /* make sure it's a power of 2 */
512 bf = GNUNET_new(struct GNUNET_CONTAINER_BloomFilter);
513 /* Try to open a bloomfilter file */
514 if (GNUNET_YES == GNUNET_DISK_file_test(filename))
515 bf->fh = GNUNET_DISK_file_open(filename,
516 GNUNET_DISK_OPEN_READWRITE,
517 GNUNET_DISK_PERM_USER_READ |
518 GNUNET_DISK_PERM_USER_WRITE);
521 /* file existed, try to read it! */
522 must_read = GNUNET_YES;
523 if (GNUNET_OK != GNUNET_DISK_file_handle_size(bf->fh, &fsize))
525 GNUNET_DISK_file_close(bf->fh);
531 /* found existing empty file, just overwrite */
532 if (GNUNET_OK != make_empty_file(bf->fh, size * 4LL))
534 GNUNET_log_strerror(GNUNET_ERROR_TYPE_WARNING, "write");
535 GNUNET_DISK_file_close(bf->fh);
540 else if (fsize != ((off_t)size) * 4LL)
543 GNUNET_ERROR_TYPE_ERROR,
545 "Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
546 (unsigned long long)(size * 4LL),
547 (unsigned long long)fsize);
548 GNUNET_DISK_file_close(bf->fh);
555 /* file did not exist, don't read, just create */
556 must_read = GNUNET_NO;
557 bf->fh = GNUNET_DISK_file_open(filename,
558 GNUNET_DISK_OPEN_CREATE |
559 GNUNET_DISK_OPEN_READWRITE,
560 GNUNET_DISK_PERM_USER_READ |
561 GNUNET_DISK_PERM_USER_WRITE);
567 if (GNUNET_OK != make_empty_file(bf->fh, size * 4LL))
569 GNUNET_log_strerror(GNUNET_ERROR_TYPE_WARNING, "write");
570 GNUNET_DISK_file_close(bf->fh);
575 bf->filename = GNUNET_strdup(filename);
577 bf->bitArray = GNUNET_malloc_large(size);
578 if (NULL == bf->bitArray)
581 GNUNET_DISK_file_close(bf->fh);
582 GNUNET_free(bf->filename);
586 bf->bitArraySize = size;
587 bf->addressesPerElement = k;
588 if (GNUNET_YES != must_read)
589 return bf; /* already done! */
590 /* Read from the file what bits we can */
591 rbuff = GNUNET_malloc(BUFFSIZE);
593 while (pos < ((off_t)size) * 8LL)
597 res = GNUNET_DISK_file_read(bf->fh, rbuff, BUFFSIZE);
600 LOG_STRERROR_FILE(GNUNET_ERROR_TYPE_WARNING, "read", bf->filename);
602 GNUNET_free(bf->filename);
603 GNUNET_DISK_file_close(bf->fh);
608 break; /* is ok! we just did not use that many bits yet */
609 for (i = 0; i < res; i++)
611 if ((rbuff[i] & 0x0F) != 0)
612 setBit(bf->bitArray, pos + i * 2);
613 if ((rbuff[i] & 0xF0) != 0)
614 setBit(bf->bitArray, pos + i * 2 + 1);
618 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
626 * Create a bloom filter from raw bits.
628 * @param data the raw bits in memory (maybe NULL,
629 * in which case all bits should be considered
631 * @param size the size of the bloom-filter (number of
632 * bytes of storage space to use); also size of data
633 * -- unless data is NULL
634 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
635 * element (number of bits set per element in the set)
636 * @return the bloomfilter
638 struct GNUNET_CONTAINER_BloomFilter *
639 GNUNET_CONTAINER_bloomfilter_init(const char *data,
643 struct GNUNET_CONTAINER_BloomFilter *bf;
645 if ((0 == k) || (0 == size))
647 bf = GNUNET_new(struct GNUNET_CONTAINER_BloomFilter);
650 bf->bitArray = GNUNET_malloc_large(size);
651 if (NULL == bf->bitArray)
656 bf->bitArraySize = size;
657 bf->addressesPerElement = k;
659 GNUNET_memcpy(bf->bitArray, data, size);
665 * Copy the raw data of this bloomfilter into
666 * the given data array.
668 * @param bf bloomfilter to take the raw data from
669 * @param data where to write the data
670 * @param size the size of the given data array
671 * @return #GNUNET_SYSERR if the data array is not big enough
674 GNUNET_CONTAINER_bloomfilter_get_raw_data(
675 const struct GNUNET_CONTAINER_BloomFilter *bf,
680 return GNUNET_SYSERR;
681 if (bf->bitArraySize != size)
682 return GNUNET_SYSERR;
683 GNUNET_memcpy(data, bf->bitArray, size);
689 * Free the space associated with a filter
690 * in memory, flush to drive if needed (do not
691 * free the space on the drive)
693 * @param bf the filter
696 GNUNET_CONTAINER_bloomfilter_free(struct GNUNET_CONTAINER_BloomFilter *bf)
701 GNUNET_DISK_file_close(bf->fh);
702 GNUNET_free_non_null(bf->filename);
703 GNUNET_free(bf->bitArray);
709 * Reset a bloom filter to empty. Clears the file on disk.
711 * @param bf the filter
714 GNUNET_CONTAINER_bloomfilter_clear(struct GNUNET_CONTAINER_BloomFilter *bf)
719 memset(bf->bitArray, 0, bf->bitArraySize);
720 if (bf->filename != NULL)
721 make_empty_file(bf->fh, bf->bitArraySize * 4LL);
726 * Test if an element is in the filter.
728 * @param e the element
729 * @param bf the filter
730 * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
733 GNUNET_CONTAINER_bloomfilter_test(
734 const struct GNUNET_CONTAINER_BloomFilter *bf,
735 const struct GNUNET_HashCode *e)
742 iterateBits(bf, &testBitCallback, &res, e);
748 * Add an element to the filter
750 * @param bf the filter
751 * @param e the element
754 GNUNET_CONTAINER_bloomfilter_add(struct GNUNET_CONTAINER_BloomFilter *bf,
755 const struct GNUNET_HashCode *e)
759 iterateBits(bf, &incrementBitCallback, bf, e);
764 * Or the entries of the given raw data array with the
765 * data of the given bloom filter. Assumes that
766 * the size of the data array and the current filter
769 * @param bf the filter
770 * @param data the data to or-in
771 * @param size number of bytes in data
774 GNUNET_CONTAINER_bloomfilter_or(struct GNUNET_CONTAINER_BloomFilter *bf,
780 unsigned long long *fc;
781 const unsigned long long *dc;
785 if (bf->bitArraySize != size)
786 return GNUNET_SYSERR;
787 fc = (unsigned long long *)bf->bitArray;
788 dc = (const unsigned long long *)data;
789 n = size / sizeof(unsigned long long);
791 for (i = 0; i < n; i++)
793 for (i = n * sizeof(unsigned long long); i < size; i++)
794 bf->bitArray[i] |= data[i];
799 * Or the entries of the given raw data array with the
800 * data of the given bloom filter. Assumes that
801 * the size of the data array and the current filter
804 * @param bf the filter
805 * @param to_or the bloomfilter to or-in
806 * @return #GNUNET_OK on success
809 GNUNET_CONTAINER_bloomfilter_or2(
810 struct GNUNET_CONTAINER_BloomFilter *bf,
811 const struct GNUNET_CONTAINER_BloomFilter *to_or)
815 unsigned long long *fc;
816 const unsigned long long *dc;
821 if (bf->bitArraySize != to_or->bitArraySize)
824 return GNUNET_SYSERR;
826 size = bf->bitArraySize;
827 fc = (unsigned long long *)bf->bitArray;
828 dc = (const unsigned long long *)to_or->bitArray;
829 n = size / sizeof(unsigned long long);
831 for (i = 0; i < n; i++)
833 for (i = n * sizeof(unsigned long long); i < size; i++)
834 bf->bitArray[i] |= to_or->bitArray[i];
840 * Remove an element from the filter.
842 * @param bf the filter
843 * @param e the element to remove
846 GNUNET_CONTAINER_bloomfilter_remove(struct GNUNET_CONTAINER_BloomFilter *bf,
847 const struct GNUNET_HashCode *e)
851 if (NULL == bf->filename)
853 iterateBits(bf, &decrementBitCallback, bf, e);
857 * Resize a bloom filter. Note that this operation
858 * is pretty costly. Essentially, the bloom filter
859 * needs to be completely re-build.
861 * @param bf the filter
862 * @param iterator an iterator over all elements stored in the BF
863 * @param iterator_cls argument to the iterator function
864 * @param size the new size for the filter
865 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
868 GNUNET_CONTAINER_bloomfilter_resize(struct GNUNET_CONTAINER_BloomFilter *bf,
869 GNUNET_CONTAINER_HashCodeIterator iterator,
874 struct GNUNET_HashCode hc;
877 GNUNET_free(bf->bitArray);
881 size = i; /* make sure it's a power of 2 */
882 bf->addressesPerElement = k;
883 bf->bitArraySize = size;
884 bf->bitArray = GNUNET_malloc(size);
885 if (NULL != bf->filename)
886 make_empty_file(bf->fh, bf->bitArraySize * 4LL);
887 while (GNUNET_YES == iterator(iterator_cls, &hc))
888 GNUNET_CONTAINER_bloomfilter_add(bf, &hc);
891 /* end of container_bloomfilter.c */