2 This file is part of GNUnet.
3 Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_util_lib.h"
45 #define LOG(kind,...) GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
47 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
49 #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util-container-bloomfilter", syscall, filename)
51 struct GNUNET_CONTAINER_BloomFilter
55 * The actual bloomfilter bit array
60 * Filename of the filter
65 * The bit counter file on disk
67 struct GNUNET_DISK_FileHandle *fh;
70 * How many bits we set for each stored element
72 unsigned int addressesPerElement;
75 * Size of bitArray in bytes
83 * Get the number of the addresses set per element in the bloom filter.
85 * @param bf the filter
86 * @return addresses set per element in the bf
89 GNUNET_CONTAINER_bloomfilter_get_element_addresses (const struct GNUNET_CONTAINER_BloomFilter
94 return bf->addressesPerElement;
99 * Get size of the bloom filter.
101 * @param bf the filter
102 * @return number of bytes used for the data of the bloom filter
105 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
110 return bf->bitArraySize;
115 * Copy an existing memory. Any association with a file
116 * on-disk will be lost in the process.
117 * @param bf the filter to copy
118 * @return copy of the bf
120 struct GNUNET_CONTAINER_BloomFilter *
121 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
124 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
125 bf->addressesPerElement);
130 * Sets a bit active in the bitArray. Increment bit-specific
131 * usage counter on disk only if below 4bit max (==15).
133 * @param bitArray memory area to set the bit in
134 * @param bitIdx which bit to set
137 setBit (char *bitArray, unsigned int bitIdx)
140 unsigned int targetBit;
142 arraySlot = bitIdx / 8;
143 targetBit = (1L << (bitIdx % 8));
144 bitArray[arraySlot] |= targetBit;
148 * Clears a bit from bitArray. Bit is cleared from the array
149 * only if the respective usage counter on the disk hits/is zero.
151 * @param bitArray memory area to set the bit in
152 * @param bitIdx which bit to unset
155 clearBit (char *bitArray, unsigned int bitIdx)
158 unsigned int targetBit;
161 targetBit = (1L << (bitIdx % 8));
162 bitArray[slot] = bitArray[slot] & (~targetBit);
166 * Checks if a bit is active in the bitArray
168 * @param bitArray memory area to set the bit in
169 * @param bitIdx which bit to test
170 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
173 testBit (char *bitArray, unsigned int bitIdx)
176 unsigned int targetBit;
179 targetBit = (1L << (bitIdx % 8));
180 if (bitArray[slot] & targetBit)
187 * Sets a bit active in the bitArray and increments
188 * bit-specific usage counter on disk (but only if
189 * the counter was below 4 bit max (==15)).
191 * @param bitArray memory area to set the bit in
192 * @param bitIdx which bit to test
193 * @param fh A file to keep the 4 bit address usage counters in
196 incrementBit (char *bitArray, unsigned int bitIdx,
197 const struct GNUNET_DISK_FileHandle *fh)
203 unsigned int targetLoc;
205 setBit (bitArray, bitIdx);
206 if (GNUNET_DISK_handle_invalid (fh))
208 /* Update the counter file on disk */
209 fileSlot = bitIdx / 2;
210 targetLoc = bitIdx % 2;
212 GNUNET_assert (fileSlot ==
213 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
214 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
217 high = (value & (~0xF)) >> 4;
229 value = ((high << 4) | low);
230 GNUNET_assert (fileSlot ==
231 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
232 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
236 * Clears a bit from bitArray if the respective usage
237 * counter on the disk hits/is zero.
239 * @param bitArray memory area to set the bit in
240 * @param bitIdx which bit to test
241 * @param fh A file to keep the 4bit address usage counters in
244 decrementBit (char *bitArray, unsigned int bitIdx,
245 const struct GNUNET_DISK_FileHandle *fh)
251 unsigned int targetLoc;
253 if (GNUNET_DISK_handle_invalid (fh))
254 return; /* cannot decrement! */
255 /* Each char slot in the counter file holds two 4 bit counters */
256 fileslot = bitIdx / 2;
257 targetLoc = bitIdx % 2;
258 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
260 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
263 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
266 high = (value & 0xF0) >> 4;
268 /* decrement, but once we have reached the max, never go back! */
271 if ((low > 0) && (low < 0xF))
275 clearBit (bitArray, bitIdx);
280 if ((high > 0) && (high < 0xF))
284 clearBit (bitArray, bitIdx);
287 value = ((high << 4) | low);
288 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
290 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
293 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
296 #define BUFFSIZE 65536
299 * Creates a file filled with zeroes
301 * @param fh the file handle
302 * @param size the size of the file
303 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
306 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
308 char buffer[BUFFSIZE];
309 size_t bytesleft = size;
312 if (GNUNET_DISK_handle_invalid (fh))
313 return GNUNET_SYSERR;
314 memset (buffer, 0, sizeof (buffer));
315 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
316 while (bytesleft > 0)
318 if (bytesleft > sizeof (buffer))
320 res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
326 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
330 if (GNUNET_SYSERR == res)
331 return GNUNET_SYSERR;
336 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
339 * Iterator (callback) method to be called by the
340 * bloomfilter iterator on each bit that is to be
341 * set or tested for the key.
344 * @param bf the filter to manipulate
345 * @param bit the current bit
346 * @return GNUNET_YES to continue, GNUNET_NO to stop early
348 typedef int (*BitIterator) (void *cls,
349 const struct GNUNET_CONTAINER_BloomFilter * bf,
354 * Call an iterator for each bit that the bloomfilter
355 * must test or set for this element.
357 * @param bf the filter
358 * @param callback the method to call
359 * @param arg extra argument to callback
360 * @param key the key for which we iterate over the BF bits
363 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
364 BitIterator callback, void *arg, const struct GNUNET_HashCode *key)
366 struct GNUNET_HashCode tmp[2];
369 unsigned int slot = 0;
371 bitCount = bf->addressesPerElement;
374 GNUNET_assert (bf->bitArraySize > 0);
375 GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
378 while (slot < (sizeof (struct GNUNET_HashCode) / sizeof (uint32_t)))
382 ntohl ((((uint32_t *) & tmp[round & 1])[slot])) %
383 ((bf->bitArraySize * 8LL))))
392 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (struct GNUNET_HashCode),
393 &tmp[(round + 1) & 1]);
402 * Callback: increment bit
404 * @param cls pointer to writeable form of bf
405 * @param bf the filter to manipulate
406 * @param bit the bit to increment
410 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
413 struct GNUNET_CONTAINER_BloomFilter *b = cls;
415 incrementBit (b->bitArray, bit, bf->fh);
421 * Callback: decrement bit
423 * @param cls pointer to writeable form of bf
424 * @param bf the filter to manipulate
425 * @param bit the bit to decrement
429 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
432 struct GNUNET_CONTAINER_BloomFilter *b = cls;
434 decrementBit (b->bitArray, bit, bf->fh);
440 * Callback: test if all bits are set
442 * @param cls pointer set to GNUNET_NO if bit is not set
443 * @param bf the filter
444 * @param bit the bit to test
445 * @return YES if the bit is set, NO if not
448 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
453 if (GNUNET_NO == testBit (bf->bitArray, bit))
461 /* *********************** INTERFACE **************** */
464 * Load a bloom-filter from a file.
466 * @param filename the name of the file (or the prefix)
467 * @param size the size of the bloom-filter (number of
468 * bytes of storage space to use); will be rounded up
470 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
471 * element (number of bits set per element in the set)
472 * @return the bloomfilter
474 struct GNUNET_CONTAINER_BloomFilter *
475 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
478 struct GNUNET_CONTAINER_BloomFilter *bf;
486 GNUNET_assert (NULL != filename);
487 if ((k == 0) || (size == 0))
492 while ( (ui < size) &&
495 size = ui; /* make sure it's a power of 2 */
497 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
498 /* Try to open a bloomfilter file */
499 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
501 GNUNET_DISK_file_open (filename,
502 GNUNET_DISK_OPEN_READWRITE,
503 GNUNET_DISK_PERM_USER_READ |
504 GNUNET_DISK_PERM_USER_WRITE);
507 /* file existed, try to read it! */
508 must_read = GNUNET_YES;
510 GNUNET_DISK_file_handle_size (bf->fh, &fsize))
512 GNUNET_DISK_file_close (bf->fh);
518 /* found existing empty file, just overwrite */
520 make_empty_file (bf->fh, size * 4LL))
522 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
524 GNUNET_DISK_file_close (bf->fh);
529 else if (fsize != ((off_t) size) * 4LL)
531 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
532 _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
533 (unsigned long long) (size * 4LL),
534 (unsigned long long) fsize);
535 GNUNET_DISK_file_close (bf->fh);
542 /* file did not exist, don't read, just create */
543 must_read = GNUNET_NO;
545 GNUNET_DISK_file_open (filename,
546 GNUNET_DISK_OPEN_CREATE |
547 GNUNET_DISK_OPEN_READWRITE,
548 GNUNET_DISK_PERM_USER_READ |
549 GNUNET_DISK_PERM_USER_WRITE);
555 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
557 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
559 GNUNET_DISK_file_close (bf->fh);
564 bf->filename = GNUNET_strdup (filename);
566 bf->bitArray = GNUNET_malloc_large (size);
567 if (NULL == bf->bitArray)
570 GNUNET_DISK_file_close (bf->fh);
571 GNUNET_free (bf->filename);
575 bf->bitArraySize = size;
576 bf->addressesPerElement = k;
577 if (GNUNET_YES != must_read)
578 return bf; /* already done! */
579 /* Read from the file what bits we can */
580 rbuff = GNUNET_malloc (BUFFSIZE);
582 while (pos < ((off_t) size) * 8LL)
586 res = GNUNET_DISK_file_read (bf->fh,
591 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING,
595 GNUNET_free (bf->filename);
596 GNUNET_DISK_file_close (bf->fh);
601 break; /* is ok! we just did not use that many bits yet */
602 for (i = 0; i < res; i++)
604 if ((rbuff[i] & 0x0F) != 0)
605 setBit (bf->bitArray, pos + i * 2);
606 if ((rbuff[i] & 0xF0) != 0)
607 setBit (bf->bitArray, pos + i * 2 + 1);
611 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
619 * Create a bloom filter from raw bits.
621 * @param data the raw bits in memory (maybe NULL,
622 * in which case all bits should be considered
624 * @param size the size of the bloom-filter (number of
625 * bytes of storage space to use); also size of data
626 * -- unless data is NULL
627 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
628 * element (number of bits set per element in the set)
629 * @return the bloomfilter
631 struct GNUNET_CONTAINER_BloomFilter *
632 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
635 struct GNUNET_CONTAINER_BloomFilter *bf;
637 if ((0 == k) || (0 == size))
639 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
642 bf->bitArray = GNUNET_malloc_large (size);
643 if (NULL == bf->bitArray)
648 bf->bitArraySize = size;
649 bf->addressesPerElement = k;
651 GNUNET_memcpy (bf->bitArray, data, size);
657 * Copy the raw data of this bloomfilter into
658 * the given data array.
660 * @param bf bloomfilter to take the raw data from
661 * @param data where to write the data
662 * @param size the size of the given data array
663 * @return GNUNET_SYSERR if the data array is not big enough
666 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
667 GNUNET_CONTAINER_BloomFilter *bf,
668 char *data, size_t size)
671 return GNUNET_SYSERR;
672 if (bf->bitArraySize != size)
673 return GNUNET_SYSERR;
674 GNUNET_memcpy (data, bf->bitArray, size);
680 * Free the space associated with a filter
681 * in memory, flush to drive if needed (do not
682 * free the space on the drive)
684 * @param bf the filter
687 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
692 GNUNET_DISK_file_close (bf->fh);
693 GNUNET_free_non_null (bf->filename);
694 GNUNET_free (bf->bitArray);
700 * Reset a bloom filter to empty. Clears the file on disk.
702 * @param bf the filter
705 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
710 memset (bf->bitArray, 0, bf->bitArraySize);
711 if (bf->filename != NULL)
712 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
717 * Test if an element is in the filter.
719 * @param e the element
720 * @param bf the filter
721 * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
724 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter *bf,
725 const struct GNUNET_HashCode * e)
732 iterateBits (bf, &testBitCallback, &res, e);
738 * Add an element to the filter
740 * @param bf the filter
741 * @param e the element
744 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
745 const struct GNUNET_HashCode * e)
749 iterateBits (bf, &incrementBitCallback, bf, e);
754 * Or the entries of the given raw data array with the
755 * data of the given bloom filter. Assumes that
756 * the size of the data array and the current filter
759 * @param bf the filter
760 * @param data the data to or-in
761 * @param size number of bytes in data
764 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
770 unsigned long long *fc;
771 const unsigned long long *dc;
775 if (bf->bitArraySize != size)
776 return GNUNET_SYSERR;
777 fc = (unsigned long long *) bf->bitArray;
778 dc = (const unsigned long long *) data;
779 n = size / sizeof (unsigned long long);
781 for (i = 0; i < n; i++)
783 for (i = n * sizeof (unsigned long long); i < size; i++)
784 bf->bitArray[i] |= data[i];
789 * Or the entries of the given raw data array with the
790 * data of the given bloom filter. Assumes that
791 * the size of the data array and the current filter
794 * @param bf the filter
795 * @param to_or the bloomfilter to or-in
796 * @return #GNUNET_OK on success
799 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
800 const struct GNUNET_CONTAINER_BloomFilter *to_or)
804 unsigned long long *fc;
805 const unsigned long long *dc;
810 if (bf->bitArraySize != to_or->bitArraySize)
813 return GNUNET_SYSERR;
815 size = bf->bitArraySize;
816 fc = (unsigned long long *) bf->bitArray;
817 dc = (const unsigned long long *) to_or->bitArray;
818 n = size / sizeof (unsigned long long);
820 for (i = 0; i < n; i++)
822 for (i = n * sizeof (unsigned long long); i < size; i++)
823 bf->bitArray[i] |= to_or->bitArray[i];
829 * Remove an element from the filter.
831 * @param bf the filter
832 * @param e the element to remove
835 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
836 const struct GNUNET_HashCode *e)
840 if (NULL == bf->filename)
843 &decrementBitCallback,
849 * Resize a bloom filter. Note that this operation
850 * is pretty costly. Essentially, the bloom filter
851 * needs to be completely re-build.
853 * @param bf the filter
854 * @param iterator an iterator over all elements stored in the BF
855 * @param iterator_cls argument to the iterator function
856 * @param size the new size for the filter
857 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
860 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
861 GNUNET_CONTAINER_HashCodeIterator iterator,
866 struct GNUNET_HashCode hc;
869 GNUNET_free (bf->bitArray);
873 size = i; /* make sure it's a power of 2 */
874 bf->addressesPerElement = k;
875 bf->bitArraySize = size;
876 bf->bitArray = GNUNET_malloc (size);
877 if (NULL != bf->filename)
878 make_empty_file (bf->fh,
879 bf->bitArraySize * 4LL);
880 while (GNUNET_YES == iterator (iterator_cls,
882 GNUNET_CONTAINER_bloomfilter_add (bf,
886 /* end of container_bloomfilter.c */