2 This file is part of GNUnet.
3 Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
16 * @file util/container_bloomfilter.c
17 * @brief data structure used to reduce disk accesses.
19 * The idea basically: Create a signature for each element in the
20 * database. Add those signatures to a bit array. When doing a lookup,
21 * check if the bit array matches the signature of the requested
22 * element. If yes, address the disk, otherwise return 'not found'.
24 * A property of the bloom filter is that sometimes we will have
25 * a match even if the element is not on the disk (then we do
26 * an unnecessary disk access), but what's most important is that
27 * we never get a single "false negative".
29 * To be able to delete entries from the bloom filter, we maintain
30 * a 4 bit counter in the file on the drive (we still use only one
33 * @author Igor Wronsky
34 * @author Christian Grothoff
38 #include "gnunet_util_lib.h"
40 #define LOG(kind,...) GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__)
42 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall)
44 #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util-container-bloomfilter", syscall, filename)
46 struct GNUNET_CONTAINER_BloomFilter
50 * The actual bloomfilter bit array
55 * Filename of the filter
60 * The bit counter file on disk
62 struct GNUNET_DISK_FileHandle *fh;
65 * How many bits we set for each stored element
67 unsigned int addressesPerElement;
70 * Size of bitArray in bytes
78 * Get the number of the addresses set per element in the bloom filter.
80 * @param bf the filter
81 * @return addresses set per element in the bf
84 GNUNET_CONTAINER_bloomfilter_get_element_addresses (const struct GNUNET_CONTAINER_BloomFilter
89 return bf->addressesPerElement;
94 * Get size of the bloom filter.
96 * @param bf the filter
97 * @return number of bytes used for the data of the bloom filter
100 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
105 return bf->bitArraySize;
110 * Copy an existing memory. Any association with a file
111 * on-disk will be lost in the process.
112 * @param bf the filter to copy
113 * @return copy of the bf
115 struct GNUNET_CONTAINER_BloomFilter *
116 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
119 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
120 bf->addressesPerElement);
125 * Sets a bit active in the bitArray. Increment bit-specific
126 * usage counter on disk only if below 4bit max (==15).
128 * @param bitArray memory area to set the bit in
129 * @param bitIdx which bit to set
132 setBit (char *bitArray, unsigned int bitIdx)
135 unsigned int targetBit;
137 arraySlot = bitIdx / 8;
138 targetBit = (1L << (bitIdx % 8));
139 bitArray[arraySlot] |= targetBit;
143 * Clears a bit from bitArray. Bit is cleared from the array
144 * only if the respective usage counter on the disk hits/is zero.
146 * @param bitArray memory area to set the bit in
147 * @param bitIdx which bit to unset
150 clearBit (char *bitArray, unsigned int bitIdx)
153 unsigned int targetBit;
156 targetBit = (1L << (bitIdx % 8));
157 bitArray[slot] = bitArray[slot] & (~targetBit);
161 * Checks if a bit is active in the bitArray
163 * @param bitArray memory area to set the bit in
164 * @param bitIdx which bit to test
165 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
168 testBit (char *bitArray, unsigned int bitIdx)
171 unsigned int targetBit;
174 targetBit = (1L << (bitIdx % 8));
175 if (bitArray[slot] & targetBit)
182 * Sets a bit active in the bitArray and increments
183 * bit-specific usage counter on disk (but only if
184 * the counter was below 4 bit max (==15)).
186 * @param bitArray memory area to set the bit in
187 * @param bitIdx which bit to test
188 * @param fh A file to keep the 4 bit address usage counters in
191 incrementBit (char *bitArray, unsigned int bitIdx,
192 const struct GNUNET_DISK_FileHandle *fh)
198 unsigned int targetLoc;
200 setBit (bitArray, bitIdx);
201 if (GNUNET_DISK_handle_invalid (fh))
203 /* Update the counter file on disk */
204 fileSlot = bitIdx / 2;
205 targetLoc = bitIdx % 2;
207 GNUNET_assert (fileSlot ==
208 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
209 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
212 high = (value & (~0xF)) >> 4;
224 value = ((high << 4) | low);
225 GNUNET_assert (fileSlot ==
226 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
227 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
231 * Clears a bit from bitArray if the respective usage
232 * counter on the disk hits/is zero.
234 * @param bitArray memory area to set the bit in
235 * @param bitIdx which bit to test
236 * @param fh A file to keep the 4bit address usage counters in
239 decrementBit (char *bitArray, unsigned int bitIdx,
240 const struct GNUNET_DISK_FileHandle *fh)
246 unsigned int targetLoc;
248 if (GNUNET_DISK_handle_invalid (fh))
249 return; /* cannot decrement! */
250 /* Each char slot in the counter file holds two 4 bit counters */
251 fileslot = bitIdx / 2;
252 targetLoc = bitIdx % 2;
253 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
255 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
258 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
261 high = (value & 0xF0) >> 4;
263 /* decrement, but once we have reached the max, never go back! */
266 if ((low > 0) && (low < 0xF))
270 clearBit (bitArray, bitIdx);
275 if ((high > 0) && (high < 0xF))
279 clearBit (bitArray, bitIdx);
282 value = ((high << 4) | low);
283 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
285 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
288 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
291 #define BUFFSIZE 65536
294 * Creates a file filled with zeroes
296 * @param fh the file handle
297 * @param size the size of the file
298 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
301 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
303 char buffer[BUFFSIZE];
304 size_t bytesleft = size;
307 if (GNUNET_DISK_handle_invalid (fh))
308 return GNUNET_SYSERR;
309 memset (buffer, 0, sizeof (buffer));
310 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
311 while (bytesleft > 0)
313 if (bytesleft > sizeof (buffer))
315 res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
321 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
325 if (GNUNET_SYSERR == res)
326 return GNUNET_SYSERR;
331 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
334 * Iterator (callback) method to be called by the
335 * bloomfilter iterator on each bit that is to be
336 * set or tested for the key.
339 * @param bf the filter to manipulate
340 * @param bit the current bit
341 * @return GNUNET_YES to continue, GNUNET_NO to stop early
343 typedef int (*BitIterator) (void *cls,
344 const struct GNUNET_CONTAINER_BloomFilter * bf,
349 * Call an iterator for each bit that the bloomfilter
350 * must test or set for this element.
352 * @param bf the filter
353 * @param callback the method to call
354 * @param arg extra argument to callback
355 * @param key the key for which we iterate over the BF bits
358 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
359 BitIterator callback, void *arg, const struct GNUNET_HashCode *key)
361 struct GNUNET_HashCode tmp[2];
364 unsigned int slot = 0;
366 bitCount = bf->addressesPerElement;
369 GNUNET_assert (bf->bitArraySize > 0);
370 GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
373 while (slot < (sizeof (struct GNUNET_HashCode) / sizeof (uint32_t)))
377 ntohl ((((uint32_t *) & tmp[round & 1])[slot])) %
378 ((bf->bitArraySize * 8LL))))
387 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (struct GNUNET_HashCode),
388 &tmp[(round + 1) & 1]);
397 * Callback: increment bit
399 * @param cls pointer to writeable form of bf
400 * @param bf the filter to manipulate
401 * @param bit the bit to increment
405 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
408 struct GNUNET_CONTAINER_BloomFilter *b = cls;
410 incrementBit (b->bitArray, bit, bf->fh);
416 * Callback: decrement bit
418 * @param cls pointer to writeable form of bf
419 * @param bf the filter to manipulate
420 * @param bit the bit to decrement
424 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
427 struct GNUNET_CONTAINER_BloomFilter *b = cls;
429 decrementBit (b->bitArray, bit, bf->fh);
435 * Callback: test if all bits are set
437 * @param cls pointer set to GNUNET_NO if bit is not set
438 * @param bf the filter
439 * @param bit the bit to test
440 * @return YES if the bit is set, NO if not
443 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
448 if (GNUNET_NO == testBit (bf->bitArray, bit))
456 /* *********************** INTERFACE **************** */
459 * Load a bloom-filter from a file.
461 * @param filename the name of the file (or the prefix)
462 * @param size the size of the bloom-filter (number of
463 * bytes of storage space to use); will be rounded up
465 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
466 * element (number of bits set per element in the set)
467 * @return the bloomfilter
469 struct GNUNET_CONTAINER_BloomFilter *
470 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
473 struct GNUNET_CONTAINER_BloomFilter *bf;
481 GNUNET_assert (NULL != filename);
482 if ((k == 0) || (size == 0))
487 while ( (ui < size) &&
490 size = ui; /* make sure it's a power of 2 */
492 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
493 /* Try to open a bloomfilter file */
494 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
496 GNUNET_DISK_file_open (filename,
497 GNUNET_DISK_OPEN_READWRITE,
498 GNUNET_DISK_PERM_USER_READ |
499 GNUNET_DISK_PERM_USER_WRITE);
502 /* file existed, try to read it! */
503 must_read = GNUNET_YES;
505 GNUNET_DISK_file_handle_size (bf->fh, &fsize))
507 GNUNET_DISK_file_close (bf->fh);
513 /* found existing empty file, just overwrite */
515 make_empty_file (bf->fh, size * 4LL))
517 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
519 GNUNET_DISK_file_close (bf->fh);
524 else if (fsize != ((off_t) size) * 4LL)
526 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
527 _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
528 (unsigned long long) (size * 4LL),
529 (unsigned long long) fsize);
530 GNUNET_DISK_file_close (bf->fh);
537 /* file did not exist, don't read, just create */
538 must_read = GNUNET_NO;
540 GNUNET_DISK_file_open (filename,
541 GNUNET_DISK_OPEN_CREATE |
542 GNUNET_DISK_OPEN_READWRITE,
543 GNUNET_DISK_PERM_USER_READ |
544 GNUNET_DISK_PERM_USER_WRITE);
550 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
552 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
554 GNUNET_DISK_file_close (bf->fh);
559 bf->filename = GNUNET_strdup (filename);
561 bf->bitArray = GNUNET_malloc_large (size);
562 if (NULL == bf->bitArray)
565 GNUNET_DISK_file_close (bf->fh);
566 GNUNET_free (bf->filename);
570 bf->bitArraySize = size;
571 bf->addressesPerElement = k;
572 if (GNUNET_YES != must_read)
573 return bf; /* already done! */
574 /* Read from the file what bits we can */
575 rbuff = GNUNET_malloc (BUFFSIZE);
577 while (pos < ((off_t) size) * 8LL)
581 res = GNUNET_DISK_file_read (bf->fh,
586 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING,
590 GNUNET_free (bf->filename);
591 GNUNET_DISK_file_close (bf->fh);
596 break; /* is ok! we just did not use that many bits yet */
597 for (i = 0; i < res; i++)
599 if ((rbuff[i] & 0x0F) != 0)
600 setBit (bf->bitArray, pos + i * 2);
601 if ((rbuff[i] & 0xF0) != 0)
602 setBit (bf->bitArray, pos + i * 2 + 1);
606 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
614 * Create a bloom filter from raw bits.
616 * @param data the raw bits in memory (maybe NULL,
617 * in which case all bits should be considered
619 * @param size the size of the bloom-filter (number of
620 * bytes of storage space to use); also size of data
621 * -- unless data is NULL
622 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
623 * element (number of bits set per element in the set)
624 * @return the bloomfilter
626 struct GNUNET_CONTAINER_BloomFilter *
627 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
630 struct GNUNET_CONTAINER_BloomFilter *bf;
632 if ((0 == k) || (0 == size))
634 bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter);
637 bf->bitArray = GNUNET_malloc_large (size);
638 if (NULL == bf->bitArray)
643 bf->bitArraySize = size;
644 bf->addressesPerElement = k;
646 GNUNET_memcpy (bf->bitArray, data, size);
652 * Copy the raw data of this bloomfilter into
653 * the given data array.
655 * @param bf bloomfilter to take the raw data from
656 * @param data where to write the data
657 * @param size the size of the given data array
658 * @return #GNUNET_SYSERR if the data array is not big enough
661 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
662 GNUNET_CONTAINER_BloomFilter *bf,
663 char *data, size_t size)
666 return GNUNET_SYSERR;
667 if (bf->bitArraySize != size)
668 return GNUNET_SYSERR;
669 GNUNET_memcpy (data, bf->bitArray, size);
675 * Free the space associated with a filter
676 * in memory, flush to drive if needed (do not
677 * free the space on the drive)
679 * @param bf the filter
682 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
687 GNUNET_DISK_file_close (bf->fh);
688 GNUNET_free_non_null (bf->filename);
689 GNUNET_free (bf->bitArray);
695 * Reset a bloom filter to empty. Clears the file on disk.
697 * @param bf the filter
700 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
705 memset (bf->bitArray, 0, bf->bitArraySize);
706 if (bf->filename != NULL)
707 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
712 * Test if an element is in the filter.
714 * @param e the element
715 * @param bf the filter
716 * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not
719 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter *bf,
720 const struct GNUNET_HashCode * e)
727 iterateBits (bf, &testBitCallback, &res, e);
733 * Add an element to the filter
735 * @param bf the filter
736 * @param e the element
739 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
740 const struct GNUNET_HashCode * e)
744 iterateBits (bf, &incrementBitCallback, bf, e);
749 * Or the entries of the given raw data array with the
750 * data of the given bloom filter. Assumes that
751 * the size of the data array and the current filter
754 * @param bf the filter
755 * @param data the data to or-in
756 * @param size number of bytes in data
759 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
765 unsigned long long *fc;
766 const unsigned long long *dc;
770 if (bf->bitArraySize != size)
771 return GNUNET_SYSERR;
772 fc = (unsigned long long *) bf->bitArray;
773 dc = (const unsigned long long *) data;
774 n = size / sizeof (unsigned long long);
776 for (i = 0; i < n; i++)
778 for (i = n * sizeof (unsigned long long); i < size; i++)
779 bf->bitArray[i] |= data[i];
784 * Or the entries of the given raw data array with the
785 * data of the given bloom filter. Assumes that
786 * the size of the data array and the current filter
789 * @param bf the filter
790 * @param to_or the bloomfilter to or-in
791 * @return #GNUNET_OK on success
794 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
795 const struct GNUNET_CONTAINER_BloomFilter *to_or)
799 unsigned long long *fc;
800 const unsigned long long *dc;
805 if (bf->bitArraySize != to_or->bitArraySize)
808 return GNUNET_SYSERR;
810 size = bf->bitArraySize;
811 fc = (unsigned long long *) bf->bitArray;
812 dc = (const unsigned long long *) to_or->bitArray;
813 n = size / sizeof (unsigned long long);
815 for (i = 0; i < n; i++)
817 for (i = n * sizeof (unsigned long long); i < size; i++)
818 bf->bitArray[i] |= to_or->bitArray[i];
824 * Remove an element from the filter.
826 * @param bf the filter
827 * @param e the element to remove
830 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
831 const struct GNUNET_HashCode *e)
835 if (NULL == bf->filename)
838 &decrementBitCallback,
844 * Resize a bloom filter. Note that this operation
845 * is pretty costly. Essentially, the bloom filter
846 * needs to be completely re-build.
848 * @param bf the filter
849 * @param iterator an iterator over all elements stored in the BF
850 * @param iterator_cls argument to the iterator function
851 * @param size the new size for the filter
852 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
855 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
856 GNUNET_CONTAINER_HashCodeIterator iterator,
861 struct GNUNET_HashCode hc;
864 GNUNET_free (bf->bitArray);
868 size = i; /* make sure it's a power of 2 */
869 bf->addressesPerElement = k;
870 bf->bitArraySize = size;
871 bf->bitArray = GNUNET_malloc (size);
872 if (NULL != bf->filename)
873 make_empty_file (bf->fh,
874 bf->bitArraySize * 4LL);
875 while (GNUNET_YES == iterator (iterator_cls,
877 GNUNET_CONTAINER_bloomfilter_add (bf,
881 /* end of container_bloomfilter.c */