2 This file is part of GNUnet.
3 (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file util/container_bloomfilter.c
22 * @brief data structure used to reduce disk accesses.
24 * The idea basically: Create a signature for each element in the
25 * database. Add those signatures to a bit array. When doing a lookup,
26 * check if the bit array matches the signature of the requested
27 * element. If yes, address the disk, otherwise return 'not found'.
29 * A property of the bloom filter is that sometimes we will have
30 * a match even if the element is not on the disk (then we do
31 * an unnecessary disk access), but what's most important is that
32 * we never get a single "false negative".
34 * To be able to delete entries from the bloom filter, we maintain
35 * a 4 bit counter in the file on the drive (we still use only one
38 * @author Igor Wronsky
39 * @author Christian Grothoff
43 #include "gnunet_util_lib.h"
45 #define LOG(kind,...) GNUNET_log_from (kind, "util", __VA_ARGS__)
47 #define LOG_STRERROR(kind,syscall) GNUNET_log_from_strerror (kind, "util", syscall)
49 #define LOG_STRERROR_FILE(kind,syscall,filename) GNUNET_log_from_strerror_file (kind, "util", syscall, filename)
51 struct GNUNET_CONTAINER_BloomFilter
55 * The actual bloomfilter bit array
60 * Filename of the filter
65 * The bit counter file on disk
67 struct GNUNET_DISK_FileHandle *fh;
70 * How many bits we set for each stored element
72 unsigned int addressesPerElement;
75 * Size of bitArray in bytes
84 * Get size of the bloom filter.
86 * @param bf the filter
87 * @return number of bytes used for the data of the bloom filter
90 GNUNET_CONTAINER_bloomfilter_get_size (const struct GNUNET_CONTAINER_BloomFilter
95 return bf->bitArraySize;
100 * Copy an existing memory. Any association with a file
101 * on-disk will be lost in the process.
102 * @param bf the filter to copy
103 * @return copy of the bf
105 struct GNUNET_CONTAINER_BloomFilter *
106 GNUNET_CONTAINER_bloomfilter_copy (const struct GNUNET_CONTAINER_BloomFilter
109 return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, bf->bitArraySize,
110 bf->addressesPerElement);
115 * Sets a bit active in the bitArray. Increment bit-specific
116 * usage counter on disk only if below 4bit max (==15).
118 * @param bitArray memory area to set the bit in
119 * @param bitIdx which bit to set
122 setBit (char *bitArray, unsigned int bitIdx)
125 unsigned int targetBit;
127 arraySlot = bitIdx / 8;
128 targetBit = (1L << (bitIdx % 8));
129 bitArray[arraySlot] |= targetBit;
133 * Clears a bit from bitArray. Bit is cleared from the array
134 * only if the respective usage counter on the disk hits/is zero.
136 * @param bitArray memory area to set the bit in
137 * @param bitIdx which bit to unset
140 clearBit (char *bitArray, unsigned int bitIdx)
143 unsigned int targetBit;
146 targetBit = (1L << (bitIdx % 8));
147 bitArray[slot] = bitArray[slot] & (~targetBit);
151 * Checks if a bit is active in the bitArray
153 * @param bitArray memory area to set the bit in
154 * @param bitIdx which bit to test
155 * @return GNUNET_YES if the bit is set, GNUNET_NO if not.
158 testBit (char *bitArray, unsigned int bitIdx)
161 unsigned int targetBit;
164 targetBit = (1L << (bitIdx % 8));
165 if (bitArray[slot] & targetBit)
172 * Sets a bit active in the bitArray and increments
173 * bit-specific usage counter on disk (but only if
174 * the counter was below 4 bit max (==15)).
176 * @param bitArray memory area to set the bit in
177 * @param bitIdx which bit to test
178 * @param fh A file to keep the 4 bit address usage counters in
181 incrementBit (char *bitArray, unsigned int bitIdx,
182 const struct GNUNET_DISK_FileHandle *fh)
188 unsigned int targetLoc;
190 setBit (bitArray, bitIdx);
191 if (GNUNET_DISK_handle_invalid (fh))
193 /* Update the counter file on disk */
194 fileSlot = bitIdx / 2;
195 targetLoc = bitIdx % 2;
197 GNUNET_assert (fileSlot ==
198 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
199 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
202 high = (value & (~0xF)) >> 4;
214 value = ((high << 4) | low);
215 GNUNET_assert (fileSlot ==
216 GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET));
217 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
221 * Clears a bit from bitArray if the respective usage
222 * counter on the disk hits/is zero.
224 * @param bitArray memory area to set the bit in
225 * @param bitIdx which bit to test
226 * @param fh A file to keep the 4bit address usage counters in
229 decrementBit (char *bitArray, unsigned int bitIdx,
230 const struct GNUNET_DISK_FileHandle *fh)
236 unsigned int targetLoc;
238 if (GNUNET_DISK_handle_invalid (fh))
239 return; /* cannot decrement! */
240 /* Each char slot in the counter file holds two 4 bit counters */
241 fileslot = bitIdx / 2;
242 targetLoc = bitIdx % 2;
243 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
245 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
248 if (1 != GNUNET_DISK_file_read (fh, &value, 1))
251 high = (value & 0xF0) >> 4;
253 /* decrement, but once we have reached the max, never go back! */
256 if ((low > 0) && (low < 0xF))
260 clearBit (bitArray, bitIdx);
265 if ((high > 0) && (high < 0xF))
269 clearBit (bitArray, bitIdx);
272 value = ((high << 4) | low);
273 if (GNUNET_SYSERR == GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET))
275 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek");
278 GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1));
281 #define BUFFSIZE 65536
284 * Creates a file filled with zeroes
286 * @param fh the file handle
287 * @param size the size of the file
288 * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise
291 make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size)
293 char buffer[BUFFSIZE];
294 size_t bytesleft = size;
297 if (GNUNET_DISK_handle_invalid (fh))
298 return GNUNET_SYSERR;
299 memset (buffer, 0, sizeof (buffer));
300 GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET);
301 while (bytesleft > 0)
303 if (bytesleft > sizeof (buffer))
305 res = GNUNET_DISK_file_write (fh, buffer, sizeof (buffer));
311 res = GNUNET_DISK_file_write (fh, buffer, bytesleft);
315 if (GNUNET_SYSERR == res)
316 return GNUNET_SYSERR;
321 /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */
324 * Iterator (callback) method to be called by the
325 * bloomfilter iterator on each bit that is to be
326 * set or tested for the key.
329 * @param bf the filter to manipulate
330 * @param bit the current bit
331 * @return GNUNET_YES to continue, GNUNET_NO to stop early
333 typedef int (*BitIterator) (void *cls,
334 const struct GNUNET_CONTAINER_BloomFilter * bf,
339 * Call an iterator for each bit that the bloomfilter
340 * must test or set for this element.
342 * @param bf the filter
343 * @param callback the method to call
344 * @param arg extra argument to callback
345 * @param key the key for which we iterate over the BF bits
348 iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf,
349 BitIterator callback, void *arg, const struct GNUNET_HashCode *key)
351 struct GNUNET_HashCode tmp[2];
354 unsigned int slot = 0;
356 bitCount = bf->addressesPerElement;
359 GNUNET_assert (bf->bitArraySize > 0);
360 GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize);
363 while (slot < (sizeof (struct GNUNET_HashCode) / sizeof (uint32_t)))
367 ntohl ((((uint32_t *) & tmp[round & 1])[slot])) %
368 ((bf->bitArraySize * 8LL))))
377 GNUNET_CRYPTO_hash (&tmp[round & 1], sizeof (struct GNUNET_HashCode),
378 &tmp[(round + 1) & 1]);
387 * Callback: increment bit
389 * @param cls pointer to writeable form of bf
390 * @param bf the filter to manipulate
391 * @param bit the bit to increment
395 incrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
398 struct GNUNET_CONTAINER_BloomFilter *b = cls;
400 incrementBit (b->bitArray, bit, bf->fh);
406 * Callback: decrement bit
408 * @param cls pointer to writeable form of bf
409 * @param bf the filter to manipulate
410 * @param bit the bit to decrement
414 decrementBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
417 struct GNUNET_CONTAINER_BloomFilter *b = cls;
419 decrementBit (b->bitArray, bit, bf->fh);
425 * Callback: test if all bits are set
427 * @param cls pointer set to GNUNET_NO if bit is not set
428 * @param bf the filter
429 * @param bit the bit to test
430 * @return YES if the bit is set, NO if not
433 testBitCallback (void *cls, const struct GNUNET_CONTAINER_BloomFilter *bf,
438 if (GNUNET_NO == testBit (bf->bitArray, bit))
446 /* *********************** INTERFACE **************** */
449 * Load a bloom-filter from a file.
451 * @param filename the name of the file (or the prefix)
452 * @param size the size of the bloom-filter (number of
453 * bytes of storage space to use); will be rounded up
455 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
456 * element (number of bits set per element in the set)
457 * @return the bloomfilter
459 struct GNUNET_CONTAINER_BloomFilter *
460 GNUNET_CONTAINER_bloomfilter_load (const char *filename, size_t size,
463 struct GNUNET_CONTAINER_BloomFilter *bf;
471 GNUNET_assert (NULL != filename);
472 if ((k == 0) || (size == 0))
477 while ( (ui < size) &&
480 size = ui; /* make sure it's a power of 2 */
482 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
483 /* Try to open a bloomfilter file */
484 if (GNUNET_YES == GNUNET_DISK_file_test (filename))
486 GNUNET_DISK_file_open (filename,
487 GNUNET_DISK_OPEN_READWRITE,
488 GNUNET_DISK_PERM_USER_READ |
489 GNUNET_DISK_PERM_USER_WRITE);
492 /* file existed, try to read it! */
493 must_read = GNUNET_YES;
495 GNUNET_DISK_file_handle_size (bf->fh, &fsize))
497 GNUNET_DISK_file_close (bf->fh);
503 /* found existing empty file, just overwrite */
504 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
506 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
508 GNUNET_DISK_file_close (bf->fh);
513 else if (fsize != size * 4LL)
515 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
516 _("Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"),
517 (unsigned long long) (size * 4LL),
518 (unsigned long long) fsize);
519 GNUNET_DISK_file_close (bf->fh);
526 /* file did not exist, don't read, just create */
527 must_read = GNUNET_NO;
529 GNUNET_DISK_file_open (filename,
530 GNUNET_DISK_OPEN_CREATE |
531 GNUNET_DISK_OPEN_READWRITE,
532 GNUNET_DISK_PERM_USER_READ |
533 GNUNET_DISK_PERM_USER_WRITE);
539 if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL))
541 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING,
543 GNUNET_DISK_file_close (bf->fh);
548 bf->filename = GNUNET_strdup (filename);
550 bf->bitArray = GNUNET_malloc_large (size);
551 if (bf->bitArray == NULL)
554 GNUNET_DISK_file_close (bf->fh);
555 GNUNET_free (bf->filename);
559 bf->bitArraySize = size;
560 bf->addressesPerElement = k;
561 if (GNUNET_YES != must_read)
562 return bf; /* already done! */
563 /* Read from the file what bits we can */
564 rbuff = GNUNET_malloc (BUFFSIZE);
566 while (pos < size * 8LL)
570 res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE);
573 LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename);
575 GNUNET_free (bf->filename);
576 GNUNET_DISK_file_close (bf->fh);
581 break; /* is ok! we just did not use that many bits yet */
582 for (i = 0; i < res; i++)
584 if ((rbuff[i] & 0x0F) != 0)
585 setBit (bf->bitArray, pos + i * 2);
586 if ((rbuff[i] & 0xF0) != 0)
587 setBit (bf->bitArray, pos + i * 2 + 1);
591 pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */
599 * Create a bloom filter from raw bits.
601 * @param data the raw bits in memory (maybe NULL,
602 * in which case all bits should be considered
604 * @param size the size of the bloom-filter (number of
605 * bytes of storage space to use); also size of data
606 * -- unless data is NULL
607 * @param k the number of GNUNET_CRYPTO_hash-functions to apply per
608 * element (number of bits set per element in the set)
609 * @return the bloomfilter
611 struct GNUNET_CONTAINER_BloomFilter *
612 GNUNET_CONTAINER_bloomfilter_init (const char *data, size_t size,
615 struct GNUNET_CONTAINER_BloomFilter *bf;
617 if ((0 == k) || (0 == size))
619 bf = GNUNET_malloc (sizeof (struct GNUNET_CONTAINER_BloomFilter));
622 bf->bitArray = GNUNET_malloc_large (size);
623 if (NULL == bf->bitArray)
628 bf->bitArraySize = size;
629 bf->addressesPerElement = k;
631 memcpy (bf->bitArray, data, size);
637 * Copy the raw data of this bloomfilter into
638 * the given data array.
640 * @param bf bloomfilter to take the raw data from
641 * @param data where to write the data
642 * @param size the size of the given data array
643 * @return GNUNET_SYSERR if the data array is not big enough
646 GNUNET_CONTAINER_bloomfilter_get_raw_data (const struct
647 GNUNET_CONTAINER_BloomFilter *bf,
648 char *data, size_t size)
651 return GNUNET_SYSERR;
652 if (bf->bitArraySize != size)
653 return GNUNET_SYSERR;
654 memcpy (data, bf->bitArray, size);
660 * Free the space associated with a filter
661 * in memory, flush to drive if needed (do not
662 * free the space on the drive)
664 * @param bf the filter
667 GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf)
672 GNUNET_DISK_file_close (bf->fh);
673 GNUNET_free_non_null (bf->filename);
674 GNUNET_free (bf->bitArray);
680 * Reset a bloom filter to empty. Clears the file on disk.
682 * @param bf the filter
685 GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf)
690 memset (bf->bitArray, 0, bf->bitArraySize);
691 if (bf->filename != NULL)
692 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
697 * Test if an element is in the filter.
699 * @param e the element
700 * @param bf the filter
701 * @return GNUNET_YES if the element is in the filter, GNUNET_NO if not
704 GNUNET_CONTAINER_bloomfilter_test (const struct GNUNET_CONTAINER_BloomFilter
705 *bf, const struct GNUNET_HashCode * e)
712 iterateBits (bf, &testBitCallback, &res, e);
718 * Add an element to the filter
720 * @param bf the filter
721 * @param e the element
724 GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf,
725 const struct GNUNET_HashCode * e)
729 iterateBits (bf, &incrementBitCallback, bf, e);
734 * Or the entries of the given raw data array with the
735 * data of the given bloom filter. Assumes that
736 * the size of the data array and the current filter
739 * @param bf the filter
740 * @param data the data to or-in
741 * @param size number of bytes in data
744 GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf,
745 const char *data, size_t size)
749 unsigned long long *fc;
750 const unsigned long long *dc;
754 if (bf->bitArraySize != size)
755 return GNUNET_SYSERR;
756 fc = (unsigned long long *) bf->bitArray;
757 dc = (const unsigned long long *) data;
758 n = size / sizeof (unsigned long long);
760 for (i = 0; i < n; i++)
762 for (i = n * sizeof (unsigned long long); i < size; i++)
763 bf->bitArray[i] |= data[i];
768 * Or the entries of the given raw data array with the
769 * data of the given bloom filter. Assumes that
770 * the size of the data array and the current filter
773 * @param bf the filter
774 * @param to_or the bloomfilter to or-in
775 * @return #GNUNET_OK on success
778 GNUNET_CONTAINER_bloomfilter_or2 (struct GNUNET_CONTAINER_BloomFilter *bf,
779 const struct GNUNET_CONTAINER_BloomFilter
784 unsigned long long *fc;
785 const unsigned long long *dc;
790 if (bf->bitArraySize != to_or->bitArraySize)
793 return GNUNET_SYSERR;
795 size = bf->bitArraySize;
796 fc = (unsigned long long *) bf->bitArray;
797 dc = (const unsigned long long *) to_or->bitArray;
798 n = size / sizeof (unsigned long long);
800 for (i = 0; i < n; i++)
802 for (i = n * sizeof (unsigned long long); i < size; i++)
803 bf->bitArray[i] |= to_or->bitArray[i];
809 * Remove an element from the filter.
811 * @param bf the filter
812 * @param e the element to remove
815 GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf,
816 const struct GNUNET_HashCode * e)
820 if (bf->filename == NULL)
822 iterateBits (bf, &decrementBitCallback, bf, e);
826 * Resize a bloom filter. Note that this operation
827 * is pretty costly. Essentially, the bloom filter
828 * needs to be completely re-build.
830 * @param bf the filter
831 * @param iterator an iterator over all elements stored in the BF
832 * @param iterator_cls argument to the iterator function
833 * @param size the new size for the filter
834 * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element
837 GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf,
838 GNUNET_HashCodeIterator iterator,
839 void *iterator_cls, size_t size,
842 struct GNUNET_HashCode hc;
845 GNUNET_free (bf->bitArray);
849 size = i; /* make sure it's a power of 2 */
851 bf->bitArraySize = size;
852 bf->bitArray = GNUNET_malloc (size);
853 if (bf->filename != NULL)
854 make_empty_file (bf->fh, bf->bitArraySize * 4LL);
855 while (GNUNET_YES == iterator (iterator_cls, &hc))
856 GNUNET_CONTAINER_bloomfilter_add (bf, &hc);
859 /* end of container_bloomfilter.c */