From: Christian Grothoff Date: Sat, 24 Oct 2015 13:37:48 +0000 (+0000) Subject: -adding Florian's IBF sim code X-Git-Tag: initial-import-from-subversion-38251~1217 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=2c8b0facf033e0e8b6704795de6ac13b4ffd7a07;p=oweals%2Fgnunet.git -adding Florian's IBF sim code --- diff --git a/src/set/ibf_sim.c b/src/set/ibf_sim.c new file mode 100644 index 000000000..61c98dc52 --- /dev/null +++ b/src/set/ibf_sim.c @@ -0,0 +1,141 @@ +/* + This file is part of GNUnet + Copyright (C) 2013 Christian Grothoff (and other contributing authors) + + GNUnet is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GNUnet is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNUnet; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +/** + * @file set/ibf_sim.c + * @brief implementation of simulation for invertible bloom filter + * @author Florian Dold + * + * This code was used for some internal experiments, it is not + * build or shipped as part of the GNUnet system. + */ +#include +#include +#include + +#define MAX_IBF_DECODE 16 + +/* report average over how many rounds? */ +#define ROUNDS 100000 + +/* enable one of the three below */ +// simple fix +#define FIX1 0 +// possibly slightly better fix for large IBF_DECODE values +#define FIX2 1 + +// SIGCOMM algorithm +#define STRATA 0 + +// print each value? +#define VERBOSE 0 +// avoid assembly? (ASM is about 50% faster) +#define SLOW 0 + +int +main(int argc, char **argv) +{ + unsigned int round; + unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31 + unsigned int i; + int j; + unsigned int r; + unsigned int ret; + unsigned long long total; + unsigned int want; + double predict; + + srandom (time (NULL)); + total = 0; + want = atoi (argv[1]); + for (round=0;round= 0; j--) + { +#if FIX1 + /* improved algorithm, for 1000 elements with IBF-DECODE 8, I + get 990/1000 elements on average over 1 million runs; key + idea being to stop short of the 'last' possible IBF as + otherwise a "lowball" per-chance would unduely influence the + result */ + if ( (j > 0) && + (buckets[j - 1] > MAX_IBF_DECODE) ) + { + ret *= (1 << (j + 1)); + break; + } +#endif +#if FIX2 + /* another improvement: don't just always cut off the last one, + but rather try to predict based on all previous values where + that "last" one is; additional prediction can only really + work if MAX_IBF_DECODE is sufficiently high */ + if ( (j > 0) && + ( (buckets[j - 1] > MAX_IBF_DECODE) || + (predict > MAX_IBF_DECODE) ) ) + { + ret *= (1 << (j + 1)); + break; + } +#endif +#if STRATA + /* original algorithm, for 1000 elements with IBF-DECODE 8, + I get 920/1000 elements on average over 1 million runs */ + if (buckets[j] > MAX_IBF_DECODE) + { + ret *= (1 << (j+1)); + break; + } +#endif + ret += buckets[j]; + predict = (buckets[j] + 2.0 * predict) / 2.0; + } +#if VERBOSE + fprintf (stderr, "%u ", ret); +#endif + total += ret; + } + fprintf (stderr, "\n"); + fprintf (stdout, "average %llu\n", total / ROUNDS); + return 0; +} + +/* TODO: should calculate stddev of the results to also be able to + say something about the stability of the results, outside of + large-scale averages -- gaining 8% precision at the expense of + 50% additional variance might not be worth it... */