From: Maximilian Szengel Date: Wed, 27 Jun 2012 16:13:48 +0000 (+0000) Subject: new and improved tests X-Git-Tag: initial-import-from-subversion-38251~12767 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=24f2c9d570bd181c622955506f6ecc000d5b2a98;p=oweals%2Fgnunet.git new and improved tests --- diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 64a370df3..911128647 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h @@ -42,6 +42,7 @@ extern "C" */ struct GNUNET_REGEX_Automaton; + /** * Edge representation. */ @@ -58,6 +59,7 @@ struct GNUNET_REGEX_Edge struct GNUNET_HashCode destination; }; + /** * Construct an NFA by parsing the regex string of length 'len'. * @@ -69,6 +71,7 @@ struct GNUNET_REGEX_Edge struct GNUNET_REGEX_Automaton * GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); + /** * Construct DFA for the given 'regex' of length 'len'. * @@ -80,6 +83,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); struct GNUNET_REGEX_Automaton * GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); + /** * Free the memory allocated by constructing the GNUNET_REGEX_Automaton. * data structure. @@ -89,6 +93,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); void GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); + /** * Save the given automaton as a GraphViz dot file. * @@ -111,19 +116,6 @@ int GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string); -/** - * Get the canonical regex of the given automaton. - * When constructing the automaton a proof is computed for each state, - * consisting of the regular expression leading to this state. A complete - * regex for the automaton can be computed by combining these proofs. - * As of now this function is only useful for testing. - * - * @param a automaton for which the canonical regex should be returned. 
- * - * @return - */ -const char * -GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); /** * Get the first key for the given 'input_string'. This hashes @@ -140,6 +132,7 @@ unsigned int /* FIXME: size_t */ GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len, struct GNUNET_HashCode * key); + /** * Check if the given 'proof' matches the given 'key'. * @@ -152,6 +145,7 @@ int GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key); + /** * Iterator callback function. * @@ -169,6 +163,7 @@ typedef void (*GNUNET_REGEX_KeyIterator)(void *cls, unsigned int num_edges, const struct GNUNET_REGEX_Edge *edges); + /** * Iterate over all edges starting from start state of automaton 'a'. Calling * iterator for each edge. @@ -182,6 +177,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, GNUNET_REGEX_KeyIterator iterator, void *iterator_cls); + #if 0 /* keep Emacsens' auto-indent happy */ { #endif diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am index cb9bc093a..1284111d8 100644 --- a/src/regex/Makefile.am +++ b/src/regex/Makefile.am @@ -11,7 +11,8 @@ endif lib_LTLIBRARIES = libgnunetregex.la libgnunetregex_la_SOURCES = \ - regex.c + regex_internal.h regex.c \ + regex_random.c libgnunetregex_la_LIBADD = -lm \ $(top_builddir)/src/util/libgnunetutil.la libgnunetregex_la_LDFLAGS = \ diff --git a/src/regex/regex.c b/src/regex/regex.c index 411c72c08..f237334d8 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -26,7 +26,7 @@ #include "gnunet_container_lib.h" #include "gnunet_crypto_lib.h" #include "gnunet_regex_lib.h" -#include "regex.h" +#include "regex_internal.h" /** * Constant for how many bits the initial string regex should have. 
@@ -1078,12 +1078,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); GNUNET_free (temp_a); } - if (GNUNET_YES == needs_parentheses (R_last[i][j])) - { - temp_a = R_last[i][j]; - GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); - GNUNET_free (temp_a); - } } if (NULL == R_last[i][i]) GNUNET_asprintf (&R_last[i][i], ""); @@ -1094,7 +1088,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) GNUNET_free (temp_a); } } + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + if (needs_parentheses (R_last[i][j])) + { + temp_a = R_last[i][j]; + GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); + GNUNET_free (temp_a); + } + // TODO: clean up and fix the induction part // INDUCTION for (k = 0; k < n; k++) diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h new file mode 100644 index 000000000..8ea597d40 --- /dev/null +++ b/src/regex/regex_internal.h @@ -0,0 +1,96 @@ +/* + This file is part of GNUnet + (C) 2012 Christian Grothoff (and other contributing authors) + + GNUnet is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GNUnet is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNUnet; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
+*/ +/** + * @file src/regex/regex_internal.h + * @brief common internal definitions for regex library + * @author Maximilian Szengel + */ +#ifndef REGEX_INTERNAL_H +#define REGEX_INTERNAL_H + +#include "gnunet_regex_lib.h" + +#ifdef __cplusplus +extern "C" +{ +#if 0 /* keep Emacsens' auto-indent happy */ +} +#endif +#endif + +/** + * char array of literals that are allowed inside a regex (apart from the + * operators) + */ +#define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + + +/** + * Get the canonical regex of the given automaton. + * When constructing the automaton a proof is computed for each state, + * consisting of the regular expression leading to this state. A complete + * regex for the automaton can be computed by combining these proofs. + * As of now this function is only useful for testing. + * + * @param a automaton for which the canonical regex should be returned. + * + * @return + */ +const char * +GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); + + +/** + * Generate a (pseudo) random regular expression of length 'rx_length', as well + * as a (optional) string that will be matched by the generated regex. The + * returned regex needs to be freed. + * + * @param rx_length length of the random regex. + * @param matching_str (optional) pointer to a string that will contain a string + * that will be matched by the generated regex, if + * 'matching_str' pointer was not NULL. + * + * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which + * needs to be freed, otherwise. + */ +char * +GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str); + + +/** + * Generate a random string of maximum length 'max_len' that only contains literals allowed + * in a regular expression. The string might be 0 chars long but is guaranteed + * to be shorter or equal to 'max_len'. + * + * @param max_len maximum length of the string that should be generated.
+ * + * @return random string that needs to be freed. + */ +char * +GNUNET_REGEX_generate_random_string (size_t max_len); + +#if 0 /* keep Emacsens' auto-indent happy */ +{ +#endif +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/regex/regex_random.c b/src/regex/regex_random.c new file mode 100644 index 000000000..3af9b7c5a --- /dev/null +++ b/src/regex/regex_random.c @@ -0,0 +1,170 @@ +/* + This file is part of GNUnet + (C) 2012 Christian Grothoff (and other contributing authors) + + GNUnet is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GNUnet is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNUnet; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file src/regex/regex_random.c + * @brief functions for creating random regular expressions and strings + * @author Maximilian Szengel + */ +#include "platform.h" +#include "gnunet_regex_lib.h" +#include "gnunet_crypto_lib.h" +#include "regex_internal.h" + + +/** + * Get a (pseudo) random valid literal for building a regular expression. + * + * @return random valid literal + */ +char +get_random_literal () +{ + uint32_t ridx; + + ridx = + GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, + (uint32_t) strlen (ALLOWED_LITERALS)); + + return ALLOWED_LITERALS[ridx]; +} + + +/** + * Generate a (pseudo) random regular expression of length 'rx_length', as well + * as a (optional) string that will be matched by the generated regex. The + * returned regex needs to be freed. 
+ * + * @param rx_length length of the random regex. + * @param matching_str (optional) pointer to a string that will contain a string + * that will be matched by the generated regex, if + * 'matching_str' pointer was not NULL. Make sure you + * allocated at least rx_length+1 bytes for this string. + * + * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which + * needs to be freed, otherwise. + */ +char * +GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str) +{ + char *rx; + char *rx_p; + char *matching_strp; + unsigned int i; + unsigned int char_op_switch; + unsigned int last_was_op; + int rx_op; + char current_char; + + if (0 == rx_length) + return NULL; + + if (NULL != matching_str) + matching_strp = matching_str; + else + matching_strp = NULL; + + rx = GNUNET_malloc (rx_length + 1); + rx_p = rx; + current_char = 0; + last_was_op = 1; + + for (i = 0; i < rx_length; i++) + { + char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2); + + if (0 == char_op_switch && !last_was_op) + { + last_was_op = 1; + rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4); + + switch (rx_op) + { + case 0: + current_char = '+'; + break; + case 1: + current_char = '*'; + break; + case 2: + current_char = '?'; + break; + case 3: + if (i < rx_length - 1) // '|' cannot be at the end + current_char = '|'; + else + current_char = get_random_literal (); + break; + } + } + else + { + current_char = get_random_literal (); + last_was_op = 0; + } + + if (NULL != matching_strp && + (current_char != '+' && current_char != '*' && current_char != '?' && + current_char != '|')) + { + *matching_strp = current_char; + matching_strp++; + } + + *rx_p = current_char; + rx_p++; + } + *rx_p = '\0'; + if (NULL != matching_strp) + *matching_strp = '\0'; + + return rx; +} + +/** + * Generate a random string of maximum length 'max_len' that only contains literals allowed + * in a regular expression.
The string might be 0 chars long but is guaranteed + * to be shorter or equal to 'max_len'. + * + * @param max_len maximum length of the string that should be generated. + * + * @return random string that needs to be freed. + */ +char * +GNUNET_REGEX_generate_random_string (size_t max_len) +{ + unsigned int i; + char *str; + size_t len; + + if (1 > max_len) + return GNUNET_strdup (""); + + len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len); + str = GNUNET_malloc (len + 1); + + for (i = 0; i < len; i++) + { + str[i] = get_random_literal (); + } + + str[i] = '\0'; + + return str; +} diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index b6cdbe100..6d575a05c 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c @@ -26,6 +26,7 @@ #include #include "platform.h" #include "gnunet_regex_lib.h" +#include "regex_internal.h" enum Match_Result { @@ -41,8 +42,6 @@ struct Regex_String_Pair enum Match_Result expected_results[20]; }; -static const char allowed_literals[] = - "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; /** * Random regex test.
Generate a random regex as well as 'str_count' strings to @@ -60,15 +59,8 @@ test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_count) { int i; - int j; - int rx_exp; - char rand_rx[rx_length + 1]; - char matching_str[str_count][max_str_len + 1]; - char *rand_rxp; - char *matching_strp; - int char_op_switch; - int last_was_op; - char current_char; + char *rand_rx; + char *matching_str; int eval; int eval_check; int eval_canonical; @@ -77,7 +69,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, regmatch_t matchptr[1]; char error[200]; int result; - unsigned int str_len; + size_t str_len; char *canonical_regex; // At least one string is needed for matching @@ -85,76 +77,20 @@ test_random (unsigned int rx_length, unsigned int max_str_len, // The string should be at least as long as the regex itself GNUNET_assert (max_str_len >= rx_length); - rand_rxp = rand_rx; - matching_strp = matching_str[0]; - current_char = 0; - last_was_op = 1; - // Generate random regex and a string that matches the regex - for (i = 0; i < rx_length; i++) - { - char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0)); - - if (0 == char_op_switch && !last_was_op) - { - last_was_op = 1; - rx_exp = rand () % 4; - - switch (rx_exp) - { - case 0: - current_char = '+'; - break; - case 1: - current_char = '*'; - break; - case 2: - current_char = '?'; - break; - case 3: - if (i < rx_length - 1) // '|' cannot be at the end - current_char = '|'; - else - current_char = - allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; - break; - } - } - else - { - current_char = - allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; - last_was_op = 0; - } - - if (current_char != '+' && current_char != '*' && current_char != '?' 
&& - current_char != '|') - { - *matching_strp = current_char; - matching_strp++; - } - - *rand_rxp = current_char; - rand_rxp++; - } - *rand_rxp = '\0'; - *matching_strp = '\0'; - - // Generate some random strings for matching... - // Start at 1, because the first string is generated above during regex generation - for (i = 1; i < str_count; i++) - { - str_len = rand () % max_str_len; - for (j = 0; j < str_len; j++) - matching_str[i][j] = - allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; - matching_str[i][str_len] = '\0'; - } + matching_str = GNUNET_malloc (rx_length + 1); + rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str); // Now match result = 0; for (i = 0; i < str_count; i++) { + if (0 < i) + { + matching_str = GNUNET_REGEX_generate_random_string (max_str_len); + str_len = strlen (matching_str); + } + // Match string using DFA dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); if (NULL == dfa) @@ -163,7 +99,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, return -1; } - eval = GNUNET_REGEX_eval (dfa, matching_str[i]); + eval = GNUNET_REGEX_eval (dfa, matching_str); canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); GNUNET_REGEX_automaton_destroy (dfa); @@ -175,7 +111,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, return -1; } - eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); + eval_check = regexec (&rx, matching_str, 1, matchptr, 0); regfree (&rx); // Match canonical regex @@ -187,14 +123,13 @@ test_random (unsigned int rx_length, unsigned int max_str_len, return -1; } - eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0); + eval_canonical = regexec (&rx, matching_str, 1, matchptr, 0); regfree (&rx); GNUNET_free (canonical_regex); // We only want to match the whole string, because that's what our DFA does, too. 
if (eval_check == 0 && - (matchptr[0].rm_so != 0 || - matchptr[0].rm_eo != strlen (matching_str[i]))) + (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str))) eval_check = 1; // compare result @@ -206,7 +141,12 @@ test_random (unsigned int rx_length, unsigned int max_str_len, rand_rx, matching_str, eval, eval_check, error); result += 1; } + + GNUNET_free (matching_str); } + + GNUNET_free (rand_rx); + return result; } diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 5d0aabd00..85fc3079d 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c @@ -22,68 +22,146 @@ * @brief test for regex.c * @author Maximilian Szengel */ -#include -#include #include "platform.h" #include "gnunet_regex_lib.h" +#include "regex_internal.h" -int -main (int argc, char *argv[]) + +/** + * Test if the given regex's canonical regex is the same as this canonical + * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the + * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from + * this canonical regex 3. compare the canonical regex of dfa A with the + * canonical regex of dfa B. + * + * @param regex regular expression used for this test (see above). 
+ * + * @return 0 on success, 1 on failure + */ +unsigned int +test_proof (const char *regex) { - GNUNET_log_setup ("test-regex", -#if VERBOSE - "DEBUG", -#else - "WARNING", -#endif - NULL); + unsigned int error; + struct GNUNET_REGEX_Automaton *dfa; + char *c_rx1; + const char *c_rx2; - int error; - int i; - - const char *regex[21] = { - "ab(c|d)+c*(a(b|c)+d)+(bla)+", - "(bla)*", - "b(lab)*la", - "(ab)*", - "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", - "z(abc|def)?xyz", - "1*0(0|1)*", - "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", - "(cd|ab)*", - "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", - "abc(1|0)*def", - "ab|ac", - "(ab)(ab)*", - "ab|cd|ef|gh", - "a|b|c|d|e|f|g", - "(ab)|(ac)", - "a(b|c)", - "a*a", - "ab?(abcd)?", - "(ab|cs|df|sdf)*", - "a|aa*a" + dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); + c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); + GNUNET_REGEX_automaton_destroy (dfa); + dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1)); + c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa); + + error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; + + if (error > 0) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n", + regex, c_rx1, c_rx2); + } + + GNUNET_free (c_rx1); + GNUNET_REGEX_automaton_destroy (dfa); + + return error; +} + +/** + * Use 'test_proof' function to randomly test the canonical regexes of 'count' + * random expressions of length 'rx_length'. + * + * @param count number of random regular expressions to test. + * @param rx_length length of the random regular expressions. + * + * @return 0 on succes, number of failures otherwise. 
+ */ +unsigned int +test_proofs_random (unsigned int count, size_t rx_length) +{ + unsigned int i; + char *rand_rx; + unsigned int failures; + + failures = 0; + + for (i = 0; i < count; i++) + { + rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, NULL); + failures += test_proof (rand_rx); + GNUNET_free (rand_rx); + } + + return failures; +} + +/** + * Test a number of known examples of regexes for proper canonicalization. + * + * @return 0 on success, number of failures otherwise. + */ +unsigned int +test_proofs_static (void) +{ + unsigned int i; + unsigned int error; + + const char *regex[4] = { + "a|aa*a", + "a+", + "a*", + "a*a*" }; + char *canonical_regex; struct GNUNET_REGEX_Automaton *dfa; error = 0; - for (i = 0; i < 21; i++) + for (i = 0; i < 4; i += 2) { dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); GNUNET_REGEX_automaton_destroy (dfa); - dfa = - GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex)); + dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); error += (0 == strcmp (canonical_regex, GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; + + if (error > 0) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Comparing canonical regex of %s with %s failed.\n", regex[i], + regex[i + 1]); + } + GNUNET_free (canonical_regex); GNUNET_REGEX_automaton_destroy (dfa); } return error; } + + +int +main (int argc, char *argv[]) +{ + GNUNET_log_setup ("test-regex", +#if VERBOSE + "DEBUG", +#else + "WARNING", +#endif + NULL); + + int error; + + error = 0; + + error += test_proofs_static (); +// error += test_proofs_random (100, 10); + + return error; +}