From 07ca698bc1cee73020c218dd6f13aceb187700cd Mon Sep 17 00:00:00 2001 From: Maximilian Szengel Date: Wed, 6 Jun 2012 14:35:05 +0000 Subject: [PATCH] Test for computed regex. --- src/include/gnunet_regex_lib.h | 10 ++++++ src/regex/regex.c | 55 +++++++++++++++++++++++++++++++-- src/regex/test_regex_eval_api.c | 16 ++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index aec37c173..6e0a41275 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h @@ -111,6 +111,16 @@ int GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string); +/** + * Get the computed regex of the given automaton. + * When constructing the automaton a proof is computed for each state, + * consisting of the regular expression leading to this state. A complete + * regex for the automaton can be computed by combining these proofs. + * As of now this computed regex is only useful for testing. + */ +const char * +GNUNET_REGEX_get_computed_regex (struct GNUNET_REGEX_Automaton *a); + /** * Get the first key for the given 'input_string'. This hashes * the first x bits of the 'input_strings'. diff --git a/src/regex/regex.c b/src/regex/regex.c index 67cadfc5e..cd08b8dcd 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -116,6 +116,16 @@ struct GNUNET_REGEX_Automaton * Type of the automaton. */ enum GNUNET_REGEX_automaton_type type; + + /** + * Regex + */ + char *regex; + + /** + * Computed regex (result of RX->NFA->DFA->RX) + */ + char *computed_regex; }; /** @@ -799,7 +809,7 @@ automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, /** * Create proofs for all states in the given automaton. Implementation of the - * algorithms descriped in chapter 3.2.1 of "Automata Theory, Languages, and + * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. * * @param a automaton. @@ -821,6 +831,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) char *R_cur_r; int length_l; int length_r; + char *complete_regex; k = 0; n = a->state_count; @@ -853,7 +864,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) } } - if (i == j) { if (NULL == R_last[i][j]) @@ -973,6 +983,26 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) &states[i]->hash); } + // complete regex for whole DFA + complete_regex = NULL; + for (i = 0; i < n; i++) + { + if (states[i]->accepting) + { + if (NULL == complete_regex) + GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->marked][i]); + else if (NULL != R_last[a->start->marked][i] && + 0 != strcmp (R_last[a->start->marked][i], "")) + { + temp = complete_regex; + GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, + R_last[a->start->marked][i]); + GNUNET_free (temp); + } + } + } + a->computed_regex = complete_regex; + // cleanup for (i = 0; i < n; i++) { @@ -1867,6 +1897,8 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) goto error; } + nfa->regex = GNUNET_strdup (regex); + return nfa; error: @@ -1968,6 +2000,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); dfa->type = DFA; + dfa->regex = GNUNET_strdup (regex); // Create DFA start state from epsilon closure nfa_set = nfa_closure_create (nfa, nfa->start, 0); @@ -2005,6 +2038,8 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) if (NULL == a) return; + GNUNET_free (a->regex); + for (s = a->states_head; NULL != s;) { next_state = s->next; @@ -2241,6 +2276,22 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string) return result; } +/** + * Get the computed regex of the given automaton. + * When constructing the automaton a proof is computed for each state, + * consisting of the regular expression leading to this state. A complete + * regex for the automaton can be computed by combining these proofs. + * As of now this computed regex is only useful for testing. + */ +const char * +GNUNET_REGEX_get_computed_regex (struct GNUNET_REGEX_Automaton *a) +{ + if (NULL == a) + return NULL; + + return a->computed_regex; +} + /** * Get the first key for the given 'input_string'. This hashes the first x bits * of the 'input_strings'. diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index c63e97c11..392fb3b36 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c @@ -60,12 +60,14 @@ test_random (unsigned int rx_length, unsigned int max_str_len, char current_char; int eval; int eval_check; + int eval_computed; struct GNUNET_REGEX_Automaton *dfa; regex_t rx; regmatch_t matchptr[1]; char error[200]; int result; unsigned int str_len; + char *computed_regex; // At least one string is needed for matching GNUNET_assert (str_count > 0); @@ -151,6 +153,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, } eval = GNUNET_REGEX_eval (dfa, matching_str[i]); + computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa)); GNUNET_REGEX_automaton_destroy (dfa); // Match string using glibc regex @@ -164,6 +167,19 @@ test_random (unsigned int rx_length, unsigned int max_str_len, eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); regfree (&rx); + // Match computed regex + if (0 != regcomp (&rx, computed_regex, REG_EXTENDED)) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Could not compile regex using regcomp: %s\n", + computed_regex); + return -1; + } + + eval_computed = regexec (&rx, matching_str[i], 1, matchptr, 0); + regfree (&rx); + GNUNET_free (computed_regex); + // We only want to match the whole string, because that's what our DFA does, too. if (eval_check == 0 && (matchptr[0].rm_so != 0 || -- 2.25.1