From 4bddef66e721cf68b17effea2c23aebd89ca1b8b Mon Sep 17 00:00:00 2001 From: Maximilian Szengel Date: Thu, 5 Apr 2012 11:46:24 +0000 Subject: [PATCH] Automatic regex generation for testing --- src/regex/regex.c | 39 ++++++-- src/regex/test_regex.c | 215 ++++++++++++++++++++++++++++++++++------- 2 files changed, 210 insertions(+), 44 deletions(-) diff --git a/src/regex/regex.c b/src/regex/regex.c index 6432075a5..fc9de529c 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -416,6 +416,15 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states) return s; } +/** + * Move from the given state 's' to the next state on + * transition 'literal' + * + * @param s starting state + * @param literal edge label to follow + * + * @return new state or NULL, if transition on literal not possible + */ static struct State * dfa_move (struct State *s, const char literal) { @@ -439,6 +448,12 @@ dfa_move (struct State *s, const char literal) return new_s; } +/** + * Remove all unreachable states from DFA 'a'. Unreachable states + * are those states that are not reachable from the starting state. + * + * @param a DFA automaton + */ static void dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) { @@ -472,6 +487,12 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) */ } +/** + * Remove all dead states from the DFA 'a'. Dead states are those + * states that do not transition to any other state but themselfes. + * + * @param a DFA automaton + */ static void dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) { @@ -520,12 +541,23 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) } } +/** + * Merge all non distinguishable states in the DFA 'a' + * + * @param a DFA automaton + */ static void dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a) { } +/** + * Minimize the given DFA 'a' by removing all unreachable states, + * removing all dead states and merging all non distinguishable states + * + * @param a DFA automaton + */ static void dfa_minimize (struct GNUNET_REGEX_Automaton *a) { @@ -1029,10 +1061,6 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) goto error; } - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "Created NFA with %i States and a total of %i Transitions\n", - ctx.state_id, ctx.transition_id); - return nfa; error: @@ -1156,9 +1184,6 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) dfa_minimize (dfa); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created DFA with %i States\n", - ctx.state_id); - return dfa; } diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c index 7428c001e..d32bc9ed8 100644 --- a/src/regex/test_regex.c +++ b/src/regex/test_regex.c @@ -23,7 +23,7 @@ * @author Maximilian Szengel */ #include - +#include #include "platform.h" #include "gnunet_regex_lib.h" @@ -36,44 +36,164 @@ enum Match_Result struct Regex_String_Pair { char *regex; - char *string; - enum Match_Result expected_result; + int string_count; + char **strings; + enum Match_Result *expected_results; }; - int -test_automaton (struct GNUNET_REGEX_Automaton *a, struct Regex_String_Pair *rxstr) +test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_count) { + int i; + int rx_exp; + char rand_rx[rx_length+1]; + char matching_str[str_count][max_str_len+1]; + char *rand_rxp; + char *matching_strp; + int char_op_switch; + int last_was_op; + char current_char; + int eval; + int eval_check; + struct GNUNET_REGEX_Automaton *dfa; regex_t rx; + regmatch_t matchptr[1]; + int char_offset; + char error[200]; + int result; + + // At least one string is needed for matching + GNUNET_assert (str_count > 0); + // The string should be at least as long as the regex itself + GNUNET_assert (max_str_len >= rx_length); + + rand_rxp = rand_rx; + matching_strp = matching_str[0]; + + // Generate random regex and a string that matches the regex + for (i=0; istring); - regcomp (&rx, rxstr->regex, REG_EXTENDED); - eval_check = regexec (&rx, rxstr->string, 0, NULL, 0); - - if ((rxstr->expected_result == match - && (0 != eval || 0 != eval_check)) - || - (rxstr->expected_result == nomatch - && (0 == eval || 0 == eval_check))) + for (i=0; istring_count; i++) { - result = 1; - char error[200]; - regerror (eval_check, &rx, error, sizeof error); - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n", - rxstr->regex, rxstr->string, rxstr->expected_result, eval, eval_check, error); - } - - regfree (&rx); + eval = GNUNET_REGEX_eval (a, rxstr->strings[i]); + eval_check = regexec (rx, rxstr->strings[i], 0, NULL, 0); + if ((rxstr->expected_results[i] == match + && (0 != eval || 0 != eval_check)) + || + (rxstr->expected_results[i] == nomatch + && (0 == eval || 0 == eval_check))) + { + result = 1; + regerror (eval_check, rx, error, sizeof error); + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n", + rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], eval, eval_check, error); + } + } return result; } @@ -90,34 +210,55 @@ main (int argc, char *argv[]) int check_nfa; int check_dfa; + int check_rand; struct Regex_String_Pair rxstr[3]; struct GNUNET_REGEX_Automaton *a; + regex_t rx; int i; - rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+"; - rxstr[0].string = "abcdcdcdcdddddabd"; - rxstr[0].expected_result = match; + check_nfa = 0; + check_dfa = 0; + check_rand = 0; - rxstr[1].regex = "a*"; - rxstr[1].string = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - rxstr[1].expected_result = match; - - rxstr[2].regex = "a*b*c*d+"; - rxstr[2].string = "a"; - rxstr[2].expected_result = nomatch; + rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+"; + rxstr[0].string_count = 5; + rxstr[0].strings = GNUNET_malloc (sizeof (char *) * rxstr[0].string_count); + rxstr[0].strings[0] = "abcdcdcdcdddddabd"; + rxstr[0].strings[1] = "abcd"; + rxstr[0].strings[2] = "abcddddddccccccccccccccccccccccccabdacdabd"; + rxstr[0].strings[3] = "abccccca"; + rxstr[0].strings[4] = "abcdcdcdccdabdabd"; + rxstr[0].expected_results = GNUNET_malloc (sizeof (enum Match_Result) * rxstr[0].string_count); + rxstr[0].expected_results[0] = match; + rxstr[0].expected_results[1] = nomatch; + rxstr[0].expected_results[2] = match; + rxstr[0].expected_results[3] = nomatch; + rxstr[0].expected_results[4] = match; - for (i=0; i<3; i++) + for (i=0; i<1; i++) { + if (0 != regcomp (&rx, rxstr->regex, REG_EXTENDED | REG_NOSUB)) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp()\n"); + return 1; + } + // NFA test a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); - check_nfa += test_automaton (a, &rxstr[i]); + check_nfa += test_automaton (a, &rx, &rxstr[i]); GNUNET_REGEX_automaton_destroy (a); // DFA test a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); - check_dfa += test_automaton (a, &rxstr[i]); + check_dfa += test_automaton (a, &rx, &rxstr[i]); GNUNET_REGEX_automaton_destroy (a); + + regfree (&rx); } - return check_nfa + check_dfa; + srand (time(NULL)); + for (i=0; i< 100; i++) + check_rand += test_random (100, 100, 1); + + return check_nfa + check_dfa + check_rand; } -- 2.25.1