#include <time.h>
#include "platform.h"
#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
enum Match_Result
{
enum Match_Result expected_results[20];
};
-static const char allowed_literals[] =
- "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
/**
* Random regex test. Generate a random regex as well as 'str_count' strings to
* match it against. Will match using GNUNET_REGEX implementation and compare
- * the result to glibc regex result. 'rx_length' has to be smaller then 'max_str_len'.
+ * the result to glibc regex result. 'rx_length' has to be smaller then
+ * 'max_str_len'.
*
* @param rx_length length of the regular expression.
* @param max_str_len maximum length of the random strings.
test_random (unsigned int rx_length, unsigned int max_str_len,
unsigned int str_count)
{
- int i;
- int j;
- int rx_exp;
- char rand_rx[rx_length + 1];
- char matching_str[str_count][max_str_len + 1];
- char *rand_rxp;
- char *matching_strp;
- int char_op_switch;
- int last_was_op;
- char current_char;
+ unsigned int i;
+ char *rand_rx;
+ char *matching_str;
int eval;
int eval_check;
- int eval_computed;
+ int eval_canonical;
+ int eval_canonical_check;
struct GNUNET_REGEX_Automaton *dfa;
regex_t rx;
regmatch_t matchptr[1];
char error[200];
int result;
- unsigned int str_len;
- char *computed_regex;
+ char *canonical_regex;
- // At least one string is needed for matching
+ /* At least one string is needed for matching */
GNUNET_assert (str_count > 0);
- // The string should be at least as long as the regex itself
+ /* The string should be at least as long as the regex itself */
GNUNET_assert (max_str_len >= rx_length);
- rand_rxp = rand_rx;
- matching_strp = matching_str[0];
- current_char = 0;
- last_was_op = 1;
+ /* Generate random regex and a string that matches the regex */
+ matching_str = GNUNET_malloc (rx_length + 1);
+ rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str);
- // Generate random regex and a string that matches the regex
- for (i = 0; i < rx_length; i++)
+ /* Now match */
+ result = 0;
+ for (i = 0; i < str_count; i++)
{
- char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
-
- if (0 == char_op_switch && !last_was_op)
- {
- last_was_op = 1;
- rx_exp = rand () % 4;
-
- switch (rx_exp)
- {
- case 0:
- current_char = '+';
- break;
- case 1:
- current_char = '*';
- break;
- case 2:
- current_char = '?';
- break;
- case 3:
- if (i < rx_length - 1) // '|' cannot be at the end
- current_char = '|';
- else
- current_char =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- break;
- }
- }
- else
+ if (0 < i)
{
- current_char =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- last_was_op = 0;
+ matching_str = GNUNET_REGEX_generate_random_string (max_str_len);
}
- if (current_char != '+' && current_char != '*' && current_char != '?' &&
- current_char != '|')
- {
- *matching_strp = current_char;
- matching_strp++;
- }
-
- *rand_rxp = current_char;
- rand_rxp++;
- }
- *rand_rxp = '\0';
- *matching_strp = '\0';
-
- // Generate some random strings for matching...
- // Start at 1, because the first string is generated above during regex generation
- for (i = 1; i < str_count; i++)
- {
- str_len = rand () % max_str_len;
- for (j = 0; j < str_len; j++)
- matching_str[i][j] =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- matching_str[i][str_len] = '\0';
- }
-
- // Now match
- result = 0;
- for (i = 0; i < str_count; i++)
- {
- // Match string using DFA
+ /* Match string using DFA */
dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
if (NULL == dfa)
{
return -1;
}
- eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
- computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa));
+ eval = GNUNET_REGEX_eval (dfa, matching_str);
+ /* save the canonical regex for later comparison */
+ canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
GNUNET_REGEX_automaton_destroy (dfa);
- // Match string using glibc regex
+ /* Match string using glibc regex */
if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Could not compile regex using regcomp\n");
+ "Could not compile regex using regcomp: %s\n", rand_rx);
return -1;
}
- eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
+ eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
regfree (&rx);
- // Match computed regex
- if (0 != regcomp (&rx, computed_regex, REG_EXTENDED))
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
+ eval_check = 1;
+
+ /* Match canonical regex */
+ dfa =
+ GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex));
+ if (NULL == dfa)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
+ return -1;
+ }
+
+ eval_canonical = GNUNET_REGEX_eval (dfa, matching_str);
+ GNUNET_REGEX_automaton_destroy (dfa);
+
+ if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED))
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Could not compile regex using regcomp: %s\n",
- computed_regex);
+ canonical_regex);
return -1;
}
- eval_computed = regexec (&rx, matching_str[i], 1, matchptr, 0);
+ eval_canonical_check = regexec (&rx, matching_str, 1, matchptr, 0);
regfree (&rx);
- GNUNET_free (computed_regex);
- // We only want to match the whole string, because that's what our DFA does, too.
- if (eval_check == 0 &&
- (matchptr[0].rm_so != 0 ||
- matchptr[0].rm_eo != strlen (matching_str[i])))
- eval_check = 1;
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
+ if (eval_canonical_check == 0 &&
+ (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
+ eval_canonical_check = 1;
- // compare result
- if (eval_check != eval)
+ /* compare results */
+ if (eval_check != eval || eval_canonical != eval_canonical_check)
{
regerror (eval_check, &rx, error, sizeof error);
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
- rand_rx, matching_str, eval, eval_check, error);
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Unexpected result:\nregex: %s\ncanonical_regex: %s\n\
+ string: %s\ngnunet regex: %i\nglibc regex: %i\n\
+ canonical regex: %i\ncanonical regex glibc: %i\n\
+ glibc error: %s\n\n", rand_rx, canonical_regex, matching_str,
+ eval, eval_check, eval_canonical, eval_canonical_check, error);
result += 1;
}
+
+ GNUNET_free (matching_str);
}
+
+ GNUNET_free (rand_rx);
+ GNUNET_free (canonical_regex);
+
return result;
}
*
* @param a automaton.
* @param rx compiled glibc regex.
- * @param rxstr regular expression and strings with expected results to match against.
+ * @param rxstr regular expression and strings with expected results to
+ * match against.
*
* @return 0 on successfull, non 0 otherwise
*/
eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
- // We only want to match the whole string, because that's what our DFA does, too.
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
if (eval_check == 0 &&
(matchptr[0].rm_so != 0 ||
matchptr[0].rm_eo != strlen (rxstr->strings[i])))
result = 1;
regerror (eval_check, rx, error, sizeof error);
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
- "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
- rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
- eval, eval_check, error, matchptr[0].rm_so,
- matchptr[0].rm_eo);
+ "Unexpected result:\nregex: %s\ncanonical_regex: %s\n"
+ "string: %s\nexpected result: %i\n"
+ "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\n"
+ "rm_so: %i\nrm_eo: %i\n\n", rxstr->regex,
+ GNUNET_REGEX_get_canonical_regex (a), rxstr->strings[i],
+ rxstr->expected_results[i], eval, eval_check, error,
+ matchptr[0].rm_so, matchptr[0].rm_eo);
}
}
return result;
int check_rand;
char *check_proof;
- struct Regex_String_Pair rxstr[14] = {
+ struct Regex_String_Pair rxstr[17] = {
{"ab?(abcd)?", 5,
{"ababcd", "abab", "aabcd", "a", "abb"},
{match, nomatch, match, match, nomatch}},
{"ab(c|d)+c*(a(b|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ {"abcdcdcdcdddddabd", "abcd",
+ "abcddddddccccccccccccccccccccccccabdacdabd",
"abccccca", "abcdcdcdccdabdabd"},
{match, nomatch, match, nomatch, match}},
{"ab+c*(a(bx|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ {"abcdcdcdcdddddabd", "abcd",
+ "abcddddddccccccccccccccccccccccccabdacdabd",
"abccccca", "abcdcdcdccdabdabd"},
{nomatch, nomatch, nomatch, nomatch, nomatch}},
{"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1,
{"", "bla", "blabla", "bl", "la", "b", "l", "a"},
{match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
{"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
- {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", "l",
+ {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b",
+ "l",
"a"},
{nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
{"a|aa*a", 6,
{"(ab|c)+", 7,
{"", "ab", "c", "abc", "ababcc", "acc", "abac"},
{nomatch, match, match, match, match, nomatch, nomatch}},
+ {"((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1,
+ {"", "2j2jADK", "j2jADK"},
+ {nomatch, match, match}},
+ {"((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2,
+ {"", "2j2jjADK", "j2jADK"},
+ {nomatch, match, match}},
{"ab(c|d)+c*(a(b|c)d)+", 1,
{"abacd"},
+ {nomatch}},
+ {"d|5kl", 1,
+ {"d5kl"},
{nomatch}}
};
check_dfa = 0;
check_rand = 0;
- for (i = 0; i < 14; i++)
+ for (i = 0; i < 17; i++)
{
if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
{
return 1;
}
- // NFA test
+ /* NFA test */
a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
check_nfa += test_automaton (a, &rx, &rxstr[i]);
GNUNET_REGEX_automaton_destroy (a);
- // DFA test
+ /* DFA test */
a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
check_dfa += test_automaton (a, &rx, &rxstr[i]);
- check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a));
+ check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
GNUNET_REGEX_automaton_destroy (a);
+
a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
check_dfa += test_automaton (a, &rx, &rxstr[i]);
GNUNET_REGEX_automaton_destroy (a);
regfree (&rx);
}
+ /* Random tests */
srand (time (NULL));
- /* for (i = 0; i < 50; i++) */
- /* check_rand += test_random (100, 120, 20); */
+ for (i = 0; i < 20; i++)
+ check_rand += test_random (50, 60, 10);
return check_nfa + check_dfa + check_rand;
}