2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file regex/test_regex_eval_api.c
22 * @brief test for regex.c
23 * @author Maximilian Szengel
28 #include "gnunet_regex_lib.h"
/**
 * One test case: a regex plus a set of candidate strings and, for each
 * string, the expected outcome. Only part of the definition is visible in
 * this excerpt; test_automaton reads the members regex, string_count,
 * strings[] and expected_results[].
 */
36 struct Regex_String_Pair
// Expected outcome per candidate string; test_automaton indexes it with the same i as strings[i].
41 enum Match_Result expected_results[20];
// Alphabet for randomly generated literals: decimal digits, upper-case and lower-case ASCII letters.
// Users index it with rand () % (sizeof (allowed_literals) - 1), so the terminating NUL is never selected.
44 static const char allowed_literals[] =
45 "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
/**
 * Generate a random regex together with one string built alongside it, add
 * further purely random strings, and compare the verdict of the GNUnet DFA
 * against POSIX regcomp/regexec for every string.
 *
 * @param rx_length number of characters in the generated regex
 * @param max_str_len capacity (excluding the terminating NUL) of each test string;
 *        asserted to be >= rx_length
 * @param str_count number of strings to evaluate; asserted to be > 0
 * @return not visible in this excerpt; main accumulates it into check_rand
 *         (presumably 0 on success -- TODO confirm)
 */
48 test_random (unsigned int rx_length, unsigned int max_str_len,
49 unsigned int str_count)
// Both buffers are variable-length arrays sized from the arguments, with one extra byte for the NUL.
54 char rand_rx[rx_length + 1];
55 char matching_str[str_count][max_str_len + 1];
64 struct GNUNET_REGEX_Automaton *dfa;
66 regmatch_t matchptr[1];
72 // At least one string is needed for matching
73 GNUNET_assert (str_count > 0);
74 // The string should be at least as long as the regex itself
75 GNUNET_assert (max_str_len >= rx_length);
// Slot 0 receives the string that is built alongside the regex below.
78 matching_strp = matching_str[0];
82 // Generate random regex and a string that matches the regex
83 for (i = 0; i < rx_length; i++)
// NOTE(review): rand () / (RAND_MAX + 1.0) lies in [0, 1), so the cast always yields 0 here;
// confirm whether a larger multiplier was intended to select among several cases.
85 char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
87 if (0 == char_op_switch && !last_was_op)
104 if (i < rx_length - 1) // '|' cannot be at the end
108 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
115 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
// Operator characters go only into the regex; other characters are also appended to the string.
119 if (current_char != '+' && current_char != '*' && current_char != '?' &&
122 *matching_strp = current_char;
126 *rand_rxp = current_char;
130 *matching_strp = '\0';
132 // Generate some random strings for matching...
133 // Start at 1, because the first string is generated above during regex generation
134 for (i = 1; i < str_count; i++)
// Length is in [0, max_str_len - 1], so the terminator written below stays inside the row.
136 str_len = rand () % max_str_len;
137 for (j = 0; j < str_len; j++)
139 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
140 matching_str[i][str_len] = '\0';
145 for (i = 0; i < str_count; i++)
147 // Match string using DFA
// NOTE(review): the DFA is rebuilt from the same rand_rx on every iteration; it looks loop-invariant.
148 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
151 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
155 eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
// Copy the computed regex before the automaton that provides it is destroyed.
156 computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa));
157 GNUNET_REGEX_automaton_destroy (dfa);
159 // Match string using glibc regex
160 if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
162 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
163 "Could not compile regex using regcomp\n");
167 eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
170 // Match computed regex
171 if (0 != regcomp (&rx, computed_regex, REG_EXTENDED))
173 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
174 "Could not compile regex using regcomp: %s\n",
179 eval_computed = regexec (&rx, matching_str[i], 1, matchptr, 0);
181 GNUNET_free (computed_regex);
183 // We only want to match the whole string, because that's what our DFA does, too.
// NOTE(review): matchptr was last written by the regexec call for the computed regex, yet it is
// paired here with eval_check from the first regexec call -- confirm this is intended.
184 if (eval_check == 0 &&
185 (matchptr[0].rm_so != 0 ||
186 matchptr[0].rm_eo != strlen (matching_str[i])))
// A disagreement between the POSIX verdict and the GNUnet DFA verdict is reported as an error.
190 if (eval_check != eval)
192 regerror (eval_check, &rx, error, sizeof error);
193 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
194 "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
// NOTE(review): the second "%s" receives matching_str (the whole 2-D array), not matching_str[i];
// that prints the first string rather than the one under test -- likely should be matching_str[i].
195 rand_rx, matching_str, eval, eval_check, error);
/**
 * Run every string of a test case through a GNUnet automaton and through a
 * compiled POSIX regex, and compare both verdicts with the expected result.
 *
 * @param a automaton under test; a NULL automaton is reported as an error
 * @param rx POSIX regex already compiled by the caller from the same pattern
 * @param rxstr test case providing regex, strings, string_count and expected_results
 * @return not visible in this excerpt; main adds it to check_nfa / check_dfa
 *         (presumably 0 on success -- TODO confirm)
 */
203 test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
204 struct Regex_String_Pair *rxstr)
210 regmatch_t matchptr[1];
215 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n");
221 for (i = 0; i < rxstr->string_count; i++)
223 eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
224 eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
226 // We only want to match the whole string, because that's what our DFA does, too.
// regexec reports the first matching substring; a match that does not span [0, strlen) is singled out here.
227 if (eval_check == 0 &&
228 (matchptr[0].rm_so != 0 ||
229 matchptr[0].rm_eo != strlen (rxstr->strings[i])))
// Failure when either engine disagrees with the expectation: both must return 0 for "match"
// and both must return non-zero for "nomatch".
232 if ((rxstr->expected_results[i] == match && (0 != eval || 0 != eval_check))
233 || (rxstr->expected_results[i] == nomatch &&
234 (0 == eval || 0 == eval_check)))
237 regerror (eval_check, rx, error, sizeof error);
238 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
239 "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
240 "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
241 rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
// NOTE(review): rm_so / rm_eo have type regoff_t, printed with "%i"; this assumes regoff_t is int -- confirm.
242 eval, eval_check, error, matchptr[0].rm_so,
/**
 * Test driver: checks a fixed table of regex/string cases against NFA and
 * DFA automata, then runs a series of randomized comparisons.
 *
 * @param argc argument count (no use visible in this excerpt)
 * @param argv argument vector (no use visible in this excerpt)
 * @return sum of the values returned by all checks
 */
250 main (int argc, char *argv[])
252 GNUNET_log_setup ("test-regex",
260 struct GNUNET_REGEX_Automaton *a;
// Fixed test table; each entry holds a regex, a string count, the strings and the expected results.
// Several entries are only partly visible in this excerpt.
268 struct Regex_String_Pair rxstr[12] = {
270 {"ababcd", "abab", "aabcd", "a", "abb"},
271 {match, nomatch, match, match, nomatch}},
272 {"ab(c|d)+c*(a(b|c)d)+", 5,
273 {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
274 "abccccca", "abcdcdcdccdabdabd"},
275 {match, nomatch, match, nomatch, match}},
276 {"ab+c*(a(bx|c)d)+", 5,
277 {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
278 "abccccca", "abcdcdcdccdabdabd"},
279 {nomatch, nomatch, nomatch, nomatch, nomatch}},
280 {"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1,
281 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
283 {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
284 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
286 {"F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", 1,
287 {"osfjsodfonONONOnosndfsdnfsd"},
289 {"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1,
290 {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
293 {"", "bla", "blabla", "bl", "la", "b", "l", "a"},
294 {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
295 {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
296 {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", "l",
298 {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
300 {"", "a", "aa", "aaa", "aaaa", "aaaaa"},
301 {nomatch, match, match, match, match, match}},
302 {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1,
305 {"ab(c|d)+c*(a(b|c)d)+", 1,
// The bound 12 must stay in sync with the size of rxstr declared above.
314 for (i = 0; i < 12; i++)
// Compile the reference POSIX regex for this table entry.
316 if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
318 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
319 "Could not compile regex using regcomp()\n");
// Pass 1: NFA built from the regex.
324 a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
325 check_nfa += test_automaton (a, &rx, &rxstr[i]);
326 GNUNET_REGEX_automaton_destroy (a);
// Pass 2: DFA built from the regex.
329 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
330 check_dfa += test_automaton (a, &rx, &rxstr[i]);
// Copy the computed regex before the DFA that provides it is destroyed.
331 check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a));
332 GNUNET_REGEX_automaton_destroy (a);
// Pass 3: DFA rebuilt from the computed regex, checked against the same expectations.
333 a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
334 check_dfa += test_automaton (a, &rx, &rxstr[i]);
335 GNUNET_REGEX_automaton_destroy (a);
337 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof);
338 GNUNET_free_non_null (check_proof);
// 150 randomized rounds: regex length 150, strings up to 200 characters, 25 strings per round.
344 for (i = 0; i < 150; i++)
345 check_rand += test_random (150, 200, 25);
// The process exit status is the sum of all check results.
347 return check_nfa + check_dfa + check_rand;