2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file regex/test_regex_eval_api.c
22 * @brief test for regex.c
23 * @author Maximilian Szengel
28 #include "gnunet_regex_lib.h"
29 #include "regex_internal.h"
37 struct Regex_String_Pair
42 enum Match_Result expected_results[20];
47 * Random regex test. Generate a random regex as well as 'str_count' strings to
48 * match it against. Will match using GNUNET_REGEX implementation and compare
49 * the result to glibc regex result. 'rx_length' has to be smaller than
52 * @param rx_length length of the regular expression.
53 * @param max_str_len maximum length of the random strings.
54 * @param str_count number of generated random strings.
56 * @return 0 on success, non 0 otherwise.
59 test_random (unsigned int rx_length, unsigned int max_str_len,
60 unsigned int str_count)
68 int eval_canonical_check;
69 struct GNUNET_REGEX_Automaton *dfa;
71 regmatch_t matchptr[1];
74 char *canonical_regex;
76 /* At least one string is needed for matching */
77 GNUNET_assert (str_count > 0);
78 /* The string should be at least as long as the regex itself */
79 GNUNET_assert (max_str_len >= rx_length);
81 /* Generate random regex and a string that matches the regex */
82 matching_str = GNUNET_malloc (rx_length + 1);
83 rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str);
87 for (i = 0; i < str_count; i++)
91 matching_str = GNUNET_REGEX_generate_random_string (max_str_len);
94 /* Match string using DFA */
95 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
98 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
102 eval = GNUNET_REGEX_eval (dfa, matching_str);
103 /* save the canonical regex for later comparison */
104 canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
105 GNUNET_REGEX_automaton_destroy (dfa);
107 /* Match string using glibc regex */
108 if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
110 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
111 "Could not compile regex using regcomp: %s\n", rand_rx);
115 eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
118 /* We only want to match the whole string, because that's what our DFA does,
120 if (eval_check == 0 &&
121 (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
124 /* Match canonical regex */
126 GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex));
129 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
133 eval_canonical = GNUNET_REGEX_eval (dfa, matching_str);
134 GNUNET_REGEX_automaton_destroy (dfa);
136 if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED))
138 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
139 "Could not compile regex using regcomp: %s\n",
144 eval_canonical_check = regexec (&rx, matching_str, 1, matchptr, 0);
147 /* We only want to match the whole string, because that's what our DFA does,
149 if (eval_canonical_check == 0 &&
150 (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
151 eval_canonical_check = 1;
153 /* compare results */
154 if (eval_check != eval || eval_canonical != eval_canonical_check)
156 regerror (eval_check, &rx, error, sizeof error);
157 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Unexpected result:\nregex: %s\ncanonical_regex: %s\n\
158 string: %s\ngnunet regex: %i\nglibc regex: %i\n\
159 canonical regex: %i\ncanonical regex glibc: %i\n\
160 glibc error: %s\n\n", rand_rx, canonical_regex, matching_str,
161 eval, eval_check, eval_canonical, eval_canonical_check, error);
165 GNUNET_free (matching_str);
168 GNUNET_free (rand_rx);
169 GNUNET_free (canonical_regex);
175 * Automaton test that compares the result of matching regular expression 'rx'
176 * with the strings and expected results in 'rxstr' with the result of matching
177 * the same strings with glibc regex.
179 * @param a automaton.
180 * @param rx compiled glibc regex.
181 * @param rxstr regular expression and strings with expected results to
184 * @return 0 on success, non 0 otherwise
187 test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
188 struct Regex_String_Pair *rxstr)
194 regmatch_t matchptr[1];
199 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n");
205 for (i = 0; i < rxstr->string_count; i++)
207 eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
208 eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
210 /* We only want to match the whole string, because that's what our DFA does,
212 if (eval_check == 0 &&
213 (matchptr[0].rm_so != 0 ||
214 matchptr[0].rm_eo != strlen (rxstr->strings[i])))
217 if ((rxstr->expected_results[i] == match && (0 != eval || 0 != eval_check))
218 || (rxstr->expected_results[i] == nomatch &&
219 (0 == eval || 0 == eval_check)))
222 regerror (eval_check, rx, error, sizeof error);
223 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
224 "Unexpected result:\nregex: %s\ncanonical_regex: %s\n"
225 "string: %s\nexpected result: %i\n"
226 "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\n"
227 "rm_so: %i\nrm_eo: %i\n\n", rxstr->regex,
228 GNUNET_REGEX_get_canonical_regex (a), rxstr->strings[i],
229 rxstr->expected_results[i], eval, eval_check, error,
230 matchptr[0].rm_so, matchptr[0].rm_eo);
237 main (int argc, char *argv[])
239 GNUNET_log_setup ("test-regex",
247 struct GNUNET_REGEX_Automaton *a;
255 struct Regex_String_Pair rxstr[17] = {
257 {"ababcd", "abab", "aabcd", "a", "abb"},
258 {match, nomatch, match, match, nomatch}},
259 {"ab(c|d)+c*(a(b|c)d)+", 5,
260 {"abcdcdcdcdddddabd", "abcd",
261 "abcddddddccccccccccccccccccccccccabdacdabd",
262 "abccccca", "abcdcdcdccdabdabd"},
263 {match, nomatch, match, nomatch, match}},
264 {"ab+c*(a(bx|c)d)+", 5,
265 {"abcdcdcdcdddddabd", "abcd",
266 "abcddddddccccccccccccccccccccccccabdacdabd",
267 "abccccca", "abcdcdcdccdabdabd"},
268 {nomatch, nomatch, nomatch, nomatch, nomatch}},
269 {"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1,
270 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
272 {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
273 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
275 {"F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", 1,
276 {"osfjsodfonONONOnosndfsdnfsd"},
278 {"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1,
279 {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
282 {"", "bla", "blabla", "bl", "la", "b", "l", "a"},
283 {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
284 {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
285 {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b",
288 {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
290 {"", "a", "aa", "aaa", "aaaa", "aaaaa"},
291 {nomatch, match, match, match, match, match}},
292 {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1,
296 {"b", "bb", "ac", "", "acb", "bacbacac", "acacac", "abc"},
297 {match, match, match, nomatch, match, match, match, nomatch}},
299 {"", "ab", "c", "abc", "ababcc", "acc", "abac"},
300 {nomatch, match, match, match, match, nomatch, nomatch}},
301 {"((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1,
302 {"", "2j2jADK", "j2jADK"},
303 {nomatch, match, match}},
304 {"((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2,
305 {"", "2j2jjADK", "j2jADK"},
306 {nomatch, match, match}},
307 {"ab(c|d)+c*(a(b|c)d)+", 1,
319 for (i = 0; i < 17; i++)
321 if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
323 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
324 "Could not compile regex using regcomp()\n");
329 a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
330 check_nfa += test_automaton (a, &rx, &rxstr[i]);
331 GNUNET_REGEX_automaton_destroy (a);
334 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
335 check_dfa += test_automaton (a, &rx, &rxstr[i]);
336 check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
337 GNUNET_REGEX_automaton_destroy (a);
339 a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
340 check_dfa += test_automaton (a, &rx, &rxstr[i]);
341 GNUNET_REGEX_automaton_destroy (a);
343 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof);
344 GNUNET_free_non_null (check_proof);
351 for (i = 0; i < 20; i++)
352 check_rand += test_random (50, 60, 10);
354 return check_nfa + check_dfa + check_rand;