2 * This file is part of GNUnet
3 * Copyright (C) 2012 GNUnet e.V.
5 * GNUnet is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published
7 * by the Free Software Foundation; either version 3, or (at your
8 * option) any later version.
10 * GNUnet is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNUnet; see the file COPYING. If not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
21 * @file src/regex/regex_test_lib.c
22 * @brief library to read regexes representing IP networks from a file
23 * and simplify them into one big regex, in order to run
24 * tests (regex performance, cadet profiler).
25 * @author Bartlomiej Polot
29 #include "gnunet_util_lib.h"
33 * Struct to hold the tree formed by prefix-combining the regexes.
/* Node of the tree built by prefix-combining regexes.
 * Only part of the declaration is visible in this excerpt: the sibling links
 * (next/prev) and the two ends of the child list (head/tail).  Other code in
 * this file also reads a string member `s` on these nodes; its declaration is
 * not visible here. */
35 struct RegexCombineCtx {
38 * Next node with same prefix but different token.
40 struct RegexCombineCtx *next;
43 * Prev node with same prefix but different token.
45 struct RegexCombineCtx *prev;
48 * First child node with same prefix and token.
50 struct RegexCombineCtx *head;
/* Other end of the child list; passed together with `head` to
 * GNUNET_CONTAINER_DLL_insert when children are added in regex_add. */
55 struct RegexCombineCtx *tail;
/* NOTE(review): the enclosing function's signature and loop body are not
 * visible in this excerpt; only this loop header survives.  It iterates i
 * over 0 .. n - 1. */
68 for (i = 0; i < n; i++)
/* Debug helper: prints ctx->s and then recurses into every child of ctx with
 * level + 1.
 * NOTE(review): how `level` affects the output, and whether the printf is
 * guarded against a NULL ctx->s, is on lines not visible in this excerpt. */
73 debugctx (struct RegexCombineCtx *ctx, int level)
75 struct RegexCombineCtx *p;
/* Print this node's token wrapped in single quotes. */
78 printf ("'%s'\n", ctx->s);
/* Walk the child list from head via next, one level deeper per recursion. */
81 for (p = ctx->head; NULL != p; p = p->next)
83 debugctx (p, level + 1);
89 * Extract a string from all prefix-combined regexes.
91 * @param ctx Context with 0 or more regexes.
93 * @return Regex that matches any of the added regexes.
/* Build one regex string for the subtree rooted at ctx: each child is combined
 * recursively and the child results are concatenated, each followed by '|'.
 * The result is then emitted as ctx->s followed by the parenthesised group.
 * NOTE(review): several branch conditions and assignments (e.g. how `opt` is
 * set and which format below is chosen) are on lines not visible here. */
96 regex_combine (struct RegexCombineCtx *ctx)
98 struct RegexCombineCtx *p;
105 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
/* Start from an empty accumulated alternation. */
106 regex = GNUNET_strdup ("");
108 for (p = ctx->head; NULL != p; p = p->next)
110 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s);
/* Recursively obtain the regex for this child subtree. */
111 s = regex_combine (p);
112 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
/* Append the child's regex plus a '|' separator to the accumulated string;
 * the previous accumulator and the child's string are released afterwards.
 * NOTE(review): the reassignment of `regex` from `tmp` is presumably on a
 * line not visible here — confirm. */
119 GNUNET_asprintf (&tmp, "%s%s|", regex, s);
120 GNUNET_free_non_null (regex);
123 GNUNET_free_non_null (s);
124 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s);
127 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
128 len = strlen (regex);
/* "Empty" path: return a copy of this node's own token, or NULL when the node
 * has no token.  NOTE(review): the guarding condition is not visible;
 * presumably it tests len == 0. */
131 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
133 return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
/* Drop the trailing '|' left behind by the accumulation loop. */
136 if ('|' == regex[len - 1])
137 regex[len - 1] = '\0';
/* Two output shapes: the group is either optional ("(...)?") or mandatory
 * ("(...)").  NOTE(review): the selecting condition is not visible here. */
142 GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
144 GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
149 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
155 * Get the number of matching characters on the prefix of both strings.
157 * @param s1 String 1.
158 * @param s2 String 2.
160 * @return Number of characters of matching prefix.
/* Count how many leading characters s1 and s2 have in common.
 * NOTE(review): the computation of l1/l2 (presumably the string lengths) and
 * the return statements are on lines not visible in this excerpt. */
163 get_prefix_length (const char *s1, const char *s2)
/* The common prefix cannot be longer than the shorter of the two lengths. */
172 limit = l1 > l2 ? l2 : l1;
/* Try growing prefix lengths 1..limit; each iteration compares the first i
 * characters of both strings from the start and reacts to the first mismatch. */
174 for (i = 1; i <= limit; i++)
176 if (0 != strncmp (s1, s2, i))
184 * Return the child context with the longest prefix match with the regex.
185 * Usually only one child will match, search all just in case.
187 * @param ctx Context whose children to search.
188 * @param regex String to match.
190 * @return Child with the longest prefix, NULL if no child matches.
/* Scan the children of ctx and measure, for each one, the length of the prefix
 * its token shares with regex (via get_prefix_length).
 * NOTE(review): the bookkeeping of best/best_l and the return statement are on
 * lines not visible in this excerpt. */
192 static struct RegexCombineCtx *
193 get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
195 struct RegexCombineCtx *p;
196 struct RegexCombineCtx *best;
202 for (p = ctx->head; NULL != p; p = p->next)
204 l = get_prefix_length (p->s, regex);
/* Sanity check that no earlier child had already produced a non-zero match.
 * NOTE(review): the surrounding condition is not visible; presumably this runs
 * only when the current child matches — confirm. */
207 GNUNET_break (0 == best_l);
217 * Add a single regex to a context, combining with existing regexes by prefix.
219 * @param ctx Context with 0 or more regexes.
220 * @param regex Regex to add.
/* Add one regex below ctx, sharing any common prefix with an existing child.
 *
 * If a child shares a prefix with regex, the remainder of regex is added
 * recursively under that child; when the child's token is only partially
 * matched, the child is first split at the end of the shared prefix.
 * If no child shares a prefix, regex becomes a new child of ctx.
 *
 * ctx   - context node to add under.
 * regex - regex string to add; an empty string is special-cased by the first
 *         check (its handling is on a line not visible in this excerpt).
 *
 * NOTE(review): this excerpt is missing lines (e.g. where `len` is computed
 * and the return statements); only the visible lines are reproduced here. */
223 regex_add (struct RegexCombineCtx *ctx, const char *regex)
225 struct RegexCombineCtx *p;
226 struct RegexCombineCtx *newctx;
227 unsigned int prefix_l;
232 if (0 == strlen (regex))
/* Pick the child whose token shares the longest prefix with regex. */
235 p = get_longest_prefix (ctx, regex);
236 if (NULL != p) /* There is some prefix match, reduce regex and try again */
238 prefix_l = get_prefix_length (p->s, regex);
/* Tails of the child's token and of the new regex after the shared prefix. */
239 rest_s = &p->s[prefix_l];
/* Restored from the mis-decoded "®ex" (HTML "&reg" entity residue). */
240 rest_r = &regex[prefix_l];
241 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
/* The precision argument of "%.*s" must be an int, hence the cast. */
242 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", (int) prefix_l, p->s);
243 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
244 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
246 if (prefix_l < len) /* only partial match, split existing state */
/* The new node takes over p's children and the unmatched tail of p's token. */
248 newctx = GNUNET_new (struct RegexCombineCtx);
249 newctx->head = p->head;
250 newctx->tail = p->tail;
251 newctx->s = GNUNET_malloc(len - prefix_l + 1);
252 strncpy (newctx->s, rest_s, len - prefix_l + 1);
/* Truncate p's token so that it holds only the shared prefix. */
256 p->s[prefix_l] = '\0';
/* Continue with what is left of the regex underneath the matched child. */
258 regex_add (p, rest_r);
261 /* There is no prefix match, add new */
262 if (NULL == ctx->head && NULL != ctx->s)
264 /* this was the end before, add empty string */
265 newctx = GNUNET_new (struct RegexCombineCtx);
266 newctx->s = GNUNET_strdup ("");
267 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
269 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
270 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
271 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
/* Append the whole regex as a fresh child of ctx. */
272 newctx = GNUNET_new (struct RegexCombineCtx);
273 newctx->s = GNUNET_strdup (regex);
274 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
279 * Free all resources used by the context node and all its children.
281 * @param ctx Context to free.
/* Recursively release a context node: every child is destroyed first, then
 * the node's own token string is freed.
 * NOTE(review): the line that saves `next` before the recursive call, and the
 * release of ctx itself, are not visible in this excerpt. */
284 regex_ctx_destroy (struct RegexCombineCtx *ctx)
286 struct RegexCombineCtx *p;
287 struct RegexCombineCtx *next;
/* Advance via the separate `next` variable rather than p->next, since p is
 * destroyed inside the loop body. */
289 for (p = ctx->head; NULL != p; p = next)
292 regex_ctx_destroy (p);
294 GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
300 * Return a prefix-combine regex that matches the same strings as
301 * any of the original regexes.
303 * WARNING: only useful for reading specific regexes for specific applications,
304 * namely the gnunet-regex-profiler / gnunet-regex-daemon.
305 * This function DOES NOT support arbitrary regex combining.
/* Combine a NULL-terminated array of regexes into a single regex string:
 * a fresh root context is created, every input regex is added to it with
 * regex_add, the tree is flattened with regex_combine and then destroyed.
 * NOTE(review): the return statement is on a line not visible here;
 * presumably `combined` is returned to the caller. */
308 REGEX_TEST_combine (char * const regexes[])
313 struct RegexCombineCtx *ctx;
/* Root node of the prefix tree. */
315 ctx = GNUNET_new (struct RegexCombineCtx);
/* Iterate until the NULL terminator of the input array. */
316 for (i = 0; regexes[i]; i++)
318 current = regexes[i];
319 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
320 regex_add (ctx, current);
321 /* debugctx (ctx, 0); */
323 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
324 /* debugctx (ctx, 0); */
326 combined = regex_combine (ctx);
/* The tree is no longer needed once the combined string has been built. */
328 regex_ctx_destroy (ctx);
335 * Read a set of regexes from a file, one per line and return them in an array
336 * suitable for REGEX_TEST_combine.
337 * The array must be free'd using REGEX_TEST_free_from_file.
339 * @param filename Name of the file containing the regexes.
341 * @return A newly allocated, NULL terminated array of regexes.
/* Read a file into memory and split it into whitespace-separated regexes,
 * collecting them into a growing, NULL-terminated array of strings.
 * NOTE(review): the checks guarding the error paths, the loop header, the
 * updates of `offset` and the return statements are on lines not visible in
 * this excerpt. */
344 REGEX_TEST_read_from_file (const char *filename)
346 struct GNUNET_DISK_FileHandle *f;
/* Open the file read-only. */
355 f = GNUNET_DISK_file_open (filename,
356 GNUNET_DISK_OPEN_READ,
357 GNUNET_DISK_PERM_NONE);
360 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
361 "Can't open file %s for reading\n", filename);
/* If the size cannot be determined, log the error and close the handle. */
364 if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size))
366 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
367 "Can't get size of file %s\n", filename);
368 GNUNET_DISK_file_close (f);
371 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
372 "using file %s, size %llu\n",
373 filename, (unsigned long long) size);
/* One extra byte beyond the file size is allocated for the buffer.
 * NOTE(review): the result of GNUNET_DISK_file_read is not used on this line,
 * so a short or failed read would go unnoticed here — confirm. */
375 buffer = GNUNET_malloc (size + 1);
376 GNUNET_DISK_file_read (f, buffer, size);
377 GNUNET_DISK_file_close (f);
/* Start with an array holding a single pointer slot. */
378 regexes = GNUNET_malloc (sizeof (char *));
/* Scratch buffer large enough for the longest possible token. */
385 regex = GNUNET_malloc (size + 1);
/* NOTE(review): sscanf can return EOF (a negative value); after the cast to
 * size_t that becomes a large positive number — confirm that the check on the
 * following (not visible) lines copes with it. */
386 len = (size_t) sscanf (&buffer[offset], "%s", regex);
389 len = strlen (regex);
/* Shrink the scratch buffer to the token's real size before storing it. */
394 regex = GNUNET_realloc (regex, len + 1);
/* Grow the array by one slot, store the token in the second-to-last slot and
 * keep the last slot NULL as terminator.
 * NOTE(review): this relies on GNUNET_array_grow updating `nr` — confirm. */
395 GNUNET_array_grow (regexes, nr, nr + 1);
396 GNUNET_assert (NULL == regexes[nr - 2]);
397 regexes[nr - 2] = regex;
398 regexes[nr - 1] = NULL;
400 } while (offset < size);
401 GNUNET_free_non_null (regex);
402 GNUNET_free (buffer);
409 * Free all memory reserved for a set of regexes created by read_from_file.
411 * @param regexes NULL-terminated array of regexes.
/* Release a NULL-terminated array of regex strings: every string up to the
 * NULL terminator is freed, then the array itself. */
414 REGEX_TEST_free_from_file (char **regexes)
418 for (i = 0; regexes[i]; i++)
419 GNUNET_free (regexes[i]);
420 GNUNET_free (regexes);
423 /* end of regex_test_lib.c */