2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
26 #include "gnunet_regex_lib.h"
35 static struct Stack *nfa_stack = NULL;
38 push (void *val, struct Stack **stack)
40 struct Stack *new = GNUNET_malloc (sizeof (struct Stack *));
44 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not push to stack\n");
53 empty (struct Stack **stack)
55 return (NULL == *stack || NULL == stack);
59 pop (struct Stack **stack)
75 top (struct Stack **stack)
80 return (*stack)->data;
83 struct GNUNET_REGEX_Nfa
88 unsigned int statecnt;
89 struct State **states;
97 struct Transition *transitions;
108 static unsigned int state_id = 0;
109 static unsigned int transition_id = 0;
111 struct GNUNET_REGEX_Nfa *
112 nfa_create (struct State *start, struct State *end)
114 struct GNUNET_REGEX_Nfa *n;
116 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Nfa));
118 if (NULL == start && NULL == end)
128 n->states = GNUNET_malloc ((sizeof (struct State *)) * 2);
129 n->states[0] = start;
141 nfa_add_states (struct GNUNET_REGEX_Nfa *n, struct State **states,
148 GNUNET_array_grow (n->states, n->statecnt, n->statecnt + count);
149 for (j = 0; i < n->statecnt && j < count; i++, j++)
151 n->states[i] = states[j];
157 nfa_create_state (int accepting)
161 s = GNUNET_malloc (sizeof (struct State));
163 s->accepting = accepting;
165 s->transitions = NULL;
172 nfa_add_transition (struct State *from_state, const char literal,
173 struct State *to_state)
177 if (NULL == to_state)
179 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
180 "Could not create Transition. to_state was NULL.\n");
184 t.id = transition_id++;
188 if (0 == from_state->tcnt)
189 from_state->transitions = NULL;
191 GNUNET_array_append (from_state->transitions, from_state->tcnt, t);
195 mark_all_states (struct GNUNET_REGEX_Nfa *n, int visited)
199 for (i = 0; i < n->statecnt; i++)
201 n->states[i]->visited = visited;
206 print_states (struct GNUNET_REGEX_Nfa *n, char **out_str)
213 mark_all_states (n, 0);
215 s_all = GNUNET_malloc (sizeof (char));
218 for (i_s = 0; i_s < n->statecnt; i_s++)
220 struct Transition *ctran;
228 GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id);
230 s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_acc) + 1);
231 strcat (s_all, s_acc);
235 ctran = s->transitions;
238 for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++)
242 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n");
245 if (NULL == ctran->state)
247 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
248 "Transition from State %i has has no state for transitioning\n",
252 if (ctran->literal == 0)
254 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id,
259 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id,
260 ctran->state->id, ctran->literal);
263 s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_tran) + 1);
264 strcat (s_all, s_tran);
265 GNUNET_free (s_tran);
275 nfa_add_concatenation ()
277 struct GNUNET_REGEX_Nfa *A;
278 struct GNUNET_REGEX_Nfa *B;
279 struct GNUNET_REGEX_Nfa *new;
281 B = pop (&nfa_stack);
282 A = pop (&nfa_stack);
284 if (NULL == A || NULL == B)
286 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
287 "nfa_add_concatenationenation failed, because there were not enough elements on the stack");
291 nfa_add_transition (A->end, 0, B->start);
292 A->end->accepting = 0;
293 B->end->accepting = 1;
295 new = nfa_create (NULL, NULL);
296 nfa_add_states (new, A->states, A->statecnt);
297 nfa_add_states (new, B->states, B->statecnt);
298 new->start = A->start;
303 push (new, &nfa_stack);
309 struct GNUNET_REGEX_Nfa *A;
310 struct GNUNET_REGEX_Nfa *new;
314 A = pop (&nfa_stack);
318 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
319 "nfa_add_star_op failed, because there was no element on the stack");
323 start = nfa_create_state (0);
324 end = nfa_create_state (1);
326 nfa_add_transition (start, 0, A->start);
327 nfa_add_transition (start, 0, end);
328 nfa_add_transition (A->end, 0, A->start);
329 nfa_add_transition (A->end, 0, end);
331 A->end->accepting = 0;
334 new = nfa_create (start, end);
335 nfa_add_states (new, A->states, A->statecnt);
338 push (new, &nfa_stack);
344 struct GNUNET_REGEX_Nfa *A;
346 A = pop (&nfa_stack);
350 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
351 "nfa_add_plus_op failed, because there was no element on the stack");
355 nfa_add_transition (A->end, 0, A->start);
357 push (A, &nfa_stack);
361 nfa_add_alternation ()
363 struct GNUNET_REGEX_Nfa *A;
364 struct GNUNET_REGEX_Nfa *B;
365 struct GNUNET_REGEX_Nfa *new;
369 B = pop (&nfa_stack);
370 A = pop (&nfa_stack);
372 if (NULL == A || NULL == B)
374 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
375 "alternation failed, because there were not enough elements on the stack");
379 start = nfa_create_state (0);
380 end = nfa_create_state (1);
381 nfa_add_transition (start, 0, A->start);
382 nfa_add_transition (start, 0, B->start);
384 nfa_add_transition (A->end, 0, end);
385 nfa_add_transition (B->end, 0, end);
387 A->end->accepting = 0;
388 B->end->accepting = 0;
391 new = nfa_create (start, end);
392 nfa_add_states (new, A->states, A->statecnt);
393 nfa_add_states (new, B->states, B->statecnt);
397 push (new, &nfa_stack);
401 nfa_add_literal (const char lit)
403 struct GNUNET_REGEX_Nfa *n;
407 start = nfa_create_state (0);
408 end = nfa_create_state (1);
409 nfa_add_transition (start, lit, end);
410 n = nfa_create (start, end);
411 push (n, &nfa_stack);
414 struct GNUNET_REGEX_Nfa *
415 GNUNET_REGEX_construct_nfa (const char *regex, size_t len)
417 struct GNUNET_REGEX_Nfa *nfa;
419 unsigned int altcount;
420 unsigned int atomcount;
435 for (count = 0; count < len && *regex; count++, regex++)
444 nfa_add_concatenation ();
446 GNUNET_array_grow (p, pcount, pcount + 1);
447 p[pcount - 1].altcount = altcount;
448 p[pcount - 1].atomcount = atomcount;
455 while (--atomcount > 0)
456 nfa_add_concatenation ();
464 while (--atomcount > 0)
465 nfa_add_concatenation ();
466 for (; altcount > 0; altcount--)
467 nfa_add_alternation ();
469 altcount = p[pcount].altcount;
470 atomcount = p[pcount].atomcount;
483 case 92: /* escape: \ */
490 nfa_add_concatenation ();
492 nfa_add_literal (*regex);
499 while (--atomcount > 0)
500 nfa_add_concatenation ();
501 for (; altcount > 0; altcount--)
502 nfa_add_alternation ();
507 nfa = pop (&nfa_stack);
509 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
510 "Created NFA with %i States and a total of %i Transitions\n",
511 state_id, transition_id);
516 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
518 while (!empty (&nfa_stack))
519 GNUNET_REGEX_destroy_nfa (pop (&nfa_stack));
524 GNUNET_REGEX_destroy_nfa (struct GNUNET_REGEX_Nfa *n)
528 for (i = 0; i < n->statecnt; i++)
530 GNUNET_free (n->states[i]);
535 GNUNET_REGEX_save_nfa_graph (struct GNUNET_REGEX_Nfa *n, const char *filename)
547 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
551 mark_all_states (n, 0);
553 states = GNUNET_malloc (sizeof (char));
556 for (i_s = 0; i_s < n->statecnt; i_s++)
558 struct Transition *ctran;
566 GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id);
568 states = GNUNET_realloc (states, strlen (states) + strlen (s_acc) + 1);
569 strcat (states, s_acc);
573 ctran = s->transitions;
576 for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++)
580 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n");
583 if (NULL == ctran->state)
585 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
586 "Transition from State %i has has no state for transitioning\n",
590 if (ctran->literal == 0)
592 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id,
597 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id,
598 ctran->state->id, ctran->literal);
601 states = GNUNET_realloc (states, strlen (states) + strlen (s_tran) + 1);
602 strcat (states, s_tran);
603 GNUNET_free (s_tran);
611 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA");
615 if (NULL == filename || strlen (filename) < 1)
617 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
618 GNUNET_free (states);
622 p = fopen (filename, "w");
625 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
627 GNUNET_free (states);
631 start = "digraph G {\nrankdir=LR\n";
633 fwrite (start, strlen (start), 1, p);
634 fwrite (states, strlen (states), 1, p);
635 fwrite (end, strlen (end), 1, p);
638 GNUNET_free (states);