{
int is_dup;
struct Transition *t;
+ struct Transition *oth;
if (NULL == from_state)
{
if (is_dup)
return;
+ // sort transitions by label
+ for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
+ {
+ if (oth->label > label)
+ break;
+ }
+
t = GNUNET_malloc (sizeof (struct Transition));
t->id = ctx->transition_id++;
t->label = label;
// Add outgoing transition to 'from_state'
from_state->transition_count++;
- GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
- from_state->transitions_tail, t);
+ GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
+ from_state->transitions_tail, oth, t);
}
/**
char *R_cur_r;
int length_l;
int length_r;
+ int s_cnt;
+ int l_cnt;
char *complete_regex;
k = 0;
n = a->state_count;
+ // number the states
for (i = 0, s = a->states_head; NULL != s; s = s->next, i++)
{
s->marked = i;
{
for (j = 0; j < n; j++)
{
+ //construct R_cur
+
// 0*R = R*0 = 0
// 0+R = R+0 = R
if (NULL == R_last[i][k] || NULL == R_last[k][j])
if (NULL != R_last[i][j])
R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
}
+ // a(ba)*b = (ab)+
+ /*else if ((NULL == R_last[i][j] || 0 == strlen (R_last[i][j])) && */
+ /*NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && */
+ /*NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && */
+ /*0 == strncmp (R_last[k][k], R_last[k][j], (strlen (R_last[k][k]) - 1)) && */
+ /*R_last[i][k][0] == R_last[k][k][strlen (R_last[k][k]) - 1]) */
+ /*{ */
+ /*GNUNET_asprintf (&R_cur[i][j], "(%s%s)+", R_last[i][k], R_last[k][j]); */
+ /*} */
else
{
// R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj
if (NULL != R_last[i][j])
strcat (R_cur_l, R_last[i][j]);
- if (NULL != R_last[i][k])
+ if (NULL != R_last[i][k] && 0 != strcmp (R_last[i][k], R_last[k][k]))
strcat (R_cur_r, R_last[i][k]);
if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], ""))
{
- if (R_last[k][k][0] == '(' && R_last[k][k][strlen (R_last[k][k])-1] == ')')
+ if (R_last[k][k][0] == '(' &&
+ R_last[k][k][strlen (R_last[k][k]) - 1] == ')')
{
strcat (R_cur_r, R_last[k][k]);
- strcat (R_cur_r, "*");
}
else
{
strcat (R_cur_r, "(");
strcat (R_cur_r, R_last[k][k]);
- strcat (R_cur_r, ")*");
+ strcat (R_cur_r, ")");
}
+
+ if (0 == strcmp (R_last[i][k], R_last[k][k]) ||
+ 0 == strcmp (R_last[k][k], R_last[k][j]))
+ strcat (R_cur_r, "+");
+ else
+ strcat (R_cur_r, "*");
}
- if (NULL != R_last[k][j])
+ if (NULL != R_last[k][j] && 0 != strcmp (R_last[k][k], R_last[k][j]))
strcat (R_cur_r, R_last[k][j]);
+ // simplifications...
+
// | is idempotent: a | a = a for all a in A
if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") ||
0 == strcmp (R_cur_r, ""))
else
GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l);
}
- // a | a a* a = a+
+ // TODO: in theory only applicable if (e + a) | (e + a)(e + a)*(e+a)
+ // where e means epsilon... check if practical!
+ // a | a a* a = a*
else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k]
&& R_last[k][k] == R_last[k][j])
{
- GNUNET_asprintf (&R_cur[i][j], "%s+", R_last[i][j]);
+ if (1 >= strlen (R_last[k][k]) ||
+ (R_last[k][k][0] == '(' &&
+ R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+ GNUNET_asprintf (&R_cur[i][j], "%s*", R_last[k][k]);
+ else
+ GNUNET_asprintf (&R_cur[i][j], "(%s)*", R_last[k][k]);
}
// a | a b* b => a | a b | a b b | ... => a b*
else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == R_last[k][j])
{
- GNUNET_asprintf (&R_cur[i][j], "%s%s*", R_last[i][k], R_last[k][k]);
+ // if a is xb then a b* becomes x b b* = x b+
+
+ s_cnt = strlen (R_last[k][k]);
+ l_cnt = strlen (R_last[i][k]);
+ R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4);
+
+ if (l_cnt > 0 && s_cnt >= l_cnt)
+ for (; s_cnt > 0; s_cnt--, l_cnt--)
+ if (R_last[i][k][l_cnt] != R_last[k][k][s_cnt])
+ break;
+
+ if (strlen (R_last[i][k]) > 0 && 0 == s_cnt && 0 <= l_cnt)
+ strncat (R_cur[i][j], R_last[i][k], l_cnt);
+ else
+ strcat (R_cur[i][j], R_last[i][k]);
+
+ if (1 >= strlen (R_last[k][k]) &&
+ !(R_last[k][k][0] == '(' &&
+ R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+ {
+ strcat (R_cur[i][j], "(");
+ strcat (R_cur[i][j], R_last[k][k]);
+ strcat (R_cur[i][j], ")");
+ }
+ else
+ strcat (R_cur[i][j], R_last[k][k]);
+
+ if (0 == s_cnt && 0 <= l_cnt)
+ strcat (R_cur[i][j], "+");
+ else
+ strcat (R_cur[i][j], "*");
}
// a | b b* a => a | b a | b b a | ... => b* a
else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == R_last[k][k])
{
- GNUNET_asprintf (&R_cur[i][j], "%s*%s", R_last[k][k], R_last[k][j]);
+ // if a is bx then b* a becomes b+ x
+
+ temp = NULL;
+ s_cnt = strlen (R_last[k][k]);
+ l_cnt = strlen (R_last[k][j]);
+ R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4);
+
+ int bla;
+
+ if (l_cnt > 0 && s_cnt >= l_cnt)
+ for (bla = 0; bla < s_cnt; bla++)
+ if (R_last[k][k][bla] != R_last[k][j][bla])
+ break;
+
+ if (1 >= strlen (R_last[k][k]) &&
+ !(R_last[k][k][0] == '(' &&
+ R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
+ {
+ strcat (R_cur[i][j], "(");
+ strcat (R_cur[i][j], R_last[k][k]);
+ strcat (R_cur[i][j], ")");
+ }
+ else
+ strcat (R_cur[i][j], R_last[k][k]);
+
+ if (bla == s_cnt)
+ strcat (R_cur[i][j], "+");
+ else
+ strcat (R_cur[i][j], "*");
+
+ if (strlen (R_last[k][j]) > 0 && bla == s_cnt)
+ strcat (R_cur[i][j], &R_last[k][j][bla]);
+ else
+ strcat (R_cur[i][j], R_last[k][j]);
}
else
GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
GNUNET_free_non_null (R_cur_l);
GNUNET_free_non_null (R_cur_r);
-
}
}
}
+ // set R_last = R_cur
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
&states[i]->hash);
}
- // complete regex for whole DFA
+ // complete regex for whole DFA: union of all pairs (start state/accepting state(s)).
complete_regex = NULL;
for (i = 0; i < n; i++)
{
GNUNET_assert (0 == cls_check->len);
GNUNET_free (cls_check);
+ // sort the states
if (cls->len > 1)
qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
state_compare);
return;
GNUNET_free (a->regex);
+ GNUNET_free_non_null (a->computed_regex);
for (s = a->states_head; NULL != s;)
{
GNUNET_free (a);
}
+/**
+ * Save a state to an open file pointer. cls is expected to be a file pointer to
+ * an open file. Used only in conjunction with
+ * GNUNET_REGEX_automaton_save_graph.
+ *
+ * @param cls file pointer
+ * @param s state
+ */
+void
+GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s)
+{
+ FILE *p;
+ struct Transition *ctran;
+ char *s_acc = NULL;
+ char *s_tran = NULL;
+
+ p = cls;
+
+ if (s->accepting)
+ {
+ GNUNET_asprintf (&s_acc,
+ "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
+ s->name, s->scc_id);
+ }
+ else
+ {
+ GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
+ s->scc_id);
+ }
+
+ if (NULL == s_acc)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name);
+ return;
+ }
+ fwrite (s_acc, strlen (s_acc), 1, p);
+ GNUNET_free (s_acc);
+ s_acc = NULL;
+
+ for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
+ {
+ if (NULL == ctran->to_state)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Transition from State %i has has no state for transitioning\n",
+ s->id);
+ continue;
+ }
+
+ if (ctran->label == 0)
+ {
+ GNUNET_asprintf (&s_tran,
+ "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n",
+ s->name, ctran->to_state->name, s->scc_id);
+ }
+ else
+ {
+ GNUNET_asprintf (&s_tran,
+ "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n",
+ s->name, ctran->to_state->name, ctran->label, s->scc_id);
+ }
+
+ if (NULL == s_tran)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
+ s->name);
+ return;
+ }
+
+ fwrite (s_tran, strlen (s_tran), 1, p);
+ GNUNET_free (s_tran);
+ s_tran = NULL;
+ }
+}
+
/**
* Save the given automaton as a GraphViz dot file
*
GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
const char *filename)
{
- struct GNUNET_REGEX_State *s;
- struct Transition *ctran;
- char *s_acc = NULL;
- char *s_tran = NULL;
char *start;
char *end;
FILE *p;
start = "digraph G {\nrankdir=LR\n";
fwrite (start, strlen (start), 1, p);
- for (s = a->states_head; NULL != s; s = s->next)
- {
- if (s->accepting)
- {
- GNUNET_asprintf (&s_acc,
- "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
- s->name, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
- s->scc_id);
- }
-
- if (NULL == s_acc)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
- s->name);
- return;
- }
- fwrite (s_acc, strlen (s_acc), 1, p);
- GNUNET_free (s_acc);
- s_acc = NULL;
-
- for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
- {
- if (NULL == ctran->to_state)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Transition from State %i has has no state for transitioning\n",
- s->id);
- continue;
- }
-
- if (ctran->label == 0)
- {
- GNUNET_asprintf (&s_tran,
- "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n",
- s->name, ctran->to_state->name, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_tran,
- "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n",
- s->name, ctran->to_state->name, ctran->label,
- s->scc_id);
- }
-
- if (NULL == s_tran)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
- s->name);
- return;
- }
-
- fwrite (s_tran, strlen (s_tran), 1, p);
- GNUNET_free (s_tran);
- s_tran = NULL;
- }
- }
+ automaton_traverse (p, a, GNUNET_REGEX_automaton_save_graph_step);
end = "\n}\n";
fwrite (end, strlen (end), 1, p);
s = a->start;
+ // If the string is empty but the starting state is accepting, we accept.
+ if ((NULL == string || 0 == strlen (string)) && s->accepting)
+ return 0;
+
for (strp = string; NULL != strp && *strp; strp++)
{
s = dfa_move (s, *strp);
return -1;
}
+ // If the string is empty but the starting state is accepting, we accept.
+ if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
+ return 0;
+
result = 1;
strp = string;
sset = nfa_closure_create (a, a->start, 0);
int check_nfa;
int check_dfa;
int check_rand;
+ char *check_proof;
- struct Regex_String_Pair rxstr[8] = {
+ struct Regex_String_Pair rxstr[12] = {
{"ab?(abcd)?", 5,
{"ababcd", "abab", "aabcd", "a", "abb"},
{match, nomatch, match, match, nomatch}},
{"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1,
{"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
{nomatch}},
+ {"(bla)*", 8,
+ {"", "bla", "blabla", "bl", "la", "b", "l", "a"},
+ {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
+ {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", "l",
+ "a"},
+ {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"a|aa*a", 6,
+ {"", "a", "aa", "aaa", "aaaa", "aaaaa"},
+ {nomatch, match, match, match, match, match}},
+ {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1,
+ {"abcabdblaacdbla"},
+ {nomatch}},
{"ab(c|d)+c*(a(b|c)d)+", 1,
{"abacd"},
{nomatch}}
check_dfa = 0;
check_rand = 0;
- for (i = 0; i < 8; i++)
+ for (i = 0; i < 12; i++)
{
if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
{
// DFA test
a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
check_dfa += test_automaton (a, &rx, &rxstr[i]);
+ check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a));
+ GNUNET_REGEX_automaton_destroy (a);
+ a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
+ check_dfa += test_automaton (a, &rx, &rxstr[i]);
GNUNET_REGEX_automaton_destroy (a);
+ if (0 != check_dfa)
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof);
+ GNUNET_free_non_null (check_proof);
regfree (&rx);
}