}
}
+/**
+ * Compare two states. Used for sorting.
+ *
+ * @param a first state
+ * @param b second state
+ *
+ * @return an integer less than, equal to, or greater than zero
+ * if the first argument is considered to be respectively
+ * less than, equal to, or greater than the second.
+ */
static int
state_compare (const void *a, const void *b)
{
GNUNET_free (s);
}
+/**
+ * Remove a state from the given automaton 'a'. Always use this function
+ * when altering the states of an automaton. Will also remove all transitions
+ * leading to this state, before destroying it.
+ *
+ * @param a automaton
+ * @param s state to remove
+ */
static void
automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
{
struct State *ss;
+ struct State *s_check;
+ struct Transition *t_check;
+
+ // remove state
ss = s;
GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
a->state_count--;
+
+ // remove all transitions leading to this state
+ for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
+ {
+ for (t_check = s_check->transitions_head; NULL != t_check;
+ t_check = t_check->next)
+ {
+ if (t_check->state == ss)
+ {
+ GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
+ s_check->transitions_tail, t_check);
+ s_check->transition_count--;
+ }
+ }
+ }
+
automaton_destroy_state (ss);
}
+/**
+ * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 's2'.
+ *
+ * @param ctx context
+ * @param a automaton
+ * @param s1 first state
+ * @param s2 second state, will be destroyed
+ */
+static void
+automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
+ struct GNUNET_REGEX_Automaton *a, struct State *s1,
+ struct State *s2)
+{
+ struct State *s_check;
+ struct Transition *t_check;
+ struct Transition *t;
+ char *new_name;
+
+ GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
+
+ // 1. Make all transitions pointing to s2 point to s1
+ for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
+ {
+ for (t_check = s_check->transitions_head; NULL != t_check;
+ t_check = t_check->next)
+ {
+ if (s_check != s2 && s2 == t_check->state)
+ t_check->state = s1;
+ }
+ }
+
+ // 2. Add all transitions from s2 to sX to s1
+ for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
+ {
+ for (t = s1->transitions_head; NULL != t; t = t->next)
+ {
+ if (t_check->literal != t->literal && NULL != t_check->state &&
+ t_check->state != t->state && t_check->state != s2)
+ {
+ add_transition (ctx, s1, t_check->literal, t_check->state);
+ }
+ }
+ }
+
+ // 3. Rename s1 to {s1,s2}
+ new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1);
+ strncat (new_name, s1->name, strlen (s1->name));
+ strncat (new_name, s2->name, strlen (s2->name));
+ if (NULL != s1->name)
+ GNUNET_free (s1->name);
+ s1->name = new_name;
+
+ // remove state
+ s_check = s2;
+ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
+ a->state_count--;
+ automaton_destroy_state (s_check);
+}
+
+/**
+ * Add a state to the automaton 'a', always use this function to
+ * alter the states DLL of the automaton.
+ *
+ * @param a automaton to add the state to
+ * @param s state that should be added
+ */
static void
automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
{
stack_len++;
while (stack_len > 0)
{
- s = stack[stack_len-1];
+ s = stack[stack_len - 1];
stack_len--;
- s->marked = 1; // mark s as visited
+ s->marked = 1; // mark s as visited
for (t = s->transitions_head; NULL != t; t = t->next)
{
if (NULL != t->state && 0 == t->state->marked)
dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
{
struct State *s;
- struct State *s_check;
struct Transition *t;
- struct Transition *t_check;
int dead;
GNUNET_assert (DFA == a->type);
continue;
// state s is dead, remove it
- // 1. remove all transitions to this state
- for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
- {
- for (t_check = s_check->transitions_head; NULL != t_check;
- t_check = t_check->next)
- {
- if (t_check->state == s)
- {
- GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
- s_check->transitions_tail, t_check);
- }
- }
- }
- // 2. remove state
automaton_remove_state (a, s);
}
}
* @param a DFA automaton
*/
static void
-dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a)
+dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
+ struct GNUNET_REGEX_Automaton *a)
{
+ int i;
+ int table[a->state_count][a->state_count];
+ struct State *s1;
+ struct State *s2;
+ struct Transition *t1;
+ struct Transition *t2;
+ int change;
+
+ change = 1;
+ for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
+ i++, s1 = s1->next)
+ s1->marked = i;
+
+ // Mark all pairs of accepting/!accepting states
+ for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+ {
+ for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
+ {
+ if ((s1->accepting && !s2->accepting) ||
+ (!s1->accepting && s2->accepting))
+ {
+ table[s1->marked][s2->marked] = 1;
+ }
+ else
+ table[s1->marked][s2->marked] = 0;
+ }
+ }
+
+ while (0 != change)
+ {
+ change = 0;
+ for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+ {
+ for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
+ {
+ if (0 != table[s1->marked][s2->marked])
+ continue;
+ for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
+ {
+ for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
+ {
+ if (t1->literal == t2->literal && t1->state == t2->state &&
+ (0 != table[t1->state->marked][t2->state->marked] ||
+ 0 != table[t2->state->marked][t1->state->marked]))
+ {
+ table[s1->marked][s2->marked] = t1->literal;
+ change = 1;
+ }
+ else if (t1->literal != t2->literal && t1->state != t2->state)
+ {
+ table[s1->marked][s2->marked] = -1;
+ change = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ struct State *s2_next;
+
+ for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
+ {
+ for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
+ {
+ s2_next = s2->next;
+ if (s1 != s2 && table[s1->marked][s2->marked] == 0)
+ automaton_merge_states (ctx, a, s1, s2);
+ }
+ }
}
/**
* @param a DFA automaton
*/
static void
-dfa_minimize (struct GNUNET_REGEX_Automaton *a)
+dfa_minimize (struct GNUNET_REGEX_Context *ctx,
+ struct GNUNET_REGEX_Automaton *a)
{
if (NULL == a)
return;
dfa_remove_dead_states (a);
// 3. Merge nondistinguishable states
- dfa_merge_nondistinguishable_states (a);
+ dfa_merge_nondistinguishable_states (ctx, a);
}
/**
if (NULL == nfa)
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Could not create DFA, because NFA creation failed\n");
return NULL;
}
GNUNET_free (dfa_stack);
GNUNET_REGEX_automaton_destroy (nfa);
- dfa_minimize (dfa);
+ /*dfa_minimize (&ctx, dfa);*/
return dfa;
}
return result;
}
-
/**
* Evaluates the given 'string' against the given compiled regex
*
};
static const char allowed_literals[] =
- "0123456789"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz";
+ "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
int
-test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_count)
+test_random (unsigned int rx_length, unsigned int max_str_len,
+ unsigned int str_count)
{
int i;
int j;
int rx_exp;
- char rand_rx[rx_length+1];
- char matching_str[str_count][max_str_len+1];
+ char rand_rx[rx_length + 1];
+ char matching_str[str_count][max_str_len + 1];
char *rand_rxp;
char *matching_strp;
int char_op_switch;
last_was_op = 1;
// Generate random regex and a string that matches the regex
- for (i=0; i<rx_length; i++)
+ for (i = 0; i < rx_length; i++)
{
- char_op_switch = 0 + (int)(1.0 * rand() / (RAND_MAX + 1.0));
+ char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
- if (0 == char_op_switch
- && !last_was_op)
+ if (0 == char_op_switch && !last_was_op)
{
last_was_op = 1;
rx_exp = rand () % 3;
switch (rx_exp)
{
- case 0:
- current_char = '+';
- break;
- case 1:
- current_char = '*';
- break;
- case 2:
- if (i < rx_length -1) // '|' cannot be at the end
- current_char = '|';
- else
- current_char = allowed_literals[rand() % (sizeof(allowed_literals) - 1)];
- break;
+ case 0:
+ current_char = '+';
+ break;
+ case 1:
+ current_char = '*';
+ break;
+ case 2:
+ if (i < rx_length - 1) // '|' cannot be at the end
+ current_char = '|';
+ else
+ current_char =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
+ break;
}
}
else
{
- current_char = allowed_literals[rand() % (sizeof(allowed_literals) - 1)];
+ current_char =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
last_was_op = 0;
}
- if (current_char != '+'
- && current_char != '*'
- && current_char != '|')
+ if (current_char != '+' && current_char != '*' && current_char != '|')
{
*matching_strp = current_char;
matching_strp++;
// Generate some random strings for matching...
// Start at 1, because the first string is generated above during regex generation
- for (i=1; i<str_count; i++)
+ for (i = 1; i < str_count; i++)
{
- str_len = rand() % max_str_len;
- for (j=0; j<str_len; j++)
- matching_str[i][j] = allowed_literals[rand() % (sizeof(allowed_literals) - 1)];
+ str_len = rand () % max_str_len;
+ for (j = 0; j < str_len; j++)
+ matching_str[i][j] =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
matching_str[i][str_len] = '\0';
}
// Now match
result = 0;
- for (i=0; i<str_count; i++)
+ for (i = 0; i < str_count; i++)
{
// Match string using DFA
dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
// Match string using glibc regex
if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not compile regex using regcomp\n");
return -1;
}
regfree (&rx);
// We only want to match the whole string, because that's what our DFA does, too.
- if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i])))
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 ||
+ matchptr[0].rm_eo != strlen (matching_str[i])))
eval_check = 1;
// compare result
}
int
-test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct Regex_String_Pair *rxstr)
+test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
+ struct Regex_String_Pair *rxstr)
{
int result;
int eval;
result = 0;
- for (i=0; i<rxstr->string_count; i++)
+ for (i = 0; i < rxstr->string_count; i++)
{
eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
// We only want to match the whole string, because that's what our DFA does, too.
- if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (rxstr->strings[i])))
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 ||
+ matchptr[0].rm_eo != strlen (rxstr->strings[i])))
eval_check = 1;
- if ((rxstr->expected_results[i] == match
- && (0 != eval || 0 != eval_check))
- ||
- (rxstr->expected_results[i] == nomatch
- && (0 == eval || 0 == eval_check)))
+ if ((rxstr->expected_results[i] == match && (0 != eval || 0 != eval_check))
+ || (rxstr->expected_results[i] == nomatch &&
+ (0 == eval || 0 == eval_check)))
{
- result = 1;
- regerror (eval_check, rx, error, sizeof error);
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
- "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
- rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
- eval, eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo);
+ result = 1;
+ regerror (eval_check, rx, error, sizeof error);
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
+ "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
+ rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
+ eval, eval_check, error, matchptr[0].rm_so,
+ matchptr[0].rm_eo);
}
}
return result;
int check_nfa;
int check_dfa;
int check_rand;
- struct Regex_String_Pair rxstr[2] = {
- {"ab(c|d)+c*(a(b|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"},
+
+ struct Regex_String_Pair rxstr[4] = {
+ {"ab(c|d)+c*(a(b|c)d)+", 5,
+ {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ "abccccca", "abcdcdcdccdabdabd"},
{match, nomatch, match, nomatch, match}},
- {"ab+c*(a(bx|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"},
- {nomatch, nomatch, nomatch, nomatch, nomatch}}};
+ {"ab+c*(a(bx|c)d)+", 5,
+ {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ "abccccca", "abcdcdcdccdabdabd"},
+ {nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
+ {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+ {nomatch}},
+ {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
+ {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+ {nomatch}}
+ };
check_nfa = 0;
check_dfa = 0;
check_rand = 0;
- for (i=0; i<2; i++)
+ for (i = 0; i < 4; i++)
{
if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp()\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not compile regex using regcomp()\n");
return 1;
}
regfree (&rx);
}
- srand (time(NULL));
- for (i=0; i< 100; i++)
+ srand (time (NULL));
+ for (i = 0; i < 100; i++)
check_rand += test_random (100, 150, 10);
return check_nfa + check_dfa + check_rand;