#include "gnunet_regex_lib.h"
#include "regex.h"
+/**
+ * Constant for how many bits the initial string regex should have.
+ */
#define INITIAL_BITS 10
/**
char *regex;
/**
- * Computed regex (result of RX->NFA->DFA->RX)
+ * Canonical regex (result of RX->NFA->DFA->RX)
*/
- char *computed_regex;
+ char *canonical_regex;
};
/**
/*
* Debug helper functions
*/
+
+/**
+ * Print all the transitions of state 's'.
+ *
+ * @param s state for which to print its transitions.
+ */
void
-debug_print_transitions (struct GNUNET_REGEX_State *);
+debug_print_transitions (struct GNUNET_REGEX_State *s);
+/**
+ * Print information of the given state 's'.
+ *
+ * @param s state for which debug information should be printed.
+ */
void
debug_print_state (struct GNUNET_REGEX_State *s)
{
debug_print_transitions (s);
}
+/**
+ * Print debug information for all states contained in the automaton 'a'.
+ *
+ * @param a automaton for which debug information of its states should be printed.
+ */
void
debug_print_states (struct GNUNET_REGEX_Automaton *a)
{
debug_print_state (s);
}
+/**
+ * Print debug information for given transition 't'.
+ *
+ * @param t transition for which to print debug info.
+ */
void
debug_print_transition (struct Transition *t)
{
debug_print_transition (t);
}
+
/**
* Recursive function doing DFS with 'v' as a start, detecting all SCCs inside
* the subgraph reachable from 'v'. Used with scc_tarjan function to detect all
* SCCs inside an automaton.
*
- * @param ctx context
+ * @param scc_counter counter for numbering the sccs
* @param v start vertex
* @param index current index
* @param stack stack for saving all SCCs
}
}
+
/**
* Detect all SCCs (Strongly Connected Components) inside the given automaton.
* SCCs will be marked using the scc_id on each state.
*
- * @param ctx context
- * @param a automaton
+ * @param a the automaton for which SCCs should be computed and assigned.
*/
static void
scc_tarjan (struct GNUNET_REGEX_Automaton *a)
* @param action_cls closure for action
*/
static void
-automaton_state_traverse (struct GNUNET_REGEX_State *s,
- unsigned int *count,
- GNUNET_REGEX_traverse_action action,
- void *action_cls)
+automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count,
+ GNUNET_REGEX_traverse_action action, void *action_cls)
{
struct Transition *t;
action (action_cls, *count, s);
(*count)++;
for (t = s->transitions_head; NULL != t; t = t->next)
- automaton_state_traverse (t->to_state, count, action, action_cls);
+ automaton_state_traverse (t->to_state, count, action, action_cls);
}
*/
static void
automaton_traverse (struct GNUNET_REGEX_Automaton *a,
- GNUNET_REGEX_traverse_action action,
- void *action_cls)
+ GNUNET_REGEX_traverse_action action, void *action_cls)
{
unsigned int count;
struct GNUNET_REGEX_State *s;
* using it to generate a regex.
*
* Currently only tests for first and last characters being '()' respectively.
- * FIXME: What about "(ab)|(cd)"?
+ * FIXME: What about "(ab)|(cd)"?
*
* @param str string
*
const char *pos;
unsigned int cnt;
- if ( (NULL == str) ||
- ((slen = strlen(str)) < 2) )
+ if ((NULL == str) || ((slen = strlen (str)) < 2))
return GNUNET_NO;
-
+
if ('(' != str[0])
return GNUNET_YES;
cnt = 1;
return GNUNET_YES;
}
op = strchr (pos, '(');
- if ( (NULL != op) && (op < cl))
+ if ((NULL != op) && (op < cl))
{
cnt++;
pos = op + 1;
* You need to GNUNET_free the returned string.
*
* Currently only tests for first and last characters being '()' respectively.
- * FIXME: What about "(ab)|(cd)"?
+ * FIXME: What about "(ab)|(cd)"?
*
* @param str string, free'd or re-used by this function, can be NULL
*
{
size_t slen;
- if ( (NULL == str) || ('(' != str[0]) || (str[(slen = strlen(str)) - 1] != ')') )
+ if ((NULL == str) || ('(' != str[0]) ||
+ (str[(slen = strlen (str)) - 1] != ')'))
return str;
memmove (str, &str[1], slen - 2);
str[slen - 2] = '\0';
static int
has_epsilon (const char *str)
{
- return (NULL != str) && ('(' == str[0]) && ('|' == str[1]) && (')' == str[strlen(str) - 1]);
+ return (NULL != str) && ('(' == str[0]) && ('|' == str[1]) &&
+ (')' == str[strlen (str) - 1]);
}
if (NULL == str)
return NULL;
- if ( ('(' == str[0]) && ('|' == str[1]) )
+ if (('(' == str[0]) && ('|' == str[1]))
{
len = strlen (str);
- if (')' == str[len-1])
+ if (')' == str[len - 1])
return GNUNET_strndup (&str[2], len - 3);
}
return GNUNET_strdup (str);
}
-/**
+/**
* Compare 'str1', starting from position 'k', with whole 'str2'
- *
+ *
* @param str1 first string to compare, starting from position 'k'
* @param str2 second string for comparison
* @param k starting position in 'str1'
- *
+ *
* @return -1 if any of the strings is NULL or 'k' exceeds the length of 'str1',
* 0 if equal, non 0 otherwise
*/
static int
strkcmp (const char *str1, const char *str2, size_t k)
{
- if ( (NULL == str1) || (NULL == str2) || (strlen(str1) < k) )
+ if ((NULL == str1) || (NULL == str2) || (strlen (str1) < k))
return -1;
return strcmp (&str1[k], str2);
}
* Compare two strings for equality. If either is NULL (or if both are
* NULL), they are not equal.
*
- * @return 0 if the strings are the same, 1 or -1 if not
+ * @param str1 first string for comparison.
+ * @param str2 second string for comparison.
+ *
+ * @return 0 if the strings are the same, non-zero if not (-1 if either is NULL)
*/
static int
nullstrcmp (const char *str1, const char *str2)
{
- if ( (NULL == str1) || (NULL == str2) )
+ if ((NULL == str1) || (NULL == str2))
return -1;
return strcmp (str1, str2);
}
-/**
+/**
* Helper function used as 'action' in 'automaton_traverse' function to create
* the depth-first numbering of the states.
- *
+ *
* @param cls states array.
* @param count current state counter.
* @param s current state.
/* Compute regular expressions of length "1" between each pair of states */
for (i = 0; i < n; i++)
{
- for (j=0;j<n;j++)
+ for (j = 0; j < n; j++)
{
R_cur[i][j] = NULL;
R_last[i][j] = NULL;
}
for (t = states[i]->transitions_head; NULL != t; t = t->next)
{
- j = t->to_state->proof_id;
+ j = t->to_state->proof_id;
if (NULL == R_last[i][j])
- GNUNET_asprintf (&R_last[i][j], "%c", t->label);
+ GNUNET_asprintf (&R_last[i][j], "%c", t->label);
else
- {
- temp_a = R_last[i][j];
- GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
- GNUNET_free (temp_a);
- }
+ {
+ temp_a = R_last[i][j];
+ GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
+ GNUNET_free (temp_a);
+ }
if (GNUNET_YES == needs_parentheses (R_last[i][j]))
- {
- temp_a = R_last[i][j];
- GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
- GNUNET_free (temp_a);
- }
+ {
+ temp_a = R_last[i][j];
+ GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
+ GNUNET_free (temp_a);
+ }
}
if (NULL == R_last[i][i])
GNUNET_asprintf (&R_last[i][i], "");
else
- {
- temp_a = R_last[i][i];
- GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]);
- GNUNET_free (temp_a);
- }
+ {
+ temp_a = R_last[i][i];
+ GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]);
+ GNUNET_free (temp_a);
+ }
}
R_cur_r = NULL;
R_cur_l = NULL;
- // cache results from strcmp, we might need these many times
- ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
- ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
- ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
- ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
- kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
+ // cache results from strcmp, we might need these many times
+ ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
+ ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
+ ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
+ ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
+ kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
// $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj}$
// With: R_cur[i][j] = R_cur_l | R_cur_r
// Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
// as parentheses, so we can better compare the contents
- R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
+ R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k]));
R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk);
clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]);
-
+
// construct R_cur_l (and, if necessary R_cur_r)
if (NULL != R_last[i][j])
{
// Assign R_temp_ij to R_last[i][j] and remove epsilon as well
// as parentheses, so we can better compare the contents
- R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
+ R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
if (0 == strcmp (R_temp_ij, R_temp_ik) &&
0 == strcmp (R_temp_ik, R_temp_kk) &&
else
{
/* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */
- temp_a = (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
+ temp_a =
+ (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
temp_a = remove_parentheses (temp_a);
R_cur_l = temp_a;
}
for (j = 0; j < n; j++)
{
GNUNET_free_non_null (R_last[i][j]);
- R_last[i][j] = R_cur[i][j];
- R_cur[i][j] = NULL;
+ R_last[i][j] = R_cur[i][j];
+ R_cur[i][j] = NULL;
}
}
}
}
}
}
- a->computed_regex = complete_regex;
+ a->canonical_regex = complete_regex;
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
"---------------------------------------------\n");
return;
GNUNET_free_non_null (a->regex);
- GNUNET_free_non_null (a->computed_regex);
+ GNUNET_free_non_null (a->canonical_regex);
for (s = a->states_head; NULL != s;)
{
return result;
}
+
/**
- * Get the computed regex of the given automaton.
+ * Get the canonical regex of the given automaton.
* When constructing the automaton a proof is computed for each state,
* consisting of the regular expression leading to this state. A complete
* regex for the automaton can be computed by combining these proofs.
- * As of now this computed regex is only useful for testing.
+ * As of now this function is only useful for testing.
+ *
+ * @param a automaton for which the canonical regex should be returned.
+ *
+ * @return pointer to the canonical regex string stored in 'a' (not a copy),
+ *         or NULL if 'a' is NULL.
*/
const char *
-GNUNET_REGEX_get_computed_regex (struct GNUNET_REGEX_Automaton *a)
+GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a)
{
if (NULL == a)
return NULL;
- return a->computed_regex;
+ return a->canonical_regex;
}
/**
char current_char;
int eval;
int eval_check;
- int eval_computed;
+ int eval_canonical;
struct GNUNET_REGEX_Automaton *dfa;
regex_t rx;
regmatch_t matchptr[1];
char error[200];
int result;
unsigned int str_len;
- char *computed_regex;
+ char *canonical_regex;
// At least one string is needed for matching
GNUNET_assert (str_count > 0);
}
eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
- computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa));
+ canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
GNUNET_REGEX_automaton_destroy (dfa);
// Match string using glibc regex
eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
regfree (&rx);
- // Match computed regex
- if (0 != regcomp (&rx, computed_regex, REG_EXTENDED))
+ // Match canonical regex
+ if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED))
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Could not compile regex using regcomp: %s\n",
- computed_regex);
+ canonical_regex);
return -1;
}
- eval_computed = regexec (&rx, matching_str[i], 1, matchptr, 0);
+ eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0);
regfree (&rx);
- GNUNET_free (computed_regex);
+ GNUNET_free (canonical_regex);
// We only want to match the whole string, because that's what our DFA does, too.
if (eval_check == 0 &&
// DFA test
a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
check_dfa += test_automaton (a, &rx, &rxstr[i]);
- check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a));
+ check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
GNUNET_REGEX_automaton_destroy (a);
a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
check_dfa += test_automaton (a, &rx, &rxstr[i]);
}
srand (time (NULL));
- for (i = 0; i < 50; i++)
- check_rand += test_random (100, 120, 20);
+ for (i = 0; i < 50; i++)
+ check_rand += test_random (100, 120, 20);
return check_nfa + check_dfa + check_rand;
}