/*
This file is part of GNUnet
- (C) 2012 Christian Grothoff (and other contributing authors)
+ Copyright (C) 2012 GNUnet e.V.
GNUnet is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
You should have received a copy of the GNU General Public License
along with GNUnet; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
*/
/**
* @file src/regex/regex_internal.c
/**
- * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
+ * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA
* creation. Disabled by default for better performance.
*/
#define REGEX_DEBUG_DFA GNUNET_NO
/**
- * Append state to the given StateSet '
+ * Append state to the given StateSet.
*
* @param set set to be modified
* @param state state to be appended
/**
- * Adds a transition from one state to another on 'label'. Does not add
+ * Adds a transition from one state to another on @a label. Does not add
* duplicate states.
*
* @param ctx context
*/
static void
state_add_transition (struct REGEX_INTERNAL_Context *ctx,
- struct REGEX_INTERNAL_State *from_state, const char *label,
+ struct REGEX_INTERNAL_State *from_state,
+ const char *label,
struct REGEX_INTERNAL_State *to_state)
{
struct REGEX_INTERNAL_Transition *t;
if (NULL == from_state)
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not create Transition.\n");
return;
}
break;
}
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
if (NULL != ctx)
t->id = ctx->transition_id++;
if (NULL != label)
/**
- * Get all edges leaving state 's'.
+ * Get all edges leaving state @a s.
*
* @param s state.
- * @param edges all edges leaving 's', expected to be allocated and have enough
- * space for s->transitions_count elements.
+ * @param edges all edges leaving @a s, expected to be allocated and have enough
+ * space for `s->transitions_count` elements.
*
* @return number of edges.
*/
static unsigned int
-state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
+state_get_edges (struct REGEX_INTERNAL_State *s,
+ struct REGEX_BLOCK_Edge *edges)
{
struct REGEX_INTERNAL_Transition *t;
unsigned int count;
/**
- * Frees the memory used by State 's'
+ * Frees the memory used by State @a s
*
* @param s state that should be destroyed
*/
* @param start start state, pass a->start or NULL to traverse the whole automaton.
* @param check function that is checked before advancing on each transition
* in the DFS.
- * @param check_cls closure for check.
+ * @param check_cls closure for @a check.
* @param action action to be performed on each state.
- * @param action_cls closure for action
+ * @param action_cls closure for @a action
*/
void
REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
- struct REGEX_INTERNAL_State *start,
- REGEX_INTERNAL_traverse_check check,
- void *check_cls,
- REGEX_INTERNAL_traverse_action action,
- void *action_cls)
+ struct REGEX_INTERNAL_State *start,
+ REGEX_INTERNAL_traverse_check check,
+ void *check_cls,
+ REGEX_INTERNAL_traverse_action action,
+ void *action_cls)
{
unsigned int count;
struct REGEX_INTERNAL_State *s;
else
s = start;
- automaton_state_traverse (s, marks, &count, check, check_cls, action,
- action_cls);
+ automaton_state_traverse (s, marks, &count,
+ check, check_cls,
+ action, action_cls);
}
* Allocated buffer.
*/
char *abuf;
-
+
/**
* Length of the string in the buffer.
*/
size_t slen;
/**
- * Number of bytes allocated for 'sbuf'
+ * Number of bytes allocated for @e sbuf
*/
unsigned int blen;
/**
* If this entry is part of the last/current generation array,
- * this flag is GNUNET_YES if the last and current generation are
+ * this flag is #GNUNET_YES if the last and current generation are
* identical (and thus copying is unnecessary if the value didn't
* change). This is used in an optimization that improves
* performance by about 1% --- if we use int16_t here. With just
* "int" for both flags, performance drops (on my system) significantly,
- * most likely due to increased cache misses.
+ * most likely due to increased cache misses.
*/
int16_t synced;
-
+
};
return -1;
if (s1->slen != s2->slen)
return -1;
+ if (0 == s1->slen)
+ return 0;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
- * Compare two strings for equality.
+ * Compare two strings for equality.
*
* @param s1 first string for comparison.
* @param s2 second string for comparison.
{
if (s1->slen != s2->slen)
return -1;
+ if (0 == s1->slen)
+ return 0;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
* Reallocate the buffer of 'ret' to fit 'nlen' characters;
old = ret->abuf;
ret->abuf = GNUNET_malloc (nlen);
ret->blen = nlen;
- memcpy (ret->abuf,
+ GNUNET_memcpy (ret->abuf,
ret->sbuf,
ret->slen);
ret->sbuf = ret->abuf;
GNUNET_free_non_null (old);
}
-
+
/**
* Append a string.
ret->null_flag = GNUNET_NO;
if (ret->blen < sarg->slen + ret->slen)
sb_realloc (ret, ret->blen + sarg->slen + 128);
- memcpy (&ret->sbuf[ret->slen],
+ GNUNET_memcpy (&ret->sbuf[ret->slen],
sarg->sbuf,
sarg->slen);
ret->slen += sarg->slen;
}
-
+
/**
* Append a C string.
ret->null_flag = GNUNET_NO;
if (ret->blen < cstr_len + ret->slen)
sb_realloc (ret, ret->blen + cstr_len + 128);
- memcpy (&ret->sbuf[ret->slen],
+ GNUNET_memcpy (&ret->sbuf[ret->slen],
cstr,
cstr_len);
ret->slen += cstr_len;
}
-
+
/**
* Wrap a string buffer, that is, set ret to the format string
static void
sb_strdup (struct StringBuffer *out,
const struct StringBuffer *in)
-
+
{
out->null_flag = in->null_flag;
if (GNUNET_YES == out->null_flag)
}
out->sbuf = out->abuf;
out->slen = in->slen;
- memcpy (out->sbuf, in->sbuf, out->slen);
+ GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
}
out->slen);
}
out->sbuf = out->abuf;
- memcpy (out->sbuf, cstr, out->slen);
+ GNUNET_memcpy (out->sbuf, cstr, out->slen);
}
/**
- * Check if the given string 'str' needs parentheses around it when
+ * Check if the given string @a str needs parentheses around it when
* using it to generate a regex.
*
* @param str string
*
- * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
+ * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise
*/
static int
needs_parentheses (const struct StringBuffer *str)
}
/* while '(' before ')', count opening parens */
while ( (NULL != (op = memchr (pos, '(', end - pos))) &&
- (op < cl) )
+ (op < cl) )
{
cnt++;
pos = op + 1;
/**
- * Remove parentheses surrounding string 'str'.
+ * Remove parentheses surrounding string @a str.
* Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
- * You need to GNUNET_free the returned string.
+ * You need to #GNUNET_free() the returned string.
*
* @param str string, modified to contain a
* @return string without surrounding parentheses, string 'str' if no preceding
if (0)
return;
sbuf = str->sbuf;
- if ( (GNUNET_YES == str->null_flag) ||
+ if ( (GNUNET_YES == str->null_flag) ||
(1 >= (slen = str->slen)) ||
('(' != str->sbuf[0]) ||
(')' != str->sbuf[slen - 1]) )
end = &sbuf[slen - 1];
op = memchr (pos, '(', end - pos);
cp = memchr (pos, ')', end - pos);
- while (NULL != cp)
+ while (NULL != cp)
{
while ( (NULL != op) &&
(op < cp) )
return;
}
str->sbuf++;
- str->slen -= 2;
+ str->slen -= 2;
}
static int
has_epsilon (const struct StringBuffer *str)
{
- return
- (GNUNET_YES != str->null_flag) &&
+ return
+ (GNUNET_YES != str->null_flag) &&
(0 < str->slen) &&
- ('(' == str->sbuf[0]) &&
+ ('(' == str->sbuf[0]) &&
('|' == str->sbuf[1]) &&
(')' == str->sbuf[str->slen - 1]);
}
{
ret->null_flag = GNUNET_YES;
return;
- }
- if ( (str->slen > 1) &&
+ }
+ if ( (str->slen > 1) &&
('(' == str->sbuf[0]) &&
('|' == str->sbuf[1]) &&
(')' == str->sbuf[str->slen - 1]) )
}
ret->sbuf = ret->abuf;
ret->slen = str->slen - 3;
- memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
+ GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
return;
}
sb_strdup (ret, str);
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strncmp (const struct StringBuffer *str1,
+sb_strncmp (const struct StringBuffer *str1,
const struct StringBuffer *str2, size_t n)
{
size_t max;
-
+
if ( (str1->slen != str2->slen) &&
( (str1->slen < n) ||
(str2->slen < n) ) )
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strncmp_cstr (const struct StringBuffer *str1,
+sb_strncmp_cstr (const struct StringBuffer *str1,
const char *str2, size_t n)
{
- if (str1->slen < n)
+ if (str1->slen < n)
return -1;
return memcmp (str1->sbuf, str2, n);
}
/**
- * Initialize string buffer for storing strings of up to n
+ * Initialize string buffer for storing strings of up to n
* characters.
*
* @param sb buffer to initialize
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strkcmp (const struct StringBuffer *str1,
+sb_strkcmp (const struct StringBuffer *str1,
const struct StringBuffer *str2, size_t k)
{
if ( (GNUNET_YES == str1->null_flag) ||
* @param R_cur_r optimization -- kept between iterations to avoid realloc
*/
static void
-automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
+automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
const struct StringBuffer *R_last_ik,
const struct StringBuffer *R_last_kk,
const struct StringBuffer *R_last_kj,
* R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
*/
- if ( (GNUNET_YES == R_last_ij->null_flag) &&
- ( (GNUNET_YES == R_last_ik->null_flag) ||
+ if ( (GNUNET_YES == R_last_ij->null_flag) &&
+ ( (GNUNET_YES == R_last_ik->null_flag) ||
(GNUNET_YES == R_last_kj->null_flag)))
{
/* R^{(k)}_{ij} = N | N */
return;
}
- if ( (GNUNET_YES == R_last_ik->null_flag) ||
+ if ( (GNUNET_YES == R_last_ik->null_flag) ||
(GNUNET_YES == R_last_kj->null_flag) )
{
/* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
if (GNUNET_YES == R_last_ij->synced)
{
- R_cur_ij->synced = GNUNET_YES;
+ R_cur_ij->synced = GNUNET_YES;
R_cur_ij->null_flag = GNUNET_NO;
return;
}
/* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
* $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
- R_cur_r->null_flag = GNUNET_YES;
- R_cur_r->slen = 0;
- R_cur_l->null_flag = GNUNET_YES;
- R_cur_l->slen = 0;
+ R_cur_r->null_flag = GNUNET_YES;
+ R_cur_r->slen = 0;
+ R_cur_l->null_flag = GNUNET_YES;
+ R_cur_l->slen = 0;
/* cache results from strcmp, we might need these many times */
ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
remove_epsilon (R_last_ij, &R_temp_ij);
remove_parentheses (&R_temp_ij);
- if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
- (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
+ if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
+ (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
(0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
{
if (0 == R_temp_ij.slen)
length = R_temp_kk.slen - R_last_ik->slen;
/* a(ba)*bx = (ab)+x */
- if ( (length > 0) &&
+ if ( (length > 0) &&
(GNUNET_YES != R_last_kk->null_flag) &&
(0 < R_last_kk->slen) &&
- (GNUNET_YES != R_last_kj->null_flag) &&
+ (GNUNET_YES != R_last_kj->null_flag) &&
(0 < R_last_kj->slen) &&
(GNUNET_YES != R_last_ik->null_flag) &&
(0 < R_last_ik->slen) &&
(0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
(0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
- {
+ {
struct StringBuffer temp_a;
struct StringBuffer temp_b;
length_l = length;
temp_a.sbuf = temp_a.abuf;
- memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
+ GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
temp_a.slen = length_l;
length_r = R_last_kj->slen - length;
temp_b.sbuf = temp_b.abuf;
- memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
+ GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
temp_b.slen = length_r;
/* e|(ab)+ = (ab)* */
sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
}
/* aa*a = a+a */
- else if ( (0 == clean_ik_kk_cmp) &&
+ else if ( (0 == clean_ik_kk_cmp) &&
(0 == clean_kk_kj_cmp) &&
(! has_epsilon (R_last_ik)) )
{
sb_free (&R_temp_kk);
sb_free (&R_temp_kj);
- if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
(GNUNET_YES == R_cur_r->null_flag) )
{
R_cur_ij->null_flag = GNUNET_YES;
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
if (needs_parentheses (&R_last[i * n + j]))
- sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
+ sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
/* Compute regular expressions of length "k" between each pair of states per
* induction */
memset (&R_cur_l, 0, sizeof (struct StringBuffer));
if ( (0 == complete_regex.slen) &&
(0 < R_last[a->start->dfs_id * n + i].slen) )
{
- sb_append (&complete_regex,
+ sb_append (&complete_regex,
&R_last[a->start->dfs_id * n + i]);
}
else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
(0 < R_last[a->start->dfs_id * n + i].slen) )
{
sb_append_cstr (&complete_regex, "|");
- sb_append (&complete_regex,
+ sb_append (&complete_regex,
&R_last[a->start->dfs_id * n + i]);
}
}
/* cleanup */
sb_free (&complete_regex);
- for (i = 0; i < n; i++)
+ for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
{
- sb_free (&R_cur[i * n + j]);
- sb_free (&R_last[i * n + j]);
+ sb_free (&R_cur[i * n + j]);
+ sb_free (&R_last[i * n + j]);
}
GNUNET_free (R_cur);
GNUNET_free (R_last);
struct REGEX_INTERNAL_Transition *ctran;
unsigned int i;
- s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
+ s = GNUNET_new (struct REGEX_INTERNAL_State);
s->id = ctx->state_id++;
s->index = -1;
s->lowlink = -1;
for (i = 0; i < nfa_states->off; i++)
{
cstate = nfa_states->states[i];
- GNUNET_snprintf (pos, pos - s->name + len,
- "%i,", cstate->id);
+ GNUNET_snprintf (pos,
+ pos - s->name + len,
+ "%i,",
+ cstate->id);
pos += strlen (pos);
/* Add a transition for each distinct label to NULL state */
- for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
+ for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
if (NULL != ctran->label)
- state_add_transition (ctx, s, ctran->label, NULL);
+ state_add_transition (ctx, s, ctran->label, NULL);
/* If the nfa_states contain an accepting state, the new dfa state is also
* accepting. */
if (cstate->accepting)
s->accepting = 1;
- }
+ }
pos[-1] = '}';
s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
/**
- * Set the given state 'marked' to GNUNET_YES. Used by the
- * 'dfa_remove_unreachable_states' function to detect unreachable states in the
+ * Set the given state 'marked' to #GNUNET_YES. Used by the
+ * #dfa_remove_unreachable_states() function to detect unreachable states in the
* automaton.
*
* @param cls closure, not used.
* @param count count, not used.
- * @param s state where the marked attribute will be set to GNUNET_YES.
+ * @param s state where the marked attribute will be set to #GNUNET_YES.
*/
static void
-mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
+mark_states (void *cls,
+ const unsigned int count,
+ struct REGEX_INTERNAL_State *s)
{
s->marked = GNUNET_YES;
}
*
* @param ctx context
* @param a DFA automaton
- * @return GNUNET_OK on success
+ * @return #GNUNET_OK on success
*/
static int
dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx,
if ( (s1->accepting && !s2->accepting) ||
(!s1->accepting && s2->accepting) )
{
- idx = s1->marked * state_cnt + s2->marked;
- table[idx / 32] |= (1 << (idx % 32));
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ table[idx / 32] |= (1U << (idx % 32));
}
/* Find all equal states */
{
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
{
- idx = s1->marked * state_cnt + s2->marked;
- if (0 != (table[idx / 32] & (1 << (idx % 32))))
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ if (0 != (table[idx / 32] & (1U << (idx % 32))))
continue;
num_equal_edges = 0;
for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
/* same edge, but targets definitively different, so we're different
as well */
if (t1->to_state->marked > t2->to_state->marked)
- idx1 = t1->to_state->marked * state_cnt + t2->to_state->marked;
+ idx1 = (unsigned long long) t1->to_state->marked * state_cnt + t2->to_state->marked;
else
- idx1 = t2->to_state->marked * state_cnt + t1->to_state->marked;
- if (0 != (table[idx1 / 32] & (1 << (idx1 % 32))))
+ idx1 = (unsigned long long) t2->to_state->marked * state_cnt + t1->to_state->marked;
+ if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
{
- table[idx / 32] |= (1 << (idx % 32));
+ table[idx / 32] |= (1U << (idx % 32));
change = 1; /* changed a marker, need to run again */
}
}
(num_equal_edges != s2->transition_count) )
{
/* Make sure ALL edges of possible equal states are the same */
- table[idx / 32] |= (1 << (idx % 32));
+ table[idx / 32] |= (1U << (idx % 32));
change = 1; /* changed a marker, need to run again */
}
}
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
{
s2_next = s2->next;
- idx = s1->marked * state_cnt + s2->marked;
- if (0 == (table[idx / 32] & (1 << (idx % 32))))
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ if (0 == (table[idx / 32] & (1U << (idx % 32))))
automaton_merge_states (ctx, a, s1, s2);
}
}
* @param start start state for the depth-first traversal of the graph.
* @param s current state in the depth-first traversal
*/
-void
+static void
dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
struct REGEX_INTERNAL_State *start,
struct REGEX_INTERNAL_State *s)
if (depth == ctx->stride)
{
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
t->label = GNUNET_strdup (label);
t->to_state = s;
t->from_state = start;
* @param count not used.
* @param s current state.
*/
-void
+static void
dfa_add_multi_strides (void *cls, const unsigned int count,
struct REGEX_INTERNAL_State *s)
{
max_len == strlen (label)) ||
(start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
{
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
t->label = GNUNET_strdup (label);
t->to_state = cur;
t->from_state = start;
{
struct REGEX_INTERNAL_Automaton *n;
- n = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
+ n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
n->type = NFA;
n->start = NULL;
{
struct REGEX_INTERNAL_State *s;
- s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
+ s = GNUNET_new (struct REGEX_INTERNAL_State);
s->id = ctx->state_id++;
s->accepting = accepting;
s->marked = GNUNET_NO;
/* Add start state to closure only for epsilon closure */
if (NULL == label)
state_set_append (ret, s);
-
+
/* initialize work stack */
cls_stack.head = NULL;
cls_stack.tail = NULL;
{
GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
currentstate);
- cls_stack.len--;
+ cls_stack.len--;
for (ctran = currentstate->transitions_head; NULL != ctran;
ctran = ctran->next)
{
clsstate);
cls_stack.len++;
clsstate->contained = 1;
- }
+ }
}
}
for (i = 0; i < ret->off; i++)
struct REGEX_INTERNAL_State *end;
a = ctx->stack_tail;
-
if (NULL == a)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
nfa_add_concatenation (&ctx);
}
if (poff == psize)
- GNUNET_array_grow (p, psize, psize * 2 + 4);
+ GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
p[poff].altcount = altcount;
p[poff].atomcount = atomcount;
poff++;
*/
struct REGEX_INTERNAL_Automaton *
REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len,
- unsigned int max_path_len)
+ unsigned int max_path_len)
{
struct REGEX_INTERNAL_Context ctx;
struct REGEX_INTERNAL_Automaton *dfa;
REGEX_INTERNAL_context_init (&ctx);
/* Create NFA */
- // fprintf (stderr, "N");
nfa = REGEX_INTERNAL_construct_nfa (regex, len);
if (NULL == nfa)
return NULL;
}
- dfa = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
+ dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
dfa->type = DFA;
dfa->regex = GNUNET_strdup (regex);
dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
automaton_add_state (dfa, dfa->start);
- // fprintf (stderr, "D");
construct_dfa_states (&ctx, nfa, dfa, dfa->start);
REGEX_INTERNAL_automaton_destroy (nfa);
/* Minimize DFA */
- // fprintf (stderr, "M");
if (GNUNET_OK != dfa_minimize (&ctx, dfa))
{
REGEX_INTERNAL_automaton_destroy (dfa);
* @param a automaton, type must be DFA
* @param string string that should be evaluated
*
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
static int
-evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
+evaluate_dfa (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
const char *strp;
struct REGEX_INTERNAL_State *s;
*
* @param a automaton, type must be NFA
* @param string string that should be evaluated
- *
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
static int
-evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
+evaluate_nfa (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
const char *strp;
char str[2];
/**
- * Evaluates the given 'string' against the given compiled regex
+ * Evaluates the given @a string against the given compiled regex @a a
*
* @param a automaton
* @param string string to check
- *
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
int
-REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
+REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
int result;
/**
- * Get the first key for the given 'input_string'. This hashes the first x bits
- * of the 'input_string'.
+ * Get the first key for the given @a input_string. This hashes the first x bits
+ * of the @a input_string.
*
* @param input_string string.
- * @param string_len length of the 'input_string'.
+ * @param string_len length of the @a input_string.
* @param key pointer to where to write the hash code.
- *
- * @return number of bits of 'input_string' that have been consumed
+ * @return number of bits of @a input_string that have been consumed
* to construct the key
*/
size_t
-REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len,
- struct GNUNET_HashCode * key)
+REGEX_INTERNAL_get_first_key (const char *input_string,
+ size_t string_len,
+ struct GNUNET_HashCode *key)
{
- unsigned int size;
-
- size =
- string_len <
- GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES;
+ size_t size;
+ size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len :
+ GNUNET_REGEX_INITIAL_BYTES;
if (NULL == input_string)
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Given input string was NULL!\n");
return 0;
}
-
GNUNET_CRYPTO_hash (input_string, size, key);
return size;
}
-/**
- * Check if the given 'proof' matches the given 'key'.
- *
- * @param proof partial regex of a state.
- * @param key hash of a state.
- *
- * @return GNUNET_OK if the proof is valid for the given key.
- */
-int
-REGEX_INTERNAL_check_proof (const char *proof, const struct GNUNET_HashCode *key)
-{
- struct GNUNET_HashCode key_check;
-
- if (NULL == proof || NULL == key)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n");
- return GNUNET_NO;
- }
-
- GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check);
- return (0 ==
- GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO;
-}
-
-
/**
* Recursive function that calls the iterator for each synthetic start state.
*
* @param consumed_string string consumed by traversing the graph till this state.
* @param state current state of the automaton.
* @param iterator iterator function called for each edge.
- * @param iterator_cls closure for the iterator function.
+ * @param iterator_cls closure for the @a iterator function.
*/
static void
-iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
- char *consumed_string, struct REGEX_INTERNAL_State *state,
- REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
+iterate_initial_edge (unsigned int min_len,
+ unsigned int max_len,
+ char *consumed_string,
+ struct REGEX_INTERNAL_State *state,
+ REGEX_INTERNAL_KeyIterator iterator,
+ void *iterator_cls)
{
- unsigned int i;
char *temp;
struct REGEX_INTERNAL_Transition *t;
unsigned int num_edges = state->transition_count;
struct REGEX_BLOCK_Edge edge[1];
struct GNUNET_HashCode hash;
struct GNUNET_HashCode hash_new;
-
unsigned int cur_len;
if (NULL != consumed_string)
else
cur_len = 0;
- if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
- NULL != consumed_string)
+ if ( ( (cur_len >= min_len) ||
+ (GNUNET_YES == state->accepting) ) &&
+ (cur_len > 0) &&
+ (NULL != consumed_string) )
{
if (cur_len <= max_len)
{
- if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
+ if ( (NULL != state->proof) &&
+ (0 != strcmp (consumed_string,
+ state->proof)) )
{
- for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
- t = t->next, i++)
- {
- edges[i].label = t->label;
- edges[i].destination = t->to_state->hash;
- }
- GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
- iterator (iterator_cls, &hash, consumed_string, state->accepting,
+ (void) state_get_edges (state, edges);
+ GNUNET_CRYPTO_hash (consumed_string,
+ strlen (consumed_string),
+ &hash);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state for string `%s' is %s\n",
+ consumed_string,
+ GNUNET_h2s (&hash));
+ iterator (iterator_cls,
+ &hash,
+ consumed_string,
+ state->accepting,
num_edges, edges);
}
- if (GNUNET_YES == state->accepting && cur_len > 1 &&
- state->transition_count < 1 && cur_len < max_len)
+ if ( (GNUNET_YES == state->accepting) &&
+ (cur_len > 1) &&
+ (state->transition_count < 1) &&
+ (cur_len < max_len) )
{
/* Special case for regex consisting of just a string that is shorter than
* max_len */
edge[0].destination = state->hash;
temp = GNUNET_strdup (consumed_string);
temp[cur_len - 1] = '\0';
- GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
- iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
+ GNUNET_CRYPTO_hash (temp,
+ cur_len - 1,
+ &hash_new);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state for short string `%s' is %s\n",
+ temp,
+ GNUNET_h2s (&hash_new));
+ iterator (iterator_cls,
+ &hash_new,
+ temp,
+ GNUNET_NO, 1,
+ edge);
GNUNET_free (temp);
}
}
- else if (max_len < cur_len)
+ else /* cur_len > max_len */
{
/* Case where the concatenated labels are longer than max_len, then split. */
edge[0].label = &consumed_string[max_len];
temp = GNUNET_strdup (consumed_string);
temp[max_len] = '\0';
GNUNET_CRYPTO_hash (temp, max_len, &hash);
- iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state at split edge `%s'-`%s` is %s\n",
+ temp,
+ edge[0].label,
+ GNUNET_h2s (&hash_new));
+ iterator (iterator_cls,
+ &hash,
+ temp,
+ GNUNET_NO,
+ 1,
+ edge);
GNUNET_free (temp);
}
}
{
for (t = state->transitions_head; NULL != t; t = t->next)
{
+ if (NULL != strchr (t->label,
+ (int) '.'))
+ {
+ /* Wildcards not allowed during starting states */
+ GNUNET_break (0);
+ continue;
+ }
if (NULL != consumed_string)
- GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
+ GNUNET_asprintf (&temp,
+ "%s%s",
+ consumed_string,
+ t->label);
else
- GNUNET_asprintf (&temp, "%s", t->label);
-
- iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
+ GNUNET_asprintf (&temp,
+ "%s",
+ t->label);
+ iterate_initial_edge (min_len,
+ max_len,
+ temp,
+ t->to_state,
+ iterator,
iterator_cls);
GNUNET_free (temp);
}
*/
void
REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
- REGEX_INTERNAL_KeyIterator iterator,
- void *iterator_cls)
+ REGEX_INTERNAL_KeyIterator iterator,
+ void *iterator_cls)
{
struct REGEX_INTERNAL_State *s;
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Iterating over starting edges\n");
+ iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
+ GNUNET_REGEX_INITIAL_BYTES,
+ NULL, a->start,
+ iterator, iterator_cls);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Iterating over DFA edges\n");
for (s = a->states_head; NULL != s; s = s->next)
{
struct REGEX_BLOCK_Edge edges[s->transition_count];
unsigned int num_edges;
num_edges = state_get_edges (s, edges);
+ if ( ( (NULL != s->proof) &&
+ (0 < strlen (s->proof)) ) || s->accepting)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Creating DFA edges at `%s' under key %s\n",
+ s->proof,
+ GNUNET_h2s (&s->hash));
+ iterator (iterator_cls, &s->hash, s->proof,
+ s->accepting,
+ num_edges, edges);
+ }
+ s->marked = GNUNET_NO;
+ }
+}
- if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting)
- iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
- edges);
- s->marked = GNUNET_NO;
+/**
+ * Struct to hold all the relevant state information in the HashMap.
+ *
+ * Contains the same info as the Regex Iterator parametes except the key,
+ * which comes directly from the HashMap iterator.
+ */
+struct temporal_state_store {
+ int reachable;
+ char *proof;
+ int accepting;
+ int num_edges;
+ struct REGEX_BLOCK_Edge *edges;
+};
+
+
+/**
+ * Store regex iterator and cls in one place to pass to the hashmap iterator.
+ */
+struct client_iterator {
+ REGEX_INTERNAL_KeyIterator iterator;
+ void *iterator_cls;
+};
+
+
+/**
+ * Iterator over all edges of a dfa. Stores all of them in a HashMap
+ * for later reachability marking.
+ *
+ * @param cls Closure (HashMap)
+ * @param key hash for current state.
+ * @param proof proof for current state
+ * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
+ * @param num_edges number of edges leaving current state.
+ * @param edges edges leaving current state.
+ */
+static void
+store_all_states (void *cls,
+ const struct GNUNET_HashCode *key,
+ const char *proof,
+ int accepting,
+ unsigned int num_edges,
+ const struct REGEX_BLOCK_Edge *edges)
+{
+ struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
+ struct temporal_state_store *tmp;
+ size_t edges_size;
+
+ tmp = GNUNET_new (struct temporal_state_store);
+ tmp->reachable = GNUNET_NO;
+ tmp->proof = GNUNET_strdup (proof);
+ tmp->accepting = accepting;
+ tmp->num_edges = num_edges;
+ edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
+ tmp->edges = GNUNET_malloc (edges_size);
+ GNUNET_memcpy(tmp->edges, edges, edges_size);
+ GNUNET_CONTAINER_multihashmap_put (hm, key, tmp,
+ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST);
+}
+
+
+/**
+ * Mark state as reachable and call recursively on all its edges.
+ *
+ * If already marked as reachable, do nothing.
+ *
+ * @param state State to mark as reachable.
+ * @param hm HashMap which stores all the states indexed by key.
+ */
+static void
+mark_as_reachable (struct temporal_state_store *state,
+ struct GNUNET_CONTAINER_MultiHashMap *hm)
+{
+ struct temporal_state_store *child;
+ unsigned int i;
+
+ if (GNUNET_YES == state->reachable)
+ /* visited */
+ return;
+
+ state->reachable = GNUNET_YES;
+ for (i = 0; i < state->num_edges; i++)
+ {
+ child = GNUNET_CONTAINER_multihashmap_get (hm,
+ &state->edges[i].destination);
+ if (NULL == child)
+ {
+ GNUNET_break (0);
+ continue;
+ }
+ mark_as_reachable (child, hm);
}
+}
+
+
+/**
+ * Iterator over hash map entries to mark the ones that are reachable.
+ *
+ * @param cls closure
+ * @param key current key code
+ * @param value value in the hash map
+ * @return #GNUNET_YES if we should continue to iterate,
+ * #GNUNET_NO if not.
+ */
+static int
+reachability_iterator (void *cls,
+ const struct GNUNET_HashCode *key,
+ void *value)
+{
+ struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
+ struct temporal_state_store *state = value;
+
+ if (GNUNET_YES == state->reachable)
+ /* already visited and marked */
+ return GNUNET_YES;
+
+ if (GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof) &&
+ GNUNET_NO == state->accepting)
+ /* not directly reachable */
+ return GNUNET_YES;
+
+ mark_as_reachable (state, hm);
+ return GNUNET_YES;
+}
+
+
+/**
+ * Iterator over hash map entries.
+ * Calling the callback on the ones marked as reachables.
+ *
+ * @param cls closure
+ * @param key current key code
+ * @param value value in the hash map
+ * @return #GNUNET_YES if we should continue to iterate,
+ * #GNUNET_NO if not.
+ */
+static int
+iterate_reachables (void *cls,
+ const struct GNUNET_HashCode *key,
+ void *value)
+{
+ struct client_iterator *ci = cls;
+ struct temporal_state_store *state = value;
+
+ if (GNUNET_YES == state->reachable)
+ {
+ ci->iterator (ci->iterator_cls, key,
+ state->proof, state->accepting,
+ state->num_edges, state->edges);
+ }
+ GNUNET_free (state->edges);
+ GNUNET_free (state->proof);
+ GNUNET_free (state);
+ return GNUNET_YES;
- iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES,
- NULL, a->start, iterator, iterator_cls);
}
+/**
+ * Iterate over all edges of automaton 'a' that are reachable from a state with
+ * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
+ *
+ * Call the iterator for each such edge.
+ *
+ * @param a automaton.
+ * @param iterator iterator called for each reachable edge.
+ * @param iterator_cls closure.
+ */
+void
+REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
+ REGEX_INTERNAL_KeyIterator iterator,
+ void *iterator_cls)
+{
+ struct GNUNET_CONTAINER_MultiHashMap *hm;
+ struct client_iterator ci;
+
+ hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO);
+ ci.iterator = iterator;
+ ci.iterator_cls = iterator_cls;
+ REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm);
+ GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm);
+ GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci);
+
+ GNUNET_CONTAINER_multihashmap_destroy (hm);
+}
-/* end of regex.c */
+/* end of regex_internal.c */