/*
This file is part of GNUnet
- (C) 2012 Christian Grothoff (and other contributing authors)
+ Copyright (C) 2012 GNUnet e.V.
GNUnet is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
You should have received a copy of the GNU General Public License
along with GNUnet; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
*/
/**
* @file src/regex/regex_internal.c
/**
- * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
+ * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA
* creation. Disabled by default for better performance.
*/
#define REGEX_DEBUG_DFA GNUNET_NO
/**
- * Append state to the given StateSet '
+ * Append state to the given StateSet.
*
* @param set set to be modified
* @param state state to be appended
/**
- * Adds a transition from one state to another on 'label'. Does not add
+ * Adds a transition from one state to another on @a label. Does not add
* duplicate states.
*
* @param ctx context
*/
static void
state_add_transition (struct REGEX_INTERNAL_Context *ctx,
- struct REGEX_INTERNAL_State *from_state, const char *label,
+ struct REGEX_INTERNAL_State *from_state,
+ const char *label,
struct REGEX_INTERNAL_State *to_state)
{
struct REGEX_INTERNAL_Transition *t;
if (NULL == from_state)
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not create Transition.\n");
return;
}
break;
}
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
if (NULL != ctx)
t->id = ctx->transition_id++;
if (NULL != label)
/**
- * Get all edges leaving state 's'.
+ * Get all edges leaving state @a s.
*
* @param s state.
- * @param edges all edges leaving 's', expected to be allocated and have enough
- * space for s->transitions_count elements.
+ * @param edges all edges leaving @a s, expected to be allocated and have enough
+ * space for `s->transitions_count` elements.
*
* @return number of edges.
*/
static unsigned int
-state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
+state_get_edges (struct REGEX_INTERNAL_State *s,
+ struct REGEX_BLOCK_Edge *edges)
{
struct REGEX_INTERNAL_Transition *t;
unsigned int count;
/**
- * Frees the memory used by State 's'
+ * Frees the memory used by State @a s
*
* @param s state that should be destroyed
*/
* @param start start state, pass a->start or NULL to traverse the whole automaton.
* @param check function that is checked before advancing on each transition
* in the DFS.
- * @param check_cls closure for check.
+ * @param check_cls closure for @a check.
* @param action action to be performed on each state.
- * @param action_cls closure for action
+ * @param action_cls closure for @a action
*/
void
REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
- struct REGEX_INTERNAL_State *start,
- REGEX_INTERNAL_traverse_check check,
- void *check_cls,
- REGEX_INTERNAL_traverse_action action,
- void *action_cls)
+ struct REGEX_INTERNAL_State *start,
+ REGEX_INTERNAL_traverse_check check,
+ void *check_cls,
+ REGEX_INTERNAL_traverse_action action,
+ void *action_cls)
{
unsigned int count;
struct REGEX_INTERNAL_State *s;
else
s = start;
- automaton_state_traverse (s, marks, &count, check, check_cls, action,
- action_cls);
+ automaton_state_traverse (s, marks, &count,
+ check, check_cls,
+ action, action_cls);
}
size_t slen;
/**
- * Number of bytes allocated for 'sbuf'
+ * Number of bytes allocated for @e sbuf
*/
unsigned int blen;
/**
* If this entry is part of the last/current generation array,
- * this flag is GNUNET_YES if the last and current generation are
+ * this flag is #GNUNET_YES if the last and current generation are
* identical (and thus copying is unnecessary if the value didn't
* change). This is used in an optimization that improves
* performance by about 1% --- if we use int16_t here. With just
return -1;
if (s1->slen != s2->slen)
return -1;
+ if (0 == s1->slen)
+ return 0;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
* Compare two strings for equality.
{
if (s1->slen != s2->slen)
return -1;
+ if (0 == s1->slen)
+ return 0;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
* Reallocate the buffer of 'ret' to fit 'nlen' characters;
old = ret->abuf;
ret->abuf = GNUNET_malloc (nlen);
ret->blen = nlen;
- memcpy (ret->abuf,
+ GNUNET_memcpy (ret->abuf,
ret->sbuf,
ret->slen);
ret->sbuf = ret->abuf;
ret->null_flag = GNUNET_NO;
if (ret->blen < sarg->slen + ret->slen)
sb_realloc (ret, ret->blen + sarg->slen + 128);
- memcpy (&ret->sbuf[ret->slen],
+ GNUNET_memcpy (&ret->sbuf[ret->slen],
sarg->sbuf,
sarg->slen);
ret->slen += sarg->slen;
}
-
+
/**
* Append a C string.
ret->null_flag = GNUNET_NO;
if (ret->blen < cstr_len + ret->slen)
sb_realloc (ret, ret->blen + cstr_len + 128);
- memcpy (&ret->sbuf[ret->slen],
+ GNUNET_memcpy (&ret->sbuf[ret->slen],
cstr,
cstr_len);
ret->slen += cstr_len;
}
-
+
/**
* Wrap a string buffer, that is, set ret to the format string
static void
sb_strdup (struct StringBuffer *out,
const struct StringBuffer *in)
-
+
{
out->null_flag = in->null_flag;
if (GNUNET_YES == out->null_flag)
}
out->sbuf = out->abuf;
out->slen = in->slen;
- memcpy (out->sbuf, in->sbuf, out->slen);
+ GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
}
out->slen);
}
out->sbuf = out->abuf;
- memcpy (out->sbuf, cstr, out->slen);
+ GNUNET_memcpy (out->sbuf, cstr, out->slen);
}
/**
- * Check if the given string 'str' needs parentheses around it when
+ * Check if the given string @a str needs parentheses around it when
* using it to generate a regex.
*
* @param str string
*
- * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
+ * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise
*/
static int
needs_parentheses (const struct StringBuffer *str)
/**
- * Remove parentheses surrounding string 'str'.
+ * Remove parentheses surrounding string @a str.
* Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
- * You need to GNUNET_free the returned string.
+ * You need to #GNUNET_free() the returned string.
*
* @param str string, modified to contain a
* @return string without surrounding parentheses, string 'str' if no preceding
}
ret->sbuf = ret->abuf;
ret->slen = str->slen - 3;
- memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
+ GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
return;
}
sb_strdup (ret, str);
length_l = length;
temp_a.sbuf = temp_a.abuf;
- memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
+ GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
temp_a.slen = length_l;
length_r = R_last_kj->slen - length;
temp_b.sbuf = temp_b.abuf;
- memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
+ GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
temp_b.slen = length_r;
/* e|(ab)+ = (ab)* */
struct REGEX_INTERNAL_Transition *ctran;
unsigned int i;
- s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
+ s = GNUNET_new (struct REGEX_INTERNAL_State);
s->id = ctx->state_id++;
s->index = -1;
s->lowlink = -1;
for (i = 0; i < nfa_states->off; i++)
{
cstate = nfa_states->states[i];
- GNUNET_snprintf (pos, pos - s->name + len,
- "%i,", cstate->id);
+ GNUNET_snprintf (pos,
+ pos - s->name + len,
+ "%i,",
+ cstate->id);
pos += strlen (pos);
/* Add a transition for each distinct label to NULL state */
/**
- * Set the given state 'marked' to GNUNET_YES. Used by the
- * 'dfa_remove_unreachable_states' function to detect unreachable states in the
+ * Set the given state 'marked' to #GNUNET_YES. Used by the
+ * #dfa_remove_unreachable_states() function to detect unreachable states in the
* automaton.
*
* @param cls closure, not used.
* @param count count, not used.
- * @param s state where the marked attribute will be set to GNUNET_YES.
+ * @param s state where the marked attribute will be set to #GNUNET_YES.
*/
static void
-mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
+mark_states (void *cls,
+ const unsigned int count,
+ struct REGEX_INTERNAL_State *s)
{
s->marked = GNUNET_YES;
}
*
* @param ctx context
* @param a DFA automaton
- * @return GNUNET_OK on success
+ * @return #GNUNET_OK on success
*/
static int
dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx,
if ( (s1->accepting && !s2->accepting) ||
(!s1->accepting && s2->accepting) )
{
- idx = s1->marked * state_cnt + s2->marked;
- table[idx / 32] |= (1 << (idx % 32));
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ table[idx / 32] |= (1U << (idx % 32));
}
/* Find all equal states */
{
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
{
- idx = s1->marked * state_cnt + s2->marked;
- if (0 != (table[idx / 32] & (1 << (idx % 32))))
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ if (0 != (table[idx / 32] & (1U << (idx % 32))))
continue;
num_equal_edges = 0;
for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
/* same edge, but targets definitively different, so we're different
as well */
if (t1->to_state->marked > t2->to_state->marked)
- idx1 = t1->to_state->marked * state_cnt + t2->to_state->marked;
+ idx1 = (unsigned long long) t1->to_state->marked * state_cnt + t2->to_state->marked;
else
- idx1 = t2->to_state->marked * state_cnt + t1->to_state->marked;
- if (0 != (table[idx1 / 32] & (1 << (idx1 % 32))))
+ idx1 = (unsigned long long) t2->to_state->marked * state_cnt + t1->to_state->marked;
+ if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
{
- table[idx / 32] |= (1 << (idx % 32));
+ table[idx / 32] |= (1U << (idx % 32));
change = 1; /* changed a marker, need to run again */
}
}
(num_equal_edges != s2->transition_count) )
{
/* Make sure ALL edges of possible equal states are the same */
- table[idx / 32] |= (1 << (idx % 32));
+ table[idx / 32] |= (1U << (idx % 32));
change = 1; /* changed a marker, need to run again */
}
}
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
{
s2_next = s2->next;
- idx = s1->marked * state_cnt + s2->marked;
- if (0 == (table[idx / 32] & (1 << (idx % 32))))
+ idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
+ if (0 == (table[idx / 32] & (1U << (idx % 32))))
automaton_merge_states (ctx, a, s1, s2);
}
}
if (depth == ctx->stride)
{
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
t->label = GNUNET_strdup (label);
t->to_state = s;
t->from_state = start;
max_len == strlen (label)) ||
(start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
{
- t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
+ t = GNUNET_new (struct REGEX_INTERNAL_Transition);
t->label = GNUNET_strdup (label);
t->to_state = cur;
t->from_state = start;
{
struct REGEX_INTERNAL_Automaton *n;
- n = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
+ n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
n->type = NFA;
n->start = NULL;
{
struct REGEX_INTERNAL_State *s;
- s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
+ s = GNUNET_new (struct REGEX_INTERNAL_State);
s->id = ctx->state_id++;
s->accepting = accepting;
s->marked = GNUNET_NO;
return NULL;
}
- dfa = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
+ dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
dfa->type = DFA;
dfa->regex = GNUNET_strdup (regex);
* @param a automaton, type must be DFA
* @param string string that should be evaluated
*
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
static int
-evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
+evaluate_dfa (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
const char *strp;
struct REGEX_INTERNAL_State *s;
*
* @param a automaton, type must be NFA
* @param string string that should be evaluated
- *
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
static int
-evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
+evaluate_nfa (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
const char *strp;
char str[2];
/**
- * Evaluates the given 'string' against the given compiled regex
+ * Evaluates the given @a string against the given compiled regex @a a
*
* @param a automaton
* @param string string to check
- *
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non-0 otherwise
*/
int
-REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
+REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a,
+ const char *string)
{
int result;
/**
- * Get the first key for the given 'input_string'. This hashes the first x bits
- * of the 'input_string'.
+ * Get the first key for the given @a input_string. This hashes the first x bits
+ * of the @a input_string.
*
* @param input_string string.
- * @param string_len length of the 'input_string'.
+ * @param string_len length of the @a input_string.
* @param key pointer to where to write the hash code.
- *
- * @return number of bits of 'input_string' that have been consumed
+ * @return number of bits of @a input_string that have been consumed
* to construct the key
*/
size_t
-REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len,
- struct GNUNET_HashCode * key)
+REGEX_INTERNAL_get_first_key (const char *input_string,
+ size_t string_len,
+ struct GNUNET_HashCode *key)
{
size_t size;
* @param consumed_string string consumed by traversing the graph till this state.
* @param state current state of the automaton.
* @param iterator iterator function called for each edge.
- * @param iterator_cls closure for the iterator function.
+ * @param iterator_cls closure for the @a iterator function.
*/
static void
-iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
- char *consumed_string, struct REGEX_INTERNAL_State *state,
- REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
+iterate_initial_edge (unsigned int min_len,
+ unsigned int max_len,
+ char *consumed_string,
+ struct REGEX_INTERNAL_State *state,
+ REGEX_INTERNAL_KeyIterator iterator,
+ void *iterator_cls)
{
char *temp;
struct REGEX_INTERNAL_Transition *t;
else
cur_len = 0;
- if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
- NULL != consumed_string)
+ if ( ( (cur_len >= min_len) ||
+ (GNUNET_YES == state->accepting) ) &&
+ (cur_len > 0) &&
+ (NULL != consumed_string) )
{
if (cur_len <= max_len)
{
- if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
+ if ( (NULL != state->proof) &&
+ (0 != strcmp (consumed_string,
+ state->proof)) )
{
(void) state_get_edges (state, edges);
- GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
- iterator (iterator_cls, &hash, consumed_string, state->accepting,
+ GNUNET_CRYPTO_hash (consumed_string,
+ strlen (consumed_string),
+ &hash);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state for string `%s' is %s\n",
+ consumed_string,
+ GNUNET_h2s (&hash));
+ iterator (iterator_cls,
+ &hash,
+ consumed_string,
+ state->accepting,
num_edges, edges);
}
- if (GNUNET_YES == state->accepting && cur_len > 1 &&
- state->transition_count < 1 && cur_len < max_len)
+ if ( (GNUNET_YES == state->accepting) &&
+ (cur_len > 1) &&
+ (state->transition_count < 1) &&
+ (cur_len < max_len) )
{
/* Special case for regex consisting of just a string that is shorter than
* max_len */
edge[0].destination = state->hash;
temp = GNUNET_strdup (consumed_string);
temp[cur_len - 1] = '\0';
- GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
- iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
+ GNUNET_CRYPTO_hash (temp,
+ cur_len - 1,
+ &hash_new);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state for short string `%s' is %s\n",
+ temp,
+ GNUNET_h2s (&hash_new));
+ iterator (iterator_cls,
+ &hash_new,
+ temp,
+ GNUNET_NO, 1,
+ edge);
GNUNET_free (temp);
}
}
temp = GNUNET_strdup (consumed_string);
temp[max_len] = '\0';
GNUNET_CRYPTO_hash (temp, max_len, &hash);
- iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Start state at split edge `%s'-`%s` is %s\n",
+ temp,
+ edge[0].label,
+ GNUNET_h2s (&hash_new));
+ iterator (iterator_cls,
+ &hash,
+ temp,
+ GNUNET_NO,
+ 1,
+ edge);
GNUNET_free (temp);
}
}
{
for (t = state->transitions_head; NULL != t; t = t->next)
{
+ if (NULL != strchr (t->label,
+ (int) '.'))
+ {
+ /* Wildcards not allowed during starting states */
+ GNUNET_break (0);
+ continue;
+ }
if (NULL != consumed_string)
- GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
+ GNUNET_asprintf (&temp,
+ "%s%s",
+ consumed_string,
+ t->label);
else
- GNUNET_asprintf (&temp, "%s", t->label);
-
- iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
+ GNUNET_asprintf (&temp,
+ "%s",
+ t->label);
+ iterate_initial_edge (min_len,
+ max_len,
+ temp,
+ t->to_state,
+ iterator,
iterator_cls);
GNUNET_free (temp);
}
{
struct REGEX_INTERNAL_State *s;
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Iterating over starting edges\n");
+ iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
+ GNUNET_REGEX_INITIAL_BYTES,
+ NULL, a->start,
+ iterator, iterator_cls);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Iterating over DFA edges\n");
for (s = a->states_head; NULL != s; s = s->next)
{
struct REGEX_BLOCK_Edge edges[s->transition_count];
num_edges = state_get_edges (s, edges);
if ( ( (NULL != s->proof) &&
(0 < strlen (s->proof)) ) || s->accepting)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Creating DFA edges at `%s' under key %s\n",
+ s->proof,
+ GNUNET_h2s (&s->hash));
iterator (iterator_cls, &s->hash, s->proof,
s->accepting,
num_edges, edges);
+ }
s->marked = GNUNET_NO;
}
-
- iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
- GNUNET_REGEX_INITIAL_BYTES,
- NULL, a->start,
- iterator, iterator_cls);
}
+
/**
* Struct to hold all the relevant state information in the HashMap.
*
tmp->num_edges = num_edges;
edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
tmp->edges = GNUNET_malloc (edges_size);
- memcpy(tmp->edges, edges, edges_size);
+ GNUNET_memcpy(tmp->edges, edges, edges_size);
GNUNET_CONTAINER_multihashmap_put (hm, key, tmp,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST);
}