* Allocated buffer.
*/
char *abuf;
-
+
/**
* Length of the string in the buffer.
*/
* change). This is used in an optimization that improves
* performance by about 1% --- if we use int16_t here. With just
* "int" for both flags, performance drops (on my system) significantly,
- * most likely due to increased cache misses.
+ * most likely due to increased cache misses.
*/
int16_t synced;
-
+
};
return -1;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
- * Compare two strings for equality.
+ * Compare two strings for equality.
*
* @param s1 first string for comparison.
* @param s2 second string for comparison.
return -1;
return memcmp (s1->sbuf, s2->sbuf, s1->slen);
}
-
+
/**
* Reallocate the buffer of 'ret' to fit 'nlen' characters;
ret->sbuf = ret->abuf;
GNUNET_free_non_null (old);
}
-
+
/**
* Append a string.
sarg->slen);
ret->slen += sarg->slen;
}
-
+
/**
* Append a C string.
cstr_len);
ret->slen += cstr_len;
}
-
+
/**
* Wrap a string buffer, that is, set ret to the format string
static void
sb_strdup (struct StringBuffer *out,
const struct StringBuffer *in)
-
+
{
out->null_flag = in->null_flag;
if (GNUNET_YES == out->null_flag)
}
/* while '(' before ')', count opening parens */
while ( (NULL != (op = memchr (pos, '(', end - pos))) &&
- (op < cl) )
+ (op < cl) )
{
cnt++;
pos = op + 1;
if (0)
return;
sbuf = str->sbuf;
- if ( (GNUNET_YES == str->null_flag) ||
+ if ( (GNUNET_YES == str->null_flag) ||
(1 >= (slen = str->slen)) ||
('(' != str->sbuf[0]) ||
(')' != str->sbuf[slen - 1]) )
end = &sbuf[slen - 1];
op = memchr (pos, '(', end - pos);
cp = memchr (pos, ')', end - pos);
- while (NULL != cp)
+ while (NULL != cp)
{
while ( (NULL != op) &&
(op < cp) )
return;
}
str->sbuf++;
- str->slen -= 2;
+ str->slen -= 2;
}
static int
has_epsilon (const struct StringBuffer *str)
{
- return
- (GNUNET_YES != str->null_flag) &&
+ /* An epsilon alternative has the shape "(|...)" and therefore needs at
+ * least two bytes.  Requiring slen > 1 (instead of slen > 0) keeps the
+ * read of sbuf[1] inside the string; the buffer is handled by explicit
+ * length, so a terminator after sbuf[slen - 1] cannot be assumed.  The
+ * result is unchanged: a one-byte string can never both start with '('
+ * and end with ')'. */
+ return
+ (GNUNET_YES != str->null_flag) &&
- (0 < str->slen) &&
+ (1 < str->slen) &&
- ('(' == str->sbuf[0]) &&
+ ('(' == str->sbuf[0]) &&
('|' == str->sbuf[1]) &&
(')' == str->sbuf[str->slen - 1]);
}
{
ret->null_flag = GNUNET_YES;
return;
- }
- if ( (str->slen > 1) &&
+ }
+ if ( (str->slen > 1) &&
('(' == str->sbuf[0]) &&
('|' == str->sbuf[1]) &&
(')' == str->sbuf[str->slen - 1]) )
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strncmp (const struct StringBuffer *str1,
+sb_strncmp (const struct StringBuffer *str1,
const struct StringBuffer *str2, size_t n)
{
size_t max;
-
+
if ( (str1->slen != str2->slen) &&
( (str1->slen < n) ||
(str2->slen < n) ) )
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strncmp_cstr (const struct StringBuffer *str1,
+sb_strncmp_cstr (const struct StringBuffer *str1,
const char *str2, size_t n)
{
- if (str1->slen < n)
+ if (str1->slen < n) /* buffer holds fewer than n bytes, so it cannot match the first n bytes of str2 */
return -1;
return memcmp (str1->sbuf, str2, n); /* bytewise compare of the first n bytes; reads n bytes from str2 without checking its length */
}
/**
- * Initialize string buffer for storing strings of up to n
+ * Initialize string buffer for storing strings of up to n
* characters.
*
* @param sb buffer to initialize
* @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
*/
static int
-sb_strkcmp (const struct StringBuffer *str1,
+sb_strkcmp (const struct StringBuffer *str1,
const struct StringBuffer *str2, size_t k)
{
if ( (GNUNET_YES == str1->null_flag) ||
* @param R_cur_r optimization -- kept between iterations to avoid realloc
*/
static void
-automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
+automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
const struct StringBuffer *R_last_ik,
const struct StringBuffer *R_last_kk,
const struct StringBuffer *R_last_kj,
* R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
*/
- if ( (GNUNET_YES == R_last_ij->null_flag) &&
- ( (GNUNET_YES == R_last_ik->null_flag) ||
+ if ( (GNUNET_YES == R_last_ij->null_flag) &&
+ ( (GNUNET_YES == R_last_ik->null_flag) ||
(GNUNET_YES == R_last_kj->null_flag)))
{
/* R^{(k)}_{ij} = N | N */
return;
}
- if ( (GNUNET_YES == R_last_ik->null_flag) ||
+ if ( (GNUNET_YES == R_last_ik->null_flag) ||
(GNUNET_YES == R_last_kj->null_flag) )
{
/* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
if (GNUNET_YES == R_last_ij->synced)
{
- R_cur_ij->synced = GNUNET_YES;
+ R_cur_ij->synced = GNUNET_YES;
R_cur_ij->null_flag = GNUNET_NO;
return;
}
/* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
* $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
- R_cur_r->null_flag = GNUNET_YES;
- R_cur_r->slen = 0;
- R_cur_l->null_flag = GNUNET_YES;
- R_cur_l->slen = 0;
+ R_cur_r->null_flag = GNUNET_YES;
+ R_cur_r->slen = 0;
+ R_cur_l->null_flag = GNUNET_YES;
+ R_cur_l->slen = 0;
/* cache results from strcmp, we might need these many times */
ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
remove_epsilon (R_last_ij, &R_temp_ij);
remove_parentheses (&R_temp_ij);
- if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
- (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
+ if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
+ (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
(0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
{
if (0 == R_temp_ij.slen)
length = R_temp_kk.slen - R_last_ik->slen;
/* a(ba)*bx = (ab)+x */
- if ( (length > 0) &&
+ if ( (length > 0) &&
(GNUNET_YES != R_last_kk->null_flag) &&
(0 < R_last_kk->slen) &&
- (GNUNET_YES != R_last_kj->null_flag) &&
+ (GNUNET_YES != R_last_kj->null_flag) &&
(0 < R_last_kj->slen) &&
(GNUNET_YES != R_last_ik->null_flag) &&
(0 < R_last_ik->slen) &&
(0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
(0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
- {
+ {
struct StringBuffer temp_a;
struct StringBuffer temp_b;
sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
}
/* aa*a = a+a */
- else if ( (0 == clean_ik_kk_cmp) &&
+ else if ( (0 == clean_ik_kk_cmp) &&
(0 == clean_kk_kj_cmp) &&
(! has_epsilon (R_last_ik)) )
{
sb_free (&R_temp_kk);
sb_free (&R_temp_kj);
- if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
(GNUNET_YES == R_cur_r->null_flag) )
{
R_cur_ij->null_flag = GNUNET_YES;
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
if (needs_parentheses (&R_last[i * n + j]))
- sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
+ sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
/* Compute regular expressions of length "k" between each pair of states per
* induction */
memset (&R_cur_l, 0, sizeof (struct StringBuffer));
if ( (0 == complete_regex.slen) &&
(0 < R_last[a->start->dfs_id * n + i].slen) )
{
- sb_append (&complete_regex,
+ sb_append (&complete_regex,
&R_last[a->start->dfs_id * n + i]);
}
else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
(0 < R_last[a->start->dfs_id * n + i].slen) )
{
sb_append_cstr (&complete_regex, "|");
- sb_append (&complete_regex,
+ sb_append (&complete_regex,
&R_last[a->start->dfs_id * n + i]);
}
}
/* cleanup */
sb_free (&complete_regex);
- for (i = 0; i < n; i++)
+ for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
{
- sb_free (&R_cur[i * n + j]);
- sb_free (&R_last[i * n + j]);
+ sb_free (&R_cur[i * n + j]);
+ sb_free (&R_last[i * n + j]);
}
GNUNET_free (R_cur);
GNUNET_free (R_last);
pos += strlen (pos);
/* Add a transition for each distinct label to NULL state */
- for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
+ for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
if (NULL != ctran->label)
- state_add_transition (ctx, s, ctran->label, NULL);
+ state_add_transition (ctx, s, ctran->label, NULL);
/* If the nfa_states contain an accepting state, the new dfa state is also
* accepting. */
if (cstate->accepting)
s->accepting = 1;
- }
+ }
pos[-1] = '}';
s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
* @param start start state for the depth-first traversal of the graph.
* @param s current state in the depth-first traversal
*/
-void
+static void
dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
struct REGEX_INTERNAL_State *start,
struct REGEX_INTERNAL_State *s)
* @param count not used.
* @param s current state.
*/
-void
+static void
dfa_add_multi_strides (void *cls, const unsigned int count,
struct REGEX_INTERNAL_State *s)
{
/* Add start state to closure only for epsilon closure */
if (NULL == label)
state_set_append (ret, s);
-
+
/* initialize work stack */
cls_stack.head = NULL;
cls_stack.tail = NULL;
{
GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
currentstate);
- cls_stack.len--;
+ cls_stack.len--;
for (ctran = currentstate->transitions_head; NULL != ctran;
ctran = ctran->next)
{
clsstate);
cls_stack.len++;
clsstate->contained = 1;
- }
+ }
}
}
for (i = 0; i < ret->off; i++)
struct REGEX_INTERNAL_State *end;
a = ctx->stack_tail;
-
if (NULL == a)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
nfa_add_concatenation (&ctx);
}
if (poff == psize)
- GNUNET_array_grow (p, psize, psize * 2 + 4);
+ GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
p[poff].altcount = altcount;
p[poff].atomcount = atomcount;
poff++;
*/
struct REGEX_INTERNAL_Automaton *
REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len,
- unsigned int max_path_len)
+ unsigned int max_path_len)
{
struct REGEX_INTERNAL_Context ctx;
struct REGEX_INTERNAL_Automaton *dfa;
REGEX_INTERNAL_context_init (&ctx);
/* Create NFA */
- // fprintf (stderr, "N");
nfa = REGEX_INTERNAL_construct_nfa (regex, len);
if (NULL == nfa)
dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
automaton_add_state (dfa, dfa->start);
- // fprintf (stderr, "D");
construct_dfa_states (&ctx, nfa, dfa, dfa->start);
REGEX_INTERNAL_automaton_destroy (nfa);
/* Minimize DFA */
- // fprintf (stderr, "M");
if (GNUNET_OK != dfa_minimize (&ctx, dfa))
{
REGEX_INTERNAL_automaton_destroy (dfa);
REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len,
struct GNUNET_HashCode * key)
{
- unsigned int size;
-
- size =
- string_len <
- GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES;
+ size_t size;
+ size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len :
+ GNUNET_REGEX_INITIAL_BYTES;
if (NULL == input_string)
{
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Given input string was NULL!\n");
return 0;
}
-
GNUNET_CRYPTO_hash (input_string, size, key);
return size;
char *consumed_string, struct REGEX_INTERNAL_State *state,
REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
{
- unsigned int i;
char *temp;
struct REGEX_INTERNAL_Transition *t;
unsigned int num_edges = state->transition_count;
struct REGEX_BLOCK_Edge edge[1];
struct GNUNET_HashCode hash;
struct GNUNET_HashCode hash_new;
-
unsigned int cur_len;
if (NULL != consumed_string)
{
if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
{
- for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
- t = t->next, i++)
- {
- edges[i].label = t->label;
- edges[i].destination = t->to_state->hash;
- }
+ (void) state_get_edges (state, edges);
GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
iterator (iterator_cls, &hash, consumed_string, state->accepting,
num_edges, edges);
GNUNET_free (temp);
}
}
- else if (max_len < cur_len)
+ else /* cur_len > max_len */
{
/* Case where the concatenated labels are longer than max_len, then split. */
edge[0].label = &consumed_string[max_len];
*/
void
REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
- REGEX_INTERNAL_KeyIterator iterator,
- void *iterator_cls)
+ REGEX_INTERNAL_KeyIterator iterator,
+ void *iterator_cls)
{
struct REGEX_INTERNAL_State *s;
unsigned int num_edges;
num_edges = state_get_edges (s, edges);
+ if ( ( (NULL != s->proof) &&
+ (0 < strlen (s->proof)) ) || s->accepting)
+ iterator (iterator_cls, &s->hash, s->proof,
+ s->accepting,
+ num_edges, edges);
+ s->marked = GNUNET_NO;
+ }
- if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting)
- iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
- edges);
+ iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
+ GNUNET_REGEX_INITIAL_BYTES,
+ NULL, a->start,
+ iterator, iterator_cls);
+}
- s->marked = GNUNET_NO;
+/**
+ * Struct to hold all the relevant state information in the HashMap.
+ *
+ * Contains the same info as the Regex Iterator parameters except the key,
+ * which comes directly from the HashMap iterator.
+ */
+struct temporal_state_store {
+  /**
+   * #GNUNET_YES once the state has been marked as reachable,
+   * #GNUNET_NO until then.
+   */
+  int reachable;
+
+  /**
+   * Heap-allocated copy of the proof of the state.
+   */
+  char *proof;
+
+  /**
+   * Accepting flag as reported for the state.
+   */
+  int accepting;
+
+  /**
+   * Number of entries in @e edges.  Unsigned, to match the iterator
+   * signature it is copied from and later passed back to.
+   */
+  unsigned int num_edges;
+
+  /**
+   * Heap-allocated copy of the edges leaving the state.
+   */
+  struct REGEX_BLOCK_Edge *edges;
+};
+
+
+/**
+ * Store the client's regex iterator and its closure in one place, so both fit in a hashmap iterator's single closure.
+ */
+struct client_iterator {
+ REGEX_INTERNAL_KeyIterator iterator; /* client callback invoked for each reported state */
+ void *iterator_cls; /* closure passed to 'iterator' as its first argument */
+};
+
+
+/**
+ * Iterator over all edges of a dfa. Stores all of them in a HashMap
+ * for later reachability marking.
+ *
+ * The proof string and the edge array are copied, so the stored entry
+ * does not depend on the lifetime of the arguments.  A NULL proof (which
+ * the edge iteration may report for accepting states) is stored as an
+ * empty string.
+ *
+ * @param cls Closure (HashMap)
+ * @param key hash for current state.
+ * @param proof proof for current state, may be NULL
+ * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
+ * @param num_edges number of edges leaving current state.
+ * @param edges edges leaving current state.
+ */
+static void
+store_all_states (void *cls,
+                  const struct GNUNET_HashCode *key,
+                  const char *proof,
+                  int accepting,
+                  unsigned int num_edges,
+                  const struct REGEX_BLOCK_Edge *edges)
+{
+  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
+  struct temporal_state_store *tmp;
+  size_t edges_size;
+
+  tmp = GNUNET_new (struct temporal_state_store);
+  tmp->reachable = GNUNET_NO;
+  /* GNUNET_strdup is not meant to be called with NULL; substitute "" so
+   * that later strlen() and GNUNET_free() on the copy stay valid. */
+  tmp->proof = GNUNET_strdup ((NULL != proof) ? proof : "");
+  tmp->accepting = accepting;
+  tmp->num_edges = num_edges;
+  edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
+  tmp->edges = GNUNET_malloc (edges_size);
+  memcpy (tmp->edges, edges, edges_size);
+  /* Do not silently ignore a failed insertion: log it. */
+  GNUNET_break (GNUNET_OK ==
+                GNUNET_CONTAINER_multihashmap_put (hm, key, tmp,
+                                                   GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST));
+}
+
+
+/**
+ * Flag a stored state as reachable and propagate the flag, depth first,
+ * to every state one of its edges points to.
+ *
+ * States that already carry the flag are left alone, which also stops
+ * the recursion on cycles.  An edge whose destination is not in the map
+ * is reported through GNUNET_break() and skipped.
+ *
+ * @param state State to mark as reachable.
+ * @param hm HashMap which stores all the states indexed by key.
+ */
+static void
+mark_as_reachable (struct temporal_state_store *state,
+                   struct GNUNET_CONTAINER_MultiHashMap *hm)
+{
+  unsigned int idx;
+
+  if (GNUNET_YES != state->reachable)
+  {
+    state->reachable = GNUNET_YES;
+    idx = 0;
+    while (idx < state->num_edges)
+    {
+      struct temporal_state_store *next;
+
+      next = GNUNET_CONTAINER_multihashmap_get (hm,
+                                                &state->edges[idx].destination);
+      if (NULL != next)
+      {
+        mark_as_reachable (next, hm);
+      }
+      else
+      {
+        /* destination missing from the map: report and move on */
+        GNUNET_break (0);
+      }
+      idx++;
+    }
+  }
+}
+
+
+/**
+ * Hash map iterator that seeds the reachability marking.
+ *
+ * An entry is a seed when its accepting flag is not #GNUNET_NO or when
+ * its proof is at least GNUNET_REGEX_INITIAL_BYTES characters long.
+ * Seeds that are not marked yet are handed to mark_as_reachable().
+ *
+ * @param cls closure, the hash map holding all stored states
+ * @param key current key code (not used)
+ * @param value value in the hash map, a `struct temporal_state_store`
+ * @return always #GNUNET_YES, so that every entry is visited
+ */
+static int
+reachability_iterator (void *cls,
+                       const struct GNUNET_HashCode *key,
+                       void *value)
+{
+  struct temporal_state_store *entry = value;
+  struct GNUNET_CONTAINER_MultiHashMap *all_states = cls;
+  int is_seed;
+
+  if (GNUNET_YES != entry->reachable)
+  {
+    is_seed = (GNUNET_REGEX_INITIAL_BYTES <= strlen (entry->proof)) ||
+              (GNUNET_NO != entry->accepting);
+    if (is_seed)
+      mark_as_reachable (entry, all_states);
+  }
+  return GNUNET_YES;
+}
+
+
+/**
+ * Iterator over hash map entries: calls the client callback for every
+ * entry marked as reachable, then frees the entry.
+ *
+ * @param cls closure, a `struct client_iterator` holding the callback
+ * @param key current key code, passed to the callback as the state's hash
+ * @param value value in the hash map, a `struct temporal_state_store`
+ * @return #GNUNET_YES if we should continue to iterate,
+ * #GNUNET_NO if not. This implementation always returns #GNUNET_YES.
+ */
+static int
+iterate_reachables (void *cls,
+ const struct GNUNET_HashCode *key,
+ void *value)
+{
+ struct client_iterator *ci = cls;
+ struct temporal_state_store *state = value;
+
+ if (GNUNET_YES == state->reachable)
+ {
+ ci->iterator (ci->iterator_cls, key,
+ state->proof, state->accepting,
+ state->num_edges, state->edges);
}
+ GNUNET_free (state->edges); /* the entry is released whether or not it was reported */
+ GNUNET_free (state->proof);
+ GNUNET_free (state); /* the map itself is not modified here and still holds this pointer */
+ return GNUNET_YES;
- iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES,
- NULL, a->start, iterator, iterator_cls);
}
+/**
+ * Report the states of automaton 'a' that can be reached from a seed
+ * state, i.e. from a state that is accepting or whose proof has at least
+ * GNUNET_REGEX_INITIAL_BYTES characters.
+ *
+ * Works in three passes over a temporary hash map: collect every state
+ * reported by REGEX_INTERNAL_iterate_all_edges(), mark the reachable
+ * ones, then call the iterator for each marked state while releasing
+ * the collected entries.
+ *
+ * @param a automaton.
+ * @param iterator iterator called for each reachable edge.
+ * @param iterator_cls closure.
+ */
+void
+REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
+                                        REGEX_INTERNAL_KeyIterator iterator,
+                                        void *iterator_cls)
+{
+  struct client_iterator client = {
+    .iterator = iterator,
+    .iterator_cls = iterator_cls
+  };
+  struct GNUNET_CONTAINER_MultiHashMap *states;
+
+  states = GNUNET_CONTAINER_multihashmap_create (2 * a->state_count,
+                                                 GNUNET_NO);
+  /* pass 1: copy every reported state into the map */
+  REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, states);
+  /* pass 2: flag everything reachable from a seed state */
+  GNUNET_CONTAINER_multihashmap_iterate (states,
+                                         &reachability_iterator,
+                                         states);
+  /* pass 3: report flagged states and free all entries */
+  GNUNET_CONTAINER_multihashmap_iterate (states,
+                                         &iterate_reachables,
+                                         &client);
+  GNUNET_CONTAINER_multihashmap_destroy (states);
+}
-/* end of regex.c */
+/* end of regex_internal.c */