#include "regex_internal.h"
-/**
- * Constant for how many bits the initial string regex should have.
- */
-#define INITIAL_BITS 8
-
-
/**
* Set of states.
*/
state->transition_count--;
GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
transition);
+
GNUNET_free (transition);
}
}
-/**
- * Context for adding strided transitions to a DFA.
- */
-struct GNUNET_REGEX_Strided_Context
-{
- /**
- * Length of the strides.
- */
- const unsigned int stride;
-
- /**
- * Strided transitions DLL. New strided transitions will be stored in this DLL
- * and afterwards added to the DFA.
- */
- struct GNUNET_REGEX_Transition *transitions_head;
-
- /**
- * Strided transitions DLL.
- */
- struct GNUNET_REGEX_Transition *transitions_tail;
-};
-
-
/**
* Check if the given string 'str' needs parentheses around it when
* using it to generate a regex.
}
+/**
+ * Context for adding strided transitions to a DFA.
+ */
+struct GNUNET_REGEX_Strided_Context
+{
+ /**
+ * Length of the strides.
+ */
+ const unsigned int stride;
+
+ /**
+ * Strided transitions DLL. New strided transitions will be stored in this DLL
+ * and afterwards added to the DFA.
+ */
+ struct GNUNET_REGEX_Transition *transitions_head;
+
+ /**
+ * Strided transitions DLL.
+ */
+ struct GNUNET_REGEX_Transition *transitions_tail;
+};
+
+
/**
* Recursive helper function to add strides to a DFA.
*
* and adds new transitions to the given transitions DLL and marks states that
* should be removed by setting state->contained to GNUNET_YES.
*
+ * @param dfa DFA for which the paths should be compressed.
* @param start starting state for linear path search.
* @param cur current state in the recursive DFS.
* @param label current label (string of traversed labels).
+ * @param max_len maximal path compression length.
* @param transitions_head transitions DLL.
* @param transitions_tail transitions DLL.
*/
void
-dfa_compress_paths_helper (struct GNUNET_REGEX_State *start,
+dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa,
+ struct GNUNET_REGEX_State *start,
struct GNUNET_REGEX_State *cur, char *label,
+ unsigned int max_len,
struct GNUNET_REGEX_Transition **transitions_head,
struct GNUNET_REGEX_Transition **transitions_tail)
{
if (NULL != label &&
- (cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
- cur->transition_count > 1 || GNUNET_YES == cur->marked))
+ ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting
+// || cur->transition_count > 1
+ || GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 &&
+ max_len == strlen (label)) ||
+ (start == dfa->start && GNUNET_REGEX_INITIAL_BITS == strlen (label))))
{
t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
t->label = GNUNET_strdup (label);
if (GNUNET_NO == cur->marked)
{
- dfa_compress_paths_helper (cur, cur, NULL, transitions_head,
+ dfa_compress_paths_helper (dfa, cur, cur, NULL, max_len, transitions_head,
transitions_tail);
}
return;
if (t->to_state != cur)
{
- dfa_compress_paths_helper (start, t->to_state, new_label,
+ dfa_compress_paths_helper (dfa, start, t->to_state, new_label, max_len,
transitions_head, transitions_tail);
}
GNUNET_free (new_label);
*
* @param regex_ctx context for adding new transitions.
* @param dfa DFA representation, will directly modify the given DFA.
+ * @param max_len maximal length of the compressed paths.
*/
static void
dfa_compress_paths (struct GNUNET_REGEX_Context *regex_ctx,
- struct GNUNET_REGEX_Automaton *dfa)
+ struct GNUNET_REGEX_Automaton *dfa, unsigned int max_len)
{
struct GNUNET_REGEX_State *s;
struct GNUNET_REGEX_State *s_next;
}
// Add strides and mark states that can be deleted.
- dfa_compress_paths_helper (dfa->start, dfa->start, NULL, &transitions_head,
- &transitions_tail);
+ dfa_compress_paths_helper (dfa, dfa->start, dfa->start, NULL, max_len,
+ &transitions_head, &transitions_tail);
// Add all the new transitions to the automaton.
for (t = transitions_head; NULL != t; t = t_next)
struct GNUNET_REGEX_Automaton *a;
a = ctx->stack_tail;
+
+ if (NULL == a)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "nfa_add_plus_op failed, because there was no element on the stack");
+ return;
+ }
+
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
state_add_transition (ctx, a->end, NULL, a->start);
// Minimize DFA
dfa_minimize (&ctx, dfa);
- // Compress DFA paths
- dfa_compress_paths (&ctx, dfa);
-
// Create proofs for all states
automaton_create_proofs (dfa);
+ // Compress DFA paths
+ dfa_compress_paths (&ctx, dfa, 8);
+
// Add strides to DFA
//GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2);
if (NULL == a)
return 0;
- for (t_count = 0, s = a->states_head; NULL != s; s = s->next)
- {
+ t_count = 0;
+ for (s = a->states_head; NULL != s; s = s->next)
t_count += s->transition_count;
- }
return t_count;
}
{
unsigned int size;
- size = string_len < INITIAL_BITS ? string_len : INITIAL_BITS;
+ size =
+ string_len <
+ GNUNET_REGEX_INITIAL_BITS ? string_len : GNUNET_REGEX_INITIAL_BITS;
if (NULL == input_string)
{
/**
- * Recursive helper function for iterate_initial_edges. Will call iterator
- * function for each initial state.
+ * Recursive function that calls the iterator for each synthetic start state.
*
* @param min_len minimum length of the path in the graph.
* @param max_len maximum length of the path in the graph.
struct GNUNET_REGEX_Transition *t;
unsigned int num_edges = state->transition_count;
struct GNUNET_REGEX_Edge edges[num_edges];
+ struct GNUNET_REGEX_Edge edge[1];
struct GNUNET_HashCode hash;
+ struct GNUNET_HashCode hash_new;
unsigned int cur_len;
else
cur_len = 0;
- if (cur_len > min_len && NULL != consumed_string)
+ if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
+ NULL != consumed_string)
{
-
if (cur_len <= max_len)
{
- for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
- t = t->next, i++)
+ if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
{
- edges[i].label = t->label;
- edges[i].destination = t->to_state->hash;
+ for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
+ t = t->next, i++)
+ {
+ edges[i].label = t->label;
+ edges[i].destination = t->to_state->hash;
+ }
+ GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
+ iterator (iterator_cls, &hash, consumed_string, state->accepting,
+ num_edges, edges);
}
- GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
- iterator (iterator_cls, &hash, consumed_string, state->accepting,
- num_edges, edges);
-
- // Special case for regex consisting of just a string that is shorter than max_len
if (GNUNET_YES == state->accepting && cur_len > 1 &&
- state->transition_count < 1)
+ state->transition_count < 1 && cur_len < max_len)
{
- edges[0].label = &consumed_string[cur_len - 1];
- edges[0].destination = state->hash;
+ // Special case for regex consisting of just a string that is shorter than
+ // max_len
+ edge[0].label = &consumed_string[cur_len - 1];
+ edge[0].destination = state->hash;
temp = GNUNET_strdup (consumed_string);
temp[cur_len - 1] = '\0';
- GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash);
- iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges);
+ GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
+ iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
GNUNET_free (temp);
}
}
- else
+ else if (max_len < cur_len)
{
- edges[0].label = &consumed_string[max_len];
- edges[0].destination = state->hash;
+ // Case where the concatenated labels are longer than max_len, then split.
+ edge[0].label = &consumed_string[max_len];
+ edge[0].destination = state->hash;
temp = GNUNET_strdup (consumed_string);
temp[max_len] = '\0';
GNUNET_CRYPTO_hash (temp, max_len, &hash);
- iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges);
+ iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
GNUNET_free (temp);
}
}
/**
- * Iterate over all edges helper function starting from state 's', calling
- * iterator function for each edge if the automaton.
+ * Iterate over all edges starting from start state of automaton 'a'. Calling
+ * iterator for each edge.
*
- * @param s state.
- * @param iterator iterator function called for each edge.
+ * @param a automaton.
+ * @param iterator iterator called for each edge.
* @param iterator_cls closure.
*/
-static void
-iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator,
- void *iterator_cls)
+void
+GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
+ GNUNET_REGEX_KeyIterator iterator,
+ void *iterator_cls)
{
- struct GNUNET_REGEX_Transition *t;
- struct GNUNET_REGEX_Edge edges[s->transition_count];
- unsigned int num_edges;
+ struct GNUNET_REGEX_State *s;
- if (GNUNET_YES != s->marked)
+ for (s = a->states_head; NULL != s; s = s->next)
{
- s->marked = GNUNET_YES;
+ struct GNUNET_REGEX_Edge edges[s->transition_count];
+ unsigned int num_edges;
num_edges = state_get_edges (s, edges);
iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
edges);
- for (t = s->transitions_head; NULL != t; t = t->next)
- iterate_edge (t->to_state, iterator, iterator_cls);
+ s->marked = GNUNET_NO;
}
+
+ iterate_initial_edge (GNUNET_REGEX_INITIAL_BITS, GNUNET_REGEX_INITIAL_BITS,
+ NULL, a->start, iterator, iterator_cls);
}
/**
- * Iterate over all edges starting from start state of automaton 'a'. Calling
- * iterator for each edge.
+ * Create a string with binary IP notation for the given 'addr' in 'str'.
*
- * @param a automaton.
- * @param iterator iterator called for each edge.
- * @param iterator_cls closure.
+ * @param af address family of the given 'addr'.
+ * @param addr address that should be converted to a string.
+ * struct in_addr * for IPv4 and struct in6_addr * for IPv6.
+ * @param str string that will contain binary notation of 'addr'. Expected
+ * to be at least 33 bytes long for IPv4 and 129 bytes long for IPv6.
+ */
+static void
+iptobinstr (const int af, const void *addr, char *str)
+{
+ int i;
+
+ switch (af)
+ {
+ case AF_INET:
+ {
+ uint32_t b = htonl (((struct in_addr *) addr)->s_addr);
+
+ str[32] = '\0';
+ str += 31;
+ for (i = 31; i >= 0; i--)
+ {
+ *str = (b & 1) + '0';
+ str--;
+ b >>= 1;
+ }
+ break;
+ }
+ case AF_INET6:
+ {
+ struct in6_addr b = *(const struct in6_addr *) addr;
+
+ str[128] = '\0';
+ str += 127;
+ for (i = 127; i >= 0; i--)
+ {
+ *str = (b.s6_addr[i / 8] & 1) + '0';
+ str--;
+ b.s6_addr[i / 8] >>= 1;
+ }
+ break;
+ }
+ }
+}
+
+
+/**
+ * Get the ipv4 network prefix from the given 'netmask'.
+ *
+ * @param netmask netmask for which to get the prefix len.
+ *
+ * @return length of ipv4 prefix for 'netmask'.
+ */
+static unsigned int
+ipv4netmasktoprefixlen (const char *netmask)
+{
+ struct in_addr a;
+ unsigned int len;
+ uint32_t t;
+
+ if (1 != inet_pton (AF_INET, netmask, &a))
+ return 0;
+ len = 32;
+ for (t = htonl (~a.s_addr); 0 != t; t >>= 1)
+ len--;
+ return len;
+}
+
+
+/**
+ * Create a regex in 'rxstr' from the given 'ip' and 'netmask'.
+ *
+ * @param ip IPv4 representation.
+ * @param netmask netmask for the ip.
+ * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV4_REGEXLEN
+ * bytes long.
*/
void
-GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
- GNUNET_REGEX_KeyIterator iterator,
- void *iterator_cls)
+GNUNET_REGEX_ipv4toregex (const struct in_addr *ip, const char *netmask,
+ char *rxstr)
{
- struct GNUNET_REGEX_State *s;
+ unsigned int pfxlen;
- for (s = a->states_head; NULL != s; s = s->next)
- s->marked = GNUNET_NO;
+ pfxlen = ipv4netmasktoprefixlen (netmask);
+ iptobinstr (AF_INET, ip, rxstr);
+ rxstr[pfxlen] = '\0';
+ if (pfxlen < 32)
+ strcat (rxstr, "(0|1)+");
+}
- iterate_initial_edge (0, INITIAL_BITS, NULL, a->start, iterator,
- iterator_cls);
- iterate_edge (a->start, iterator, iterator_cls);
+
+/**
+ * Create a regex in 'rxstr' from the given 'ipv6' and 'prefixlen'.
+ *
+ * @param ipv6 IPv6 representation.
+ * @param prefixlen length of the ipv6 prefix.
+ * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV6_REGEXLEN
+ * bytes long.
+ */
+void
+GNUNET_REGEX_ipv6toregex (const struct in6_addr *ipv6, unsigned int prefixlen,
+ char *rxstr)
+{
+ iptobinstr (AF_INET6, ipv6, rxstr);
+ rxstr[prefixlen] = '\0';
+ if (prefixlen < 128)
+ strcat (rxstr, "(0|1)+");
}