+
+/**
+ * Remove parentheses surrounding string 'str'.
+ * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
+ * You need to GNUNET_free the returned string.
+ *
+ * @param str string, modified to contain a
+ * @return string without surrounding parentheses, string 'str' if no preceding
+ * epsilon could be found, NULL if 'str' was NULL
+ */
+static void
+remove_parentheses (struct StringBuffer *str)
+{
+ size_t slen;
+ const char *pos;
+ const char *end;
+ const char *sbuf;
+ const char *op;
+ const char *cp;
+ unsigned int cnt;
+
+ if (0)
+ return;
+ sbuf = str->sbuf;
+ if ( (GNUNET_YES == str->null_flag) ||
+ (1 >= (slen = str->slen)) ||
+ ('(' != str->sbuf[0]) ||
+ (')' != str->sbuf[slen - 1]) )
+ return;
+ cnt = 0;
+ pos = &sbuf[1];
+ end = &sbuf[slen - 1];
+ op = memchr (pos, '(', end - pos);
+ cp = memchr (pos, ')', end - pos);
+ while (NULL != cp)
+ {
+ while ( (NULL != op) &&
+ (op < cp) )
+ {
+ cnt++;
+ pos = op + 1;
+ op = memchr (pos, '(', end - pos);
+ }
+ while ( (NULL != cp) &&
+ ( (NULL == op) ||
+ (cp < op) ) )
+ {
+ if (0 == cnt)
+ return; /* can't strip parens */
+ cnt--;
+ pos = cp + 1;
+ cp = memchr (pos, ')', end - pos);
+ }
+ }
+ if (0 != cnt)
+ {
+ GNUNET_break (0);
+ return;
+ }
+ str->sbuf++;
+ str->slen -= 2;
+}
+
+
+/**
+ * Check if the string 'str' starts with an epsilon (empty string).
+ * Example: "(|a)" is starting with an epsilon.
+ *
+ * @param str string to test
+ *
+ * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
+ */
+static int
+has_epsilon (const struct StringBuffer *str)
+{
+ return
+ (GNUNET_YES != str->null_flag) &&
+ (0 < str->slen) &&
+ ('(' == str->sbuf[0]) &&
+ ('|' == str->sbuf[1]) &&
+ (')' == str->sbuf[str->slen - 1]);
+}
+
+
+/**
+ * Remove an epsilon from the string str. Where epsilon is an empty string
+ * Example: str = "(|a|b|c)", result: "a|b|c"
+ * The returned string needs to be freed.
+ *
+ * @param str original string
+ * @param ret where to return string without preceding epsilon, string 'str' if no preceding
+ * epsilon could be found, NULL if 'str' was NULL
+ */
+static void
+remove_epsilon (const struct StringBuffer *str,
+ struct StringBuffer *ret)
+{
+ if (GNUNET_YES == str->null_flag)
+ {
+ ret->null_flag = GNUNET_YES;
+ return;
+ }
+ if ( (str->slen > 1) &&
+ ('(' == str->sbuf[0]) &&
+ ('|' == str->sbuf[1]) &&
+ (')' == str->sbuf[str->slen - 1]) )
+ {
+ /* remove epsilon */
+ if (ret->blen < str->slen - 3)
+ {
+ GNUNET_array_grow (ret->abuf,
+ ret->blen,
+ str->slen - 3);
+ }
+ ret->sbuf = ret->abuf;
+ ret->slen = str->slen - 3;
+ memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
+ return;
+ }
+ sb_strdup (ret, str);
+}
+
+
+/**
+ * Compare n bytes of 'str1' and 'str2'
+ *
+ * @param str1 first string to compare
+ * @param str2 second string for comparison
+ * @param n number of bytes to compare
+ *
+ * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
+ */
+static int
+sb_strncmp (const struct StringBuffer *str1,
+ const struct StringBuffer *str2, size_t n)
+{
+ size_t max;
+
+ if ( (str1->slen != str2->slen) &&
+ ( (str1->slen < n) ||
+ (str2->slen < n) ) )
+ return -1;
+ max = GNUNET_MAX (str1->slen, str2->slen);
+ if (max > n)
+ max = n;
+ return memcmp (str1->sbuf, str2->sbuf, max);
+}
+
+
+/**
+ * Compare n bytes of 'str1' and 'str2'
+ *
+ * @param str1 first string to compare
+ * @param str2 second C string for comparison
+ * @param n number of bytes to compare (and length of str2)
+ *
+ * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
+ */
+static int
+sb_strncmp_cstr (const struct StringBuffer *str1,
+ const char *str2, size_t n)
+{
+ if (str1->slen < n)
+ return -1;
+ return memcmp (str1->sbuf, str2, n);
+}
+
+
+/**
+ * Initialize string buffer for storing strings of up to n
+ * characters.
+ *
+ * @param sb buffer to initialize
+ * @param n desired target length
+ */
+static void
+sb_init (struct StringBuffer *sb,
+ size_t n)
+{
+ sb->null_flag = GNUNET_NO;
+ sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
+ sb->blen = n;
+ sb->slen = 0;
+}
+
+
+/**
+ * Compare 'str1', starting from position 'k', with whole 'str2'
+ *
+ * @param str1 first string to compare, starting from position 'k'
+ * @param str2 second string for comparison
+ * @param k starting position in 'str1'
+ *
+ * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
+ */
+static int
+sb_strkcmp (const struct StringBuffer *str1,
+ const struct StringBuffer *str2, size_t k)
+{
+ if ( (GNUNET_YES == str1->null_flag) ||
+ (GNUNET_YES == str2->null_flag) ||
+ (k > str1->slen) ||
+ (str1->slen - k != str2->slen) )
+ return -1;
+ return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
+}
+
+
+/**
+ * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse'
+ * function to create the depth-first numbering of the states.
+ *
+ * @param cls states array.
+ * @param count current state counter.
+ * @param s current state.
+ */
+static void
+number_states (void *cls, const unsigned int count,
+ struct GNUNET_REGEX_State *s)
+{
+ struct GNUNET_REGEX_State **states = cls;
+
+ s->dfs_id = count;
+ if (NULL != states)
+ states[count] = s;
+}
+
+
+
+#define PRIS(a) \
+ ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
+ ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
+
+
+/**
+ * Construct the regular expression given the inductive step,
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
+ * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
+ *
+ * @param R_last_ij value of $R^{(k-1)_{ij}.
+ * @param R_last_ik value of $R^{(k-1)_{ik}.
+ * @param R_last_kk value of $R^{(k-1)_{kk}.
+ * @param R_last_kj value of $R^{(k-1)_{kj}.
+ * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
+ * is expected to be NULL when called!
+ * @param R_cur_l optimization -- kept between iterations to avoid realloc
+ * @param R_cur_r optimization -- kept between iterations to avoid realloc
+ */
+static void
+automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
+ const struct StringBuffer *R_last_ik,
+ const struct StringBuffer *R_last_kk,
+ const struct StringBuffer *R_last_kj,
+ struct StringBuffer *R_cur_ij,
+ struct StringBuffer *R_cur_l,
+ struct StringBuffer *R_cur_r)
+{
+ struct StringBuffer R_temp_ij;
+ struct StringBuffer R_temp_ik;
+ struct StringBuffer R_temp_kj;
+ struct StringBuffer R_temp_kk;
+ int eps_check;
+ int ij_ik_cmp;
+ int ij_kj_cmp;
+ int ik_kk_cmp;
+ int kk_kj_cmp;
+ int clean_ik_kk_cmp;
+ int clean_kk_kj_cmp;
+ size_t length;
+ size_t length_l;
+ size_t length_r;
+
+ /*
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ * R_last == R^{(k-1)}, R_cur == R^{(k)}
+ * R_cur_ij = R_cur_l | R_cur_r
+ * R_cur_l == R^{(k-1)}_{ij}
+ * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ */
+
+ if ( (GNUNET_YES == R_last_ij->null_flag) &&
+ ( (GNUNET_YES == R_last_ik->null_flag) ||
+ (GNUNET_YES == R_last_kj->null_flag)))
+ {
+ /* R^{(k)}_{ij} = N | N */
+ R_cur_ij->null_flag = GNUNET_YES;
+ R_cur_ij->synced = GNUNET_NO;
+ return;
+ }
+
+ if ( (GNUNET_YES == R_last_ik->null_flag) ||
+ (GNUNET_YES == R_last_kj->null_flag) )
+ {
+ /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
+ if (GNUNET_YES == R_last_ij->synced)
+ {
+ R_cur_ij->synced = GNUNET_YES;
+ R_cur_ij->null_flag = GNUNET_NO;
+ return;
+ }
+ R_cur_ij->synced = GNUNET_YES;
+ sb_strdup (R_cur_ij, R_last_ij);
+ return;
+ }
+ R_cur_ij->synced = GNUNET_NO;
+
+ /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
+
+ R_cur_r->null_flag = GNUNET_YES;
+ R_cur_r->slen = 0;
+ R_cur_l->null_flag = GNUNET_YES;
+ R_cur_l->slen = 0;
+
+ /* cache results from strcmp, we might need these many times */
+ ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
+ ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
+ ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
+ kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
+
+ /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
+ * as parentheses, so we can better compare the contents */
+
+ memset (&R_temp_ij, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_ik, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_kk, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_kj, 0, sizeof (struct StringBuffer));
+ remove_epsilon (R_last_ik, &R_temp_ik);
+ remove_epsilon (R_last_kk, &R_temp_kk);
+ remove_epsilon (R_last_kj, &R_temp_kj);
+ remove_parentheses (&R_temp_ik);
+ remove_parentheses (&R_temp_kk);
+ remove_parentheses (&R_temp_kj);
+ clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
+ clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
+
+ /* construct R_cur_l (and, if necessary R_cur_r) */
+ if (GNUNET_YES != R_last_ij->null_flag)
+ {
+ /* Assign R_temp_ij to R_last_ij and remove epsilon as well
+ * as parentheses, so we can better compare the contents */
+ remove_epsilon (R_last_ij, &R_temp_ij);
+ remove_parentheses (&R_temp_ij);
+
+ if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
+ (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
+ (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
+ {
+ if (0 == R_temp_ij.slen)
+ {
+ R_cur_r->null_flag = GNUNET_NO;
+ }
+ else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
+ (0 == sb_strncmp_cstr (R_last_ik, "(|", 2) &&
+ 0 == sb_strncmp_cstr (R_last_kj, "(|", 2)))
+ {
+ /*
+ * a|(e|a)a*(e|a) = a*
+ * a|(e|a)(e|a)*(e|a) = a*
+ * (e|a)|aa*a = a*
+ * (e|a)|aa*(e|a) = a*
+ * (e|a)|(e|a)a*a = a*
+ * (e|a)|(e|a)a*(e|a) = a*
+ * (e|a)|(e|a)(e|a)*(e|a) = a*
+ */
+ if (GNUNET_YES == needs_parentheses (&R_temp_ij))
+ sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
+ else
+ sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
+ }
+ else
+ {
+ /*
+ * a|aa*a = a+
+ * a|(e|a)a*a = a+
+ * a|aa*(e|a) = a+
+ * a|(e|a)(e|a)*a = a+
+ * a|a(e|a)*(e|a) = a+
+ */
+ if (GNUNET_YES == needs_parentheses (&R_temp_ij))
+ sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
+ else
+ sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
+ }
+ }
+ else if ( (0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && (0 != clean_ik_kk_cmp) )
+ {
+ /* a|ab*b = ab* */
+ if (0 == R_last_kk->slen)
+ sb_strdup (R_cur_r, R_last_ij);
+ else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && (0 != clean_kk_kj_cmp))
+ {
+ /* a|bb*a = b*a */
+ if (R_last_kk->slen < 1)
+ {
+ sb_strdup (R_cur_r, R_last_kj);
+ }
+ else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
+
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_ik_cmp) && (0 == kk_kj_cmp) && (! has_epsilon (R_last_ij)) &&
+ has_epsilon (R_last_kk))
+ {
+ /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_kj_cmp) && (0 == ik_kk_cmp) && (! has_epsilon (R_last_ij)) &&
+ has_epsilon (R_last_kk))
+ {
+ /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else
+ {
+ sb_strdup (R_cur_l, R_last_ij);
+ remove_parentheses (R_cur_l);
+ }
+ }
+ else
+ {
+ /* we have no left side */
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+
+ /* construct R_cur_r, if not already constructed */
+ if (GNUNET_YES == R_cur_r->null_flag)
+ {
+ length = R_temp_kk.slen - R_last_ik->slen;
+
+ /* a(ba)*bx = (ab)+x */
+ if ( (length > 0) &&
+ (GNUNET_YES != R_last_kk->null_flag) &&
+ (0 < R_last_kk->slen) &&
+ (GNUNET_YES != R_last_kj->null_flag) &&
+ (0 < R_last_kj->slen) &&
+ (GNUNET_YES != R_last_ik->null_flag) &&
+ (0 < R_last_ik->slen) &&
+ (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
+ (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
+ {
+ struct StringBuffer temp_a;
+ struct StringBuffer temp_b;
+
+ sb_init (&temp_a, length);
+ sb_init (&temp_b, R_last_kj->slen - length);
+
+ length_l = length;
+ temp_a.sbuf = temp_a.abuf;
+ memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
+ temp_a.slen = length_l;
+
+ length_r = R_last_kj->slen - length;
+ temp_b.sbuf = temp_b.abuf;
+ memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
+ temp_b.slen = length_r;
+
+ /* e|(ab)+ = (ab)* */
+ if ( (GNUNET_YES != R_cur_l->null_flag) &&
+ (0 == R_cur_l->slen) &&
+ (0 == temp_b.slen) )
+ {
+ sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
+ sb_free (R_cur_l);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else
+ {
+ sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
+ }
+ sb_free (&temp_a);
+ sb_free (&temp_b);
+ }
+ else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk) &&
+ 0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
+ {
+ /*
+ * (e|a)a*(e|a) = a*
+ * (e|a)(e|a)*(e|a) = a*
+ */
+ if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
+ else
+ sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
+ }
+ /* aa*a = a+a */
+ else if ( (0 == clean_ik_kk_cmp) &&
+ (0 == clean_kk_kj_cmp) &&
+ (! has_epsilon (R_last_ik)) )
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
+ }
+ /*
+ * (e|a)a*a = a+
+ * aa*(e|a) = a+
+ * a(e|a)*(e|a) = a+
+ * (e|a)a*a = a+
+ */
+ else
+ {
+ eps_check =
+ (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
+ has_epsilon (R_last_kj));
+
+ if (1 == eps_check)
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
+ else
+ sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
+ }
+ }
+ }
+ /*
+ * aa*b = a+b
+ * (e|a)(e|a)*b = a*b
+ */
+ else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
+ {
+ if (has_epsilon (R_last_ik))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
+ }
+ else
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
+ }
+ }
+ /*
+ * ba*a = ba+
+ * b(e|a)*(e|a) = ba*
+ */
+ else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
+ {
+ if (has_epsilon (R_last_kj))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
+ }
+ else
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
+ }
+ }
+ else
+ {
+ if (0 < R_temp_kk.slen)
+ {
+ if (needs_parentheses (&R_temp_kk))
+ {
+ sb_printf3 (R_cur_r, "%.*s(%.*s)*%.*s", 3, R_last_ik, &R_temp_kk,
+ R_last_kj);
+ }
+ else
+ {
+ sb_printf3 (R_cur_r, "%.*s%.*s*%.*s", 1, R_last_ik, &R_temp_kk,
+ R_last_kj);
+ }
+ }
+ else
+ {
+ sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
+ }
+ }
+ }
+ sb_free (&R_temp_ij);
+ sb_free (&R_temp_ik);
+ sb_free (&R_temp_kk);
+ sb_free (&R_temp_kj);
+
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ (GNUNET_YES == R_cur_r->null_flag) )
+ {
+ R_cur_ij->null_flag = GNUNET_YES;
+ return;
+ }
+
+ if ( (GNUNET_YES != R_cur_l->null_flag) &&
+ (GNUNET_YES == R_cur_r->null_flag) )
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_l;
+ *R_cur_l = tmp;
+ return;
+ }
+
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ (GNUNET_YES != R_cur_r->null_flag) )
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_r;
+ *R_cur_r = tmp;
+ return;
+ }
+
+ if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_l;
+ *R_cur_l = tmp;
+ return;
+ }
+ sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
+}
+
+
+/**
+ * Create proofs for all states in the given automaton. Implementation of the
+ * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
+ * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
+ *
+ * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
+ * proof) fields. The starting state will only have a valid proof/hash if it has
+ * any incoming transitions.
+ *
+ * @param a automaton for which to assign proofs and hashes, must not be NULL
+ */
+static int
+automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
+{
+ unsigned int n = a->state_count;
+ struct GNUNET_REGEX_State *states[n];
+ struct StringBuffer *R_last;
+ struct StringBuffer *R_cur;
+ struct StringBuffer R_cur_r;
+ struct StringBuffer R_cur_l;
+ struct StringBuffer *R_swap;
+ struct GNUNET_REGEX_Transition *t;
+ struct StringBuffer complete_regex;
+ unsigned int i;
+ unsigned int j;
+ unsigned int k;
+
+ R_last = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
+ R_cur = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
+ if ( (NULL == R_last) ||
+ (NULL == R_cur) )
+ {
+ GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
+ GNUNET_free_non_null (R_cur);
+ GNUNET_free_non_null (R_last);
+ return GNUNET_SYSERR;
+ }
+
+ /* create depth-first numbering of the states, initializes 'state' */
+ GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL, &number_states,
+ states);
+
+ for (i = 0; i < n; i++)
+ GNUNET_assert (NULL != states[i]);
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ R_last[i *n + j].null_flag = GNUNET_YES;
+
+ /* Compute regular expressions of length "1" between each pair of states */
+ for (i = 0; i < n; i++)
+ {
+ for (t = states[i]->transitions_head; NULL != t; t = t->next)
+ {
+ j = t->to_state->dfs_id;
+ if (GNUNET_YES == R_last[i * n + j].null_flag)
+ {
+ sb_strdup_cstr (&R_last[i * n + j], t->label);
+ }
+ else
+ {
+ sb_append_cstr (&R_last[i * n + j], "|");
+ sb_append_cstr (&R_last[i * n + j], t->label);
+ }
+ }
+ /* add self-loop: i is reachable from i via epsilon-transition */
+ if (GNUNET_YES == R_last[i * n + i].null_flag)
+ {
+ R_last[i * n + i].slen = 0;
+ R_last[i * n + i].null_flag = GNUNET_NO;
+ }
+ else
+ {
+ sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
+ }
+ }
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ if (needs_parentheses (&R_last[i * n + j]))
+ sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
+ /* Compute regular expressions of length "k" between each pair of states per
+ * induction */
+ memset (&R_cur_l, 0, sizeof (struct StringBuffer));
+ memset (&R_cur_r, 0, sizeof (struct StringBuffer));
+ for (k = 0; k < n; k++)
+ {
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ /* Basis for the recursion:
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ * R_last == R^{(k-1)}, R_cur == R^{(k)}
+ */
+
+ /* Create R_cur[i][j] and simplify the expression */
+ automaton_create_proofs_simplify (&R_last[i * n + j], &R_last[i * n + k],
+ &R_last[k * n + k], &R_last[k * n + j],
+ &R_cur[i * n + j],
+ &R_cur_l, &R_cur_r);
+ }
+ }
+ /* set R_last = R_cur */
+ R_swap = R_last;
+ R_last = R_cur;
+ R_cur = R_swap;
+ /* clear 'R_cur' for next iteration */
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ R_cur[i * n + j].null_flag = GNUNET_YES;
+ }
+ sb_free (&R_cur_l);
+ sb_free (&R_cur_r);
+ /* assign proofs and hashes */
+ for (i = 0; i < n; i++)
+ {
+ if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
+ {
+ states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
+ R_last[a->start->dfs_id * n + i].slen);
+ GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
+ &states[i]->hash);
+ }
+ }
+
+ /* complete regex for whole DFA: union of all pairs (start state/accepting
+ * state(s)). */
+ sb_init (&complete_regex, 16 * n);
+ for (i = 0; i < n; i++)
+ {
+ if (states[i]->accepting)
+ {
+ if ( (0 == complete_regex.slen) &&
+ (0 < R_last[a->start->dfs_id * n + i].slen) )
+ {
+ sb_append (&complete_regex,
+ &R_last[a->start->dfs_id * n + i]);
+ }
+ else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
+ (0 < R_last[a->start->dfs_id * n + i].slen) )
+ {
+ sb_append_cstr (&complete_regex, "|");
+ sb_append (&complete_regex,
+ &R_last[a->start->dfs_id * n + i]);
+ }
+ }
+ }
+ a->canonical_regex = GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
+
+ /* cleanup */
+ sb_free (&complete_regex);
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ {
+ sb_free (&R_cur[i * n + j]);
+ sb_free (&R_last[i * n + j]);
+ }
+ GNUNET_free (R_cur);
+ GNUNET_free (R_last);
+ return GNUNET_OK;
+}
+
+