new and improved tests

author Maximilian Szengel <gnunet@maxsz.de>

Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)

committer Maximilian Szengel <gnunet@maxsz.de>

Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)
author Maximilian Szengel <gnunet@maxsz.de>
Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)
committer Maximilian Szengel <gnunet@maxsz.de>
Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h

index 64a370df31368dc6566c3032b35f7f180aaf431e..911128647a66e874b3bf019d7a1622f9ee6db44e 100644 (file)
--- a/src/include/gnunet_regex_lib.h
+++ b/src/include/gnunet_regex_lib.h
@@ -42,6 +42,7 @@ extern "C"
   */
  struct GNUNET_REGEX_Automaton;
  
+
  /**
   * Edge representation.
   */
@@ -58,6 +59,7 @@ struct GNUNET_REGEX_Edge
    struct GNUNET_HashCode destination;
  };
  
+
  /**
   * Construct an NFA by parsing the regex string of length 'len'.
   *
@@ -69,6 +71,7 @@ struct GNUNET_REGEX_Edge
  struct GNUNET_REGEX_Automaton *
  GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
  
+
  /**
   * Construct DFA for the given 'regex' of length 'len'.
   *
@@ -80,6 +83,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
  struct GNUNET_REGEX_Automaton *
  GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
  
+
  /**
   * Free the memory allocated by constructing the GNUNET_REGEX_Automaton.
   * data structure.
@@ -89,6 +93,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
  void
  GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);
  
+
  /**
   * Save the given automaton as a GraphViz dot file.
   *
@@ -111,19 +116,6 @@ int
  GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
                     const char *string);
  
-/** 
- * Get the canonical regex of the given automaton.
- * When constructing the automaton a proof is computed for each state,
- * consisting of the regular expression leading to this state. A complete
- * regex for the automaton can be computed by combining these proofs.
- * As of now this function is only useful for testing.
- * 
- * @param a automaton for which the canonical regex should be returned.
- * 
- * @return 
- */
-const char *
-GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
  
  /**
   * Get the first key for the given 'input_string'. This hashes
@@ -140,6 +132,7 @@ unsigned int /* FIXME: size_t */
  GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len,
                              struct GNUNET_HashCode * key);
  
+
  /**
   * Check if the given 'proof' matches the given 'key'.
   *
@@ -152,6 +145,7 @@ int
  GNUNET_REGEX_check_proof (const char *proof,
                            const struct GNUNET_HashCode *key);
  
+
  /**
   * Iterator callback function.
   *
@@ -169,6 +163,7 @@ typedef void (*GNUNET_REGEX_KeyIterator)(void *cls,
                                           unsigned int num_edges,
                                           const struct GNUNET_REGEX_Edge *edges);
  
+
  /**
   * Iterate over all edges starting from start state of automaton 'a'. Calling
   * iterator for each edge.
@@ -182,6 +177,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
                                  GNUNET_REGEX_KeyIterator iterator,
                                  void *iterator_cls);
  
+
  #if 0                           /* keep Emacsens' auto-indent happy */
  {
  #endif
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am

index cb9bc093a0529e5e89c98efcbb14292d93d0b1dd..1284111d8ec53542e9f047d51c903eeeaa5f7181 100644 (file)
--- a/src/regex/Makefile.am
+++ b/src/regex/Makefile.am
@@ -11,7 +11,8 @@ endif
  lib_LTLIBRARIES = libgnunetregex.la
  
  libgnunetregex_la_SOURCES = \
-  regex.c
+  regex_internal.h regex.c \
+  regex_random.c
  libgnunetregex_la_LIBADD = -lm \
   $(top_builddir)/src/util/libgnunetutil.la
  libgnunetregex_la_LDFLAGS = \
diff --git a/src/regex/regex.c b/src/regex/regex.c

index 411c72c08b11292209df11a7c164989b6a7f2852..f237334d8265cc8393f9545cfb93ce2818430e79 100644 (file)
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -26,7 +26,7 @@
  #include "gnunet_container_lib.h"
  #include "gnunet_crypto_lib.h"
  #include "gnunet_regex_lib.h"
-#include "regex.h"
+#include "regex_internal.h"
  
  /**
   * Constant for how many bits the initial string regex should have.
@@ -1078,12 +1078,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
          GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
          GNUNET_free (temp_a);
        }
-      if (GNUNET_YES == needs_parentheses (R_last[i][j]))
-      {
-        temp_a = R_last[i][j];
-        GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
-        GNUNET_free (temp_a);
-      }
      }
      if (NULL == R_last[i][i])
        GNUNET_asprintf (&R_last[i][i], "");
@@ -1094,7 +1088,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
        GNUNET_free (temp_a);
      }
    }
+  for (i = 0; i < n; i++)
+    for (j = 0; j < n; j++)
+      if (needs_parentheses (R_last[i][j]))
+      {
+        temp_a = R_last[i][j];
+        GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
+        GNUNET_free (temp_a);
+      }
  
+  // TODO: clean up and fix the induction part
  
    // INDUCTION
    for (k = 0; k < n; k++)
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h

new file mode 100644 (file)

index 0000000..8ea597d
--- /dev/null
+++ b/src/regex/regex_internal.h
@@ -0,0 +1,96 @@
+/*
+     This file is part of GNUnet
+     (C) 2012 Christian Grothoff (and other contributing authors)
+
+     GNUnet is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     GNUnet is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with GNUnet; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_internal.h
+ * @brief common internal definitions for regex library
+ * @author Maximilian Szengel
+ */
+#ifndef REGEX_INTERNAL_H
+#define REGEX_INTERNAL_H
+
+#include "gnunet_regex_lib.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#if 0                           /* keep Emacsens' auto-indent happy */
+}
+#endif
+#endif
+
+/**
+ * char array of literals that are allowed inside a regex (apart from the
+ * operators)
+ */
+#define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+
+/**
+ * Get the canonical regex of the given automaton.
+ * When constructing the automaton a proof is computed for each state,
+ * consisting of the regular expression leading to this state. A complete
+ * regex for the automaton can be computed by combining these proofs.
+ * As of now this function is only useful for testing.
+ *
+ * @param a automaton for which the canonical regex should be returned.
+ *
+ * @return canonical regex of the automaton 'a'.
+ */
+const char *
+GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
+
+
+/**
+ * Generate a (pseudo) random regular expression of length 'rx_length', as well
+ * as a (optional) string that will be matched by the generated regex. The
+ * returned regex needs to be freed.
+ *
+ * @param rx_length length of the random regex.
+ * @param matching_str (optional) caller-allocated buffer of at least
+ *                     rx_length+1 bytes that receives a string intended to
+ *                     be matched by the generated regex; may be NULL.
+ *
+ * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
+ *         needs to be freed, otherwise.
+ */
+char *
+GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
+
+
+/**
+ * Generate a random string of maximum length 'max_len' that only contains literals allowed
+ * in a regular expression. The string might be 0 chars long but is guaranteed
+ * to be shorter than or equal to 'max_len'.
+ *
+ * @param max_len maximum length of the string that should be generated.
+ *
+ * @return random string that needs to be freed.
+ */
+char *
+GNUNET_REGEX_generate_random_string (size_t max_len);
+
+#if 0                           /* keep Emacsens' auto-indent happy */
+{
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/regex/regex_random.c b/src/regex/regex_random.c

new file mode 100644 (file)

index 0000000..3af9b7c
--- /dev/null
+++ b/src/regex/regex_random.c
@@ -0,0 +1,170 @@
+/*
+     This file is part of GNUnet
+     (C) 2012 Christian Grothoff (and other contributing authors)
+
+     GNUnet is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+
+     GNUnet is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with GNUnet; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_random.c
+ * @brief functions for creating random regular expressions and strings
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "gnunet_crypto_lib.h"
+#include "regex_internal.h"
+
+
+/**
+ * Get a (pseudo) random valid literal for building a regular expression.
+ *
+ * @return random valid literal
+ */
+char
+get_random_literal ()
+{
+  uint32_t ridx;
+
+  ridx =
+      GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
+                                (uint32_t) strlen (ALLOWED_LITERALS));
+
+  return ALLOWED_LITERALS[ridx];
+}
+
+
+/**
+ * Generate a (pseudo) random regular expression of length 'rx_length', as well
+ * as a (optional) string that will be matched by the generated regex. The
+ * returned regex needs to be freed.
+ *
+ * @param rx_length length of the random regex.
+ * @param matching_str (optional) pointer to a string that will contain a string
+ *                     that will be matched by the generated regex, if
+ *                     'matching_str' pointer was not NULL. Make sure you
+ *                     allocated at least rx_length+1 bytes for this string.
+ *
+ * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
+ *         needs to be freed, otherwise.
+ */
+char *
+GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str)
+{
+  char *rx;
+  char *rx_p;
+  char *matching_strp;
+  unsigned int i;
+  unsigned int char_op_switch;
+  unsigned int last_was_op;
+  int rx_op;
+  char current_char;
+
+  if (0 == rx_length)
+    return NULL;
+
+  if (NULL != matching_str)
+    matching_strp = matching_str;
+  else
+    matching_strp = NULL;
+
+  rx = GNUNET_malloc (rx_length + 1);
+  rx_p = rx;
+  current_char = 0;
+  last_was_op = 1;
+
+  for (i = 0; i < rx_length; i++)
+  {
+    char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2);
+
+    if (0 == char_op_switch && !last_was_op)
+    {
+      last_was_op = 1;
+      rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4);
+
+      switch (rx_op)
+      {
+      case 0:
+        current_char = '+';
+        break;
+      case 1:
+        current_char = '*';
+        break;
+      case 2:
+        current_char = '?';
+        break;
+      case 3:
+        if (i < rx_length - 1)  // '|' cannot be at the end
+          current_char = '|';
+        else
+          current_char = get_random_literal ();
+        break;
+      }
+    }
+    else
+    {
+      current_char = get_random_literal ();
+      last_was_op = 0;
+    }
+
+    if (NULL != matching_strp &&
+        (current_char != '+' && current_char != '*' && current_char != '?' &&
+         current_char != '|'))
+    {
+      *matching_strp = current_char;
+      matching_strp++;
+    }
+
+    *rx_p = current_char;
+    rx_p++;
+  }
+  *rx_p = '\0';
+  if (NULL != matching_strp)
+    *matching_strp = '\0';
+
+  return rx;
+}
+
+/**
+ * Generate a random string of maximum length 'max_len' that only contains literals allowed
+ * in a regular expression. The string might be 0 chars long but is guaranteed
+ * to be shorter than or equal to 'max_len'.
+ *
+ * @param max_len maximum length of the string that should be generated.
+ *
+ * @return random string that needs to be freed.
+ */
+char *
+GNUNET_REGEX_generate_random_string (size_t max_len)
+{
+  unsigned int i;
+  char *str;
+  size_t len;
+
+  if (1 > max_len)
+    return GNUNET_strdup ("");
+
+  len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len);
+  str = GNUNET_malloc (len + 1);
+
+  for (i = 0; i < len; i++)
+  {
+    str[i] = get_random_literal ();
+  }
+
+  str[i] = '\0';
+
+  return str;
+}
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c

index b6cdbe100d4693ebb01cdcd9ce399ea5ad3bef15..6d575a05cc1cc0511c83ef78ca24ff2267d0f50d 100644 (file)
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -26,6 +26,7 @@
  #include <time.h>
  #include "platform.h"
  #include "gnunet_regex_lib.h"
+#include "regex_internal.h"
  
  enum Match_Result
  {
@@ -41,8 +42,6 @@ struct Regex_String_Pair
    enum Match_Result expected_results[20];
  };
  
-static const char allowed_literals[] =
-    "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
  
  /**
   * Random regex test. Generate a random regex as well as 'str_count' strings to
@@ -60,15 +59,8 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
               unsigned int str_count)
  {
    int i;
-  int j;
-  int rx_exp;
-  char rand_rx[rx_length + 1];
-  char matching_str[str_count][max_str_len + 1];
-  char *rand_rxp;
-  char *matching_strp;
-  int char_op_switch;
-  int last_was_op;
-  char current_char;
+  char *rand_rx;
+  char *matching_str;
    int eval;
    int eval_check;
    int eval_canonical;
@@ -77,7 +69,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
    regmatch_t matchptr[1];
    char error[200];
    int result;
-  unsigned int str_len;
+  size_t str_len;
    char *canonical_regex;
  
    // At least one string is needed for matching
@@ -85,76 +77,20 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
    // The string should be at least as long as the regex itself
    GNUNET_assert (max_str_len >= rx_length);
  
-  rand_rxp = rand_rx;
-  matching_strp = matching_str[0];
-  current_char = 0;
-  last_was_op = 1;
-
    // Generate random regex and a string that matches the regex
-  for (i = 0; i < rx_length; i++)
-  {
-    char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
-
-    if (0 == char_op_switch && !last_was_op)
-    {
-      last_was_op = 1;
-      rx_exp = rand () % 4;
-
-      switch (rx_exp)
-      {
-      case 0:
-        current_char = '+';
-        break;
-      case 1:
-        current_char = '*';
-        break;
-      case 2:
-        current_char = '?';
-        break;
-      case 3:
-        if (i < rx_length - 1)  // '|' cannot be at the end
-          current_char = '|';
-        else
-          current_char =
-              allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
-        break;
-      }
-    }
-    else
-    {
-      current_char =
-          allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
-      last_was_op = 0;
-    }
-
-    if (current_char != '+' && current_char != '*' && current_char != '?' &&
-        current_char != '|')
-    {
-      *matching_strp = current_char;
-      matching_strp++;
-    }
-
-    *rand_rxp = current_char;
-    rand_rxp++;
-  }
-  *rand_rxp = '\0';
-  *matching_strp = '\0';
-
-  // Generate some random strings for matching...
-  // Start at 1, because the first string is generated above during regex generation
-  for (i = 1; i < str_count; i++)
-  {
-    str_len = rand () % max_str_len;
-    for (j = 0; j < str_len; j++)
-      matching_str[i][j] =
-          allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
-    matching_str[i][str_len] = '\0';
-  }
+  matching_str = GNUNET_malloc (rx_length + 1);
+  rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str);
  
    // Now match
    result = 0;
    for (i = 0; i < str_count; i++)
    {
+    if (0 < i)
+    {
+      matching_str = GNUNET_REGEX_generate_random_string (max_str_len);
+      str_len = strlen (matching_str);
+    }
+
      // Match string using DFA
      dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
      if (NULL == dfa)
@@ -163,7 +99,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
        return -1;
      }
  
-    eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
+    eval = GNUNET_REGEX_eval (dfa, matching_str);
      canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
      GNUNET_REGEX_automaton_destroy (dfa);
  
@@ -175,7 +111,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
        return -1;
      }
  
-    eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
+    eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
      regfree (&rx);
  
      // Match canonical regex
@@ -187,14 +123,13 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
        return -1;
      }
  
-    eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0);
+    eval_canonical = regexec (&rx, matching_str, 1, matchptr, 0);
      regfree (&rx);
      GNUNET_free (canonical_regex);
  
      // We only want to match the whole string, because that's what our DFA does, too.
      if (eval_check == 0 &&
-        (matchptr[0].rm_so != 0 ||
-         matchptr[0].rm_eo != strlen (matching_str[i])))
+        (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
        eval_check = 1;
  
      // compare result
@@ -206,7 +141,12 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
                    rand_rx, matching_str, eval, eval_check, error);
        result += 1;
      }
+
+    GNUNET_free (matching_str);
    }
+
+  GNUNET_free (rand_rx);
+
    return result;
  }
  
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c

index 5d0aabd00df4a69e3b63bca160ce25924bf36428..85fc3079df2180618b0f8f8d7ccd5715c7f0cabd 100644 (file)
--- a/src/regex/test_regex_proofs.c
+++ b/src/regex/test_regex_proofs.c
@@ -22,68 +22,146 @@
   * @brief test for regex.c
   * @author Maximilian Szengel
   */
-#include <regex.h>
-#include <time.h>
  #include "platform.h"
  #include "gnunet_regex_lib.h"
+#include "regex_internal.h"
  
-int
-main (int argc, char *argv[])
+
+/**
+ * Test if the given regex's canonical regex is the same as this canonical
+ * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the
+ * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from
+ * this canonical regex 4. compare the canonical regex of dfa A with the
+ * canonical regex of dfa B.
+ *
+ * @param regex regular expression used for this test (see above).
+ *
+ * @return 0 on success, 1 on failure
+ */
+unsigned int
+test_proof (const char *regex)
  {
-  GNUNET_log_setup ("test-regex",
-#if VERBOSE
-                    "DEBUG",
-#else
-                    "WARNING",
-#endif
-                    NULL);
+  unsigned int error;
+  struct GNUNET_REGEX_Automaton *dfa;
+  char *c_rx1;
+  const char *c_rx2;
  
-  int error;
-  int i;
-
-  const char *regex[21] = {
-    "ab(c|d)+c*(a(b|c)+d)+(bla)+",
-    "(bla)*",
-    "b(lab)*la",
-    "(ab)*",
-    "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*",
-    "z(abc|def)?xyz",
-    "1*0(0|1)*",
-    "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*",
-    "(cd|ab)*",
-    "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)",
-    "abc(1|0)*def",
-    "ab|ac",
-    "(ab)(ab)*",
-    "ab|cd|ef|gh",
-    "a|b|c|d|e|f|g",
-    "(ab)|(ac)",
-    "a(b|c)",
-    "a*a",
-    "ab?(abcd)?",
-    "(ab|cs|df|sdf)*",
-    "a|aa*a"
+  dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex));
+  c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
+  GNUNET_REGEX_automaton_destroy (dfa);
+  dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1));
+  c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa);
+
+  error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1;
+
+  if (error > 0)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n",
+                regex, c_rx1, c_rx2);
+  }
+
+  GNUNET_free (c_rx1);
+  GNUNET_REGEX_automaton_destroy (dfa);
+
+  return error;
+}
+
+/**
+ * Use 'test_proof' function to randomly test the canonical regexes of 'count'
+ * random expressions of length 'rx_length'.
+ *
+ * @param count number of random regular expressions to test.
+ * @param rx_length length of the random regular expressions.
+ *
+ * @return 0 on success, number of failures otherwise.
+ */
+unsigned int
+test_proofs_random (unsigned int count, size_t rx_length)
+{
+  unsigned int i;
+  char *rand_rx;
+  unsigned int failures;
+
+  failures = 0;
+
+  for (i = 0; i < count; i++)
+  {
+    rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, NULL);
+    failures += test_proof (rand_rx);
+    GNUNET_free (rand_rx);
+  }
+
+  return failures;
+}
+
+/**
+ * Test a number of known examples of regexes for proper canonicalization.
+ *
+ * @return 0 on success, number of failures otherwise.
+ */
+unsigned int
+test_proofs_static (void)
+{
+  unsigned int i;
+  unsigned int error;
+
+  const char *regex[4] = {
+    "a|aa*a",
+    "a+",
+    "a*",
+    "a*a*"
    };
+
    char *canonical_regex;
    struct GNUNET_REGEX_Automaton *dfa;
  
    error = 0;
  
-  for (i = 0; i < 21; i++)
+  for (i = 0; i < 4; i += 2)
    {
      dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
      canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
      GNUNET_REGEX_automaton_destroy (dfa);
  
-    dfa =
-        GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex));
+    dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]));
      error +=
          (0 ==
           strcmp (canonical_regex,
                   GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1;
+
+    if (error > 0)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Comparing canonical regex of %s with %s failed.\n", regex[i],
+                  regex[i + 1]);
+    }
+
      GNUNET_free (canonical_regex);
      GNUNET_REGEX_automaton_destroy (dfa);
    }
  
    return error;
  }
+
+
+int
+main (int argc, char *argv[])
+{
+  GNUNET_log_setup ("test-regex",
+#if VERBOSE
+                    "DEBUG",
+#else
+                    "WARNING",
+#endif
+                    NULL);
+
+  int error;
+
+  error = 0;
+
+  error += test_proofs_static ();
+//  error += test_proofs_random (100, 10);
+
+  return error;
+}
author	Maximilian Szengel <gnunet@maxsz.de>
	Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)
committer	Maximilian Szengel <gnunet@maxsz.de>
	Wed, 27 Jun 2012 16:13:48 +0000 (16:13 +0000)
src/include/gnunet_regex_lib.h		patch \| blob \| history
src/regex/Makefile.am		patch \| blob \| history
src/regex/regex.c		patch \| blob \| history
src/regex/regex_internal.h	[new file with mode: 0644]	patch \| blob
src/regex/regex_random.c	[new file with mode: 0644]	patch \| blob
src/regex/test_regex_eval_api.c		patch \| blob \| history
src/regex/test_regex_proofs.c		patch \| blob \| history