Store canonical encodings of Name structures. Update X509_NAME_cmp() to use
author Dr. Stephen Henson <steve@openssl.org>
Tue, 18 Jul 2006 12:36:19 +0000 (12:36 +0000)
committer Dr. Stephen Henson <steve@openssl.org>
Tue, 18 Jul 2006 12:36:19 +0000 (12:36 +0000)
them.

CHANGES
crypto/asn1/x_name.c
crypto/x509/x509.h
crypto/x509/x509_cmp.c

diff --git a/CHANGES b/CHANGES
index 89c016e17cb486c862cefc1adff5481b8b57fd22..2c1daddf2bb724f74d88e6ea5d505224dabf7005 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,11 @@
 
  Changes between 0.9.8b and 0.9.9  [xx XXX xxxx]
 
+  *) Store a "canonical" representation of the X509_NAME structure (ASN1
+     Name). This maps equivalent X509_NAME structures into a consistent
+     encoding, so name comparison can be performed rapidly using memcmp().
+     [Steve Henson]
+
   *) Non-blocking OCSP request processing. Add -timeout option to ocsp 
      utility.
      [Steve Henson]
index 8701c54114b784148a358b13c592c4c80fb9f696..68fa34a207aa101b7eceaae9cefc39dd2f5eaa4e 100644 (file)
  */
 
 #include <stdio.h>
+#include <ctype.h>
 #include "cryptlib.h"
 #include <openssl/asn1t.h>
 #include <openssl/x509.h>
 #include "asn1_locl.h"
 
-static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it,
-                                       int tag, int aclass, char opt, ASN1_TLC *ctx);
+static int x509_name_ex_d2i(ASN1_VALUE **val,
+                               const unsigned char **in, long len,
+                               const ASN1_ITEM *it,
+                               int tag, int aclass, char opt, ASN1_TLC *ctx);
 
-static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out, const ASN1_ITEM *it, int tag, int aclass);
+static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out,
+                               const ASN1_ITEM *it, int tag, int aclass);
 static int x509_name_ex_new(ASN1_VALUE **val, const ASN1_ITEM *it);
 static void x509_name_ex_free(ASN1_VALUE **val, const ASN1_ITEM *it);
 
 static int x509_name_encode(X509_NAME *a);
+static int x509_name_canon(X509_NAME *a);
+static int asn1_string_canon(ASN1_STRING *out, ASN1_STRING *in);
+static int i2d_name_canon(STACK *intname, unsigned char **in);
 
 
 static int x509_name_ex_print(BIO *out, ASN1_VALUE **pval,
@@ -126,6 +133,8 @@ static int x509_name_ex_new(ASN1_VALUE **val, const ASN1_ITEM *it)
        if ((ret->entries=sk_X509_NAME_ENTRY_new_null()) == NULL)
                goto memerr;
        if((ret->bytes = BUF_MEM_new()) == NULL) goto memerr;
+       ret->canon_enc = NULL;
+       ret->canon_enclen = 0;
        ret->modified=1;
        *val = (ASN1_VALUE *)ret;
        return 1;
@@ -150,6 +159,8 @@ static void x509_name_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it)
 
        BUF_MEM_free(a->bytes);
        sk_X509_NAME_ENTRY_pop_free(a->entries,X509_NAME_ENTRY_free);
+       if (a->canon_enc)
+               OPENSSL_free(a->canon_enc);
        OPENSSL_free(a);
        *pval = NULL;
 }
@@ -164,8 +175,14 @@ static void sk_internal_free(void *a)
        sk_free(a);
 }
 
-static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it,
-                                       int tag, int aclass, char opt, ASN1_TLC *ctx)
+/* Free one stack of X509_NAME_ENTRY structures together with the entries
+ * it holds. It takes a void pointer so that it can be handed to
+ * sk_pop_free() as the per-element free callback.
+ */
+static void canon_free(void *a)
+{
+       sk_X509_NAME_ENTRY_pop_free(a, X509_NAME_ENTRY_free);
+}
+
+static int x509_name_ex_d2i(ASN1_VALUE **val,
+                       const unsigned char **in, long len, const ASN1_ITEM *it,
+                               int tag, int aclass, char opt, ASN1_TLC *ctx)
 {
        const unsigned char *p = *in, *q;
        STACK *intname = NULL, **intname_pp = &intname;
@@ -200,6 +217,9 @@ static int x509_name_ex_d2i(ASN1_VALUE **val, const unsigned char **in, long len
                sk_X509_NAME_ENTRY_free(entries);
        }
        sk_free(intname);
+       ret = x509_name_canon(nm);
+       if (!ret)
+               goto err;
        nm->modified = 0;
        *val = (ASN1_VALUE *)nm;
        *in = p;
@@ -214,8 +234,12 @@ static int x509_name_ex_i2d(ASN1_VALUE **val, unsigned char **out, const ASN1_IT
        int ret;
        X509_NAME *a = (X509_NAME *)*val;
        if(a->modified) {
-               ret = x509_name_encode((X509_NAME *)a);
-               if(ret < 0) return ret;
+               ret = x509_name_encode(a);
+               if(ret < 0)
+                       return ret;
+               ret = x509_name_canon(a);
+               if(ret < 0)
+                       return ret;
        }
        ret = a->bytes->length;
        if(out != NULL) {
@@ -271,6 +295,185 @@ static int x509_name_ex_print(BIO *out, ASN1_VALUE **pval,
        return 2;
        }
 
+/* This function generates the canonical encoding of the Name structure.
+ * In it all strings are converted to UTF8; leading, trailing and
+ * multiple spaces are collapsed; characters are converted to lower case;
+ * and the leading SEQUENCE header is removed.
+ *
+ * In future we could also normalize the UTF8 too.
+ *
+ * By doing this, comparison of Name structures can be rapidly
+ * performed by just using memcmp() of the canonical encoding.
+ * By omitting the leading SEQUENCE, name constraints of type
+ * dirName can also be checked with a simple memcmp().
+ *
+ * Any canonical encoding already held in 'a' is discarded first.
+ * Returns 1 on success and 0 on failure. On failure a->canon_enc is
+ * NULL and a->canon_enclen is 0.
+ */
+
+static int x509_name_canon(X509_NAME *a)
+       {
+       unsigned char *enc, *p;
+       STACK *intname = NULL;
+       STACK_OF(X509_NAME_ENTRY) *entries = NULL;
+       /* tmpentry must start out NULL: the cleanup code below frees it
+        * and can be reached before any entry has been allocated.
+        */
+       X509_NAME_ENTRY *entry, *tmpentry = NULL;
+       int i, len, set = -1, ret = 0;
+
+       if (a->canon_enc)
+               {
+               OPENSSL_free(a->canon_enc);
+               a->canon_enc = NULL;
+               }
+       a->canon_enclen = 0;
+
+       intname = sk_new_null();
+       if(!intname)
+               goto err;
+       for(i = 0; i < sk_X509_NAME_ENTRY_num(a->entries); i++)
+               {
+               entry = sk_X509_NAME_ENTRY_value(a->entries, i);
+               /* A change in the 'set' value starts a new inner stack */
+               if(entry->set != set)
+                       {
+                       entries = sk_X509_NAME_ENTRY_new_null();
+                       if(!entries)
+                               goto err;
+                       if(!sk_push(intname, (char *)entries))
+                               {
+                               /* Not owned by intname yet, so the
+                                * cleanup below would not release it.
+                                */
+                               sk_X509_NAME_ENTRY_free(entries);
+                               goto err;
+                               }
+                       set = entry->set;
+                       }
+               tmpentry = X509_NAME_ENTRY_new();
+               if (!tmpentry)
+                       goto err;
+               tmpentry->object = OBJ_dup(entry->object);
+               if (!tmpentry->object)
+                       goto err;
+               if (!asn1_string_canon(tmpentry->value, entry->value))
+                       goto err;
+               if(!sk_X509_NAME_ENTRY_push(entries, tmpentry))
+                       goto err;
+               /* Now owned by 'entries' */
+               tmpentry = NULL;
+               }
+
+       /* Finally generate encoding */
+
+       len = i2d_name_canon(intname, NULL);
+       if (len < 0)
+               goto err;
+
+       /* Ask for at least one byte: a Name with no entries encodes to
+        * zero bytes, and a zero sized allocation may yield NULL, which
+        * must not be mistaken for an allocation failure.
+        */
+       enc = OPENSSL_malloc(len > 0 ? len : 1);
+       if (!enc)
+               goto err;
+
+       /* Encode through a scratch pointer so that 'enc' keeps pointing
+        * at the start of the buffer whatever the encoder does with it.
+        */
+       p = enc;
+       if (i2d_name_canon(intname, &p) < 0)
+               {
+               OPENSSL_free(enc);
+               goto err;
+               }
+
+       a->canon_enc = enc;
+       a->canon_enclen = len;
+
+       ret = 1;
+
+       err:
+
+       if (tmpentry)
+               X509_NAME_ENTRY_free(tmpentry);
+       if (intname)
+               sk_pop_free(intname, canon_free);
+       return ret;
+       }
+
+/* Bitmap of all the types of string that will be canonicalized. */
+
+#define ASN1_MASK_CANON        \
+       (B_ASN1_UTF8STRING | B_ASN1_BMPSTRING | B_ASN1_UNIVERSALSTRING \
+       | B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | B_ASN1_IA5STRING \
+       | B_ASN1_VISIBLESTRING)
+
+
+/* Store the canonical form of the string 'in' in 'out'.
+ *
+ * Strings whose type is not in ASN1_MASK_CANON are copied unchanged and
+ * keep their type. All other strings are converted to UTF8 and then, in
+ * place: leading and trailing white space is removed, every internal run
+ * of white space becomes a single ' ' and the remaining characters are
+ * passed through tolower(). Bytes with the MSB set are never handed to
+ * isspace() or tolower(); they are copied across as they are.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+
+static int asn1_string_canon(ASN1_STRING *out, ASN1_STRING *in)
+       {
+       unsigned char *to, *from;
+       int len, i;
+
+       /* If type not in bitmask just copy string across */
+       if (!(ASN1_tag2bit(in->type) & ASN1_MASK_CANON))
+               {
+               out->type = in->type;
+               if (!ASN1_STRING_set(out, in->data, in->length))
+                       return 0;
+               /* Stop here: carrying on would convert the string to
+                * UTF8 and overwrite the type and data just stored.
+                */
+               return 1;
+               }
+
+       len = ASN1_STRING_to_UTF8(&out->data, in);
+       /* Any negative value is treated as a conversion failure */
+       if (len < 0)
+               return 0;
+       out->type = V_ASN1_UTF8STRING;
+       out->length = len;
+
+       from = out->data;
+
+       /* Convert string in place to canonical form.
+        * Ultimately we may need to handle a wider range of characters
+        * but for now ignore anything with MSB set and rely on the
+        * isspace() and tolower() functions.
+        */
+
+       /* Ignore leading spaces */
+       while((len > 0) && !(*from & 0x80) && isspace(*from))
+               {
+               from++;
+               len--;
+               }
+
+       /* Ignore trailing spaces. The last character is addressed by
+        * index so that no pointer before the start of the buffer is
+        * ever formed when len is 0.
+        */
+       while ((len > 0) && !(from[len - 1] & 0x80) && isspace(from[len - 1]))
+               len--;
+
+       to = out->data;
+
+       i = 0;
+       while(i < len)
+               {
+               /* If MSB set just copy across */
+               if (*from & 0x80)
+                       {
+                       *to++ = *from++;
+                       /* Count the byte as consumed, otherwise the
+                        * loop runs past the end of the string by one
+                        * byte for every MSB-set byte it contains.
+                        */
+                       i++;
+                       }
+               /* Collapse multiple spaces */
+               else if (isspace(*from))
+                       {
+                       /* Copy one space across */
+                       *to++ = ' ';
+                       /* Ignore subsequent spaces. Note: don't need to
+                        * check len here because we know the last
+                        * character is a non-space so we can't overflow.
+                        */
+                       do
+                               {
+                               from++;
+                               i++;
+                               }
+                       while(!(*from & 0x80) && isspace(*from));
+                       }
+               else
+                       {
+                       *to++ = tolower(*from++);
+                       i++;
+                       }
+               }
+
+       out->length = to - out->data;
+
+       return 1;
+
+       }
+
+/* Encode every element of 'intname' in turn using the X509_NAME_ENTRIES
+ * item template, handing 'in' straight through to ASN1_item_ex_i2d() as
+ * the output pointer (callers pass NULL to obtain just the length).
+ *
+ * Returns the sum of the individual encoding lengths, or the negative
+ * value reported by ASN1_item_ex_i2d() as soon as one element fails.
+ */
+static int i2d_name_canon(STACK *intname, unsigned char **in)
+       {
+       int idx, total = 0;
+
+       for (idx = 0; idx < sk_num(intname); idx++)
+               {
+               ASN1_VALUE *item = (ASN1_VALUE *)sk_value(intname, idx);
+               int enclen = ASN1_item_ex_i2d(&item, in,
+                               ASN1_ITEM_rptr(X509_NAME_ENTRIES), -1, -1);
+
+               if (enclen < 0)
+                       return enclen;
+               total += enclen;
+               }
+       return total;
+       }
 
 int X509_NAME_set(X509_NAME **xn, X509_NAME *name)
        {
index f9d7e56219e672ee3d3b61f4273cbb026a2d49eb..b06863291083ebe860bf45c30662a172186335f5 100644 (file)
@@ -190,6 +190,8 @@ struct X509_name_st
        char *bytes;
 #endif
        unsigned long hash; /* Keep the hash around for lookups */
+       unsigned char *canon_enc;
+       int canon_enclen;
        } /* X509_NAME */;
 
 DECLARE_STACK_OF(X509_NAME)
index d04225a932d314f10c010a8313cbc3723a8b4b2a..4f157ba8076b0baec07b24c1e13b3770a12d81bd 100644 (file)
@@ -162,159 +162,36 @@ int X509_cmp(const X509 *a, const X509 *b)
 #endif
 
 
-/* Case insensitive string comparision */
-static int nocase_cmp(const ASN1_STRING *a, const ASN1_STRING *b)
-{
-       int i;
-
-       if (a->length != b->length)
-               return (a->length - b->length);
-
-       for (i=0; i<a->length; i++)
-       {
-               int ca, cb;
-
-               ca = tolower(a->data[i]);
-               cb = tolower(b->data[i]);
-
-               if (ca != cb)
-                       return(ca-cb);
-       }
-       return 0;
-}
-
-/* Case insensitive string comparision with space normalization 
- * Space normalization - ignore leading, trailing spaces, 
- *       multiple spaces between characters are replaced by single space  
- */
-static int nocase_spacenorm_cmp(const ASN1_STRING *a, const ASN1_STRING *b)
-{
-       unsigned char *pa = NULL, *pb = NULL;
-       int la, lb;
-       
-       la = a->length;
-       lb = b->length;
-       pa = a->data;
-       pb = b->data;
-
-       /* skip leading spaces */
-       while (la > 0 && isspace(*pa))
-       {
-               la--;
-               pa++;
-       }
-       while (lb > 0 && isspace(*pb))
-       {
-               lb--;
-               pb++;
-       }
-
-       /* skip trailing spaces */
-       while (la > 0 && isspace(pa[la-1]))
-               la--;
-       while (lb > 0 && isspace(pb[lb-1]))
-               lb--;
-
-       /* compare strings with space normalization */
-       while (la > 0 && lb > 0)
+/* Compare two names using their cached canonical encodings.
+ *
+ * The encodings are compared by length first and then, when the lengths
+ * match, with memcmp(). Returns zero if the canonical encodings are
+ * identical and a non-zero value otherwise; -2 is returned if a
+ * canonical encoding had to be (re)generated and that failed.
+ */
+int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b)
        {
-               int ca, cb;
-
-               /* compare character */
-               ca = tolower(*pa);
-               cb = tolower(*pb);
-               if (ca != cb)
-                       return (ca - cb);
-
-               pa++; pb++;
-               la--; lb--;
+       int ret;
 
-               if (la <= 0 || lb <= 0)
-                       break;
+       /* Ensure canonical encoding is present and up to date. A name
+        * flagged as modified may still carry the encoding of its old
+        * contents, so it has to be re-encoded as well; encoding the
+        * name is what regenerates the canonical form.
+        */
 
-               /* is white space next character ? */
-               if (isspace(*pa) && isspace(*pb))
+       if (!a->canon_enc || a->modified)
                {
-                       /* skip remaining white spaces */
-                       while (la > 0 && isspace(*pa))
-                       {
-                               la--;
-                               pa++;
-                       }
-                       while (lb > 0 && isspace(*pb))
-                       {
-                               lb--;
-                               pb++;
-                       }
+               ret = i2d_X509_NAME((X509_NAME *)a, NULL);
+               if (ret < 0)
+                       return -2;
                }
-       }
-       if (la > 0 || lb > 0)
-               return la - lb;
-
-       return 0;
-}
-
-static int asn1_string_memcmp(ASN1_STRING *a, ASN1_STRING *b)
-       {
-       int j;
-       j = a->length - b->length;
-       if (j)
-               return j;
-       return memcmp(a->data, b->data, a->length);
-       }
 
-#define STR_TYPE_CMP (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_UTF8STRING)
+       if (!b->canon_enc || b->modified)
+               {
+               ret = i2d_X509_NAME((X509_NAME *)b, NULL);
+               if (ret < 0)
+                       return -2;
+               }
 
-int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b)
-       {
-       int i,j;
-       X509_NAME_ENTRY *na,*nb;
+       /* Encodings of different length can never be equal */
+       ret = a->canon_enclen - b->canon_enclen;
 
-       unsigned long nabit, nbbit;
+       if (ret)
+               return ret;
 
-       j = sk_X509_NAME_ENTRY_num(a->entries)
-                 - sk_X509_NAME_ENTRY_num(b->entries);
-       if (j)
-               return j;
-       for (i=sk_X509_NAME_ENTRY_num(a->entries)-1; i>=0; i--)
-               {
-               na=sk_X509_NAME_ENTRY_value(a->entries,i);
-               nb=sk_X509_NAME_ENTRY_value(b->entries,i);
-               j=na->value->type-nb->value->type;
-               if (j)
-                       {
-                       nabit = ASN1_tag2bit(na->value->type);
-                       nbbit = ASN1_tag2bit(nb->value->type);
-                       if (!(nabit & STR_TYPE_CMP) ||
-                               !(nbbit & STR_TYPE_CMP))
-                               return j;
-                       j = asn1_string_memcmp(na->value, nb->value);
-                       }
-               else if (na->value->type == V_ASN1_PRINTABLESTRING)
-                       j=nocase_spacenorm_cmp(na->value, nb->value);
-               else if (na->value->type == V_ASN1_IA5STRING
-                       && OBJ_obj2nid(na->object) == NID_pkcs9_emailAddress)
-                       j=nocase_cmp(na->value, nb->value);
-               else
-                       j = asn1_string_memcmp(na->value, nb->value);
-               if (j) return(j);
-               j=na->set-nb->set;
-               if (j) return(j);
-               }
+       return memcmp(a->canon_enc, b->canon_enc, a->canon_enclen);
 
-       /* We will check the object types after checking the values
-        * since the values will more often be different than the object
-        * types. */
-       for (i=sk_X509_NAME_ENTRY_num(a->entries)-1; i>=0; i--)
-               {
-               na=sk_X509_NAME_ENTRY_value(a->entries,i);
-               nb=sk_X509_NAME_ENTRY_value(b->entries,i);
-               j=OBJ_cmp(na->object,nb->object);
-               if (j) return(j);
-               }
-       return(0);
        }
 
+
 #ifndef OPENSSL_NO_MD5
 /* I now DER encode the name and hash it.  Since I cache the DER encoding,
  * this is reasonably efficient. */