AES IGE mode speedup.
author Ben Laurie <ben@openssl.org>
Sun, 13 May 2007 12:03:57 +0000 (12:03 +0000)
committer Ben Laurie <ben@openssl.org>
Sun, 13 May 2007 12:03:57 +0000 (12:03 +0000)
CHANGES
apps/speed.c
crypto/aes/aes_ige.c
test/igetest.c

diff --git a/CHANGES b/CHANGES
index be58ee9e7b066811956523761bc76b176c8b520a..2f83eb0b57ce24d1fbaadeccb3c73af4344afc7b 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,9 @@
 
  Changes between 0.9.8e and 0.9.8f  [xx XXX xxxx]
 
+  *) AES IGE mode speedup.
+     [Dean Gaudet (Google)]
+
   *) Add the Korean symmetric 128-bit cipher SEED (see
      http://www.kisa.or.kr/kisa/seed/jsp/seed_eng.jsp) and
      add SEED ciphersuites from RFC 4162:
diff --git a/apps/speed.c b/apps/speed.c
index 92a58fdfc1e988f0d2ec98e5c940dd23737ff2dd..4d5d3bfa0736343a6f9782b1e952b30ff5d28a4c 100644 (file)
--- a/apps/speed.c
+++ b/apps/speed.c
@@ -275,7 +275,7 @@ static void print_result(int alg,int run_no,int count,double time_used);
 static int do_multi(int multi);
 #endif
 
-#define ALGOR_NUM      25
+#define ALGOR_NUM      28
 #define SIZE_NUM       5
 #define RSA_NUM                4
 #define DSA_NUM                3
@@ -289,7 +289,8 @@ static const char *names[ALGOR_NUM]={
   "rc2 cbc","rc5-32/12 cbc","blowfish cbc","cast cbc",
   "aes-128 cbc","aes-192 cbc","aes-256 cbc",
   "camellia-128 cbc","camellia-192 cbc","camellia-256 cbc",
-  "evp","sha256","sha512"};
+  "evp","sha256","sha512",
+  "aes-128 ige","aes-192 ige","aes-256 ige"};
 static double results[ALGOR_NUM][SIZE_NUM];
 static int lengths[SIZE_NUM]={16,64,256,1024,8*1024};
 static double rsa_results[RSA_NUM][2];
@@ -617,6 +618,9 @@ int MAIN(int argc, char **argv)
 #define D_EVP          22
 #define D_SHA256       23      
 #define D_SHA512       24
+#define D_IGE_128_AES   25
+#define D_IGE_192_AES   26
+#define D_IGE_256_AES   27
        double d=0.0;
        long c[ALGOR_NUM][SIZE_NUM];
 #define        R_DSA_512       0
@@ -957,7 +961,10 @@ int MAIN(int argc, char **argv)
                        if (strcmp(*argv,"aes-128-cbc") == 0) doit[D_CBC_128_AES]=1;
                else    if (strcmp(*argv,"aes-192-cbc") == 0) doit[D_CBC_192_AES]=1;
                else    if (strcmp(*argv,"aes-256-cbc") == 0) doit[D_CBC_256_AES]=1;
-               else
+               else    if (strcmp(*argv,"aes-128-ige") == 0) doit[D_IGE_128_AES]=1;
+               else    if (strcmp(*argv,"aes-192-ige") == 0) doit[D_IGE_192_AES]=1;
+               else    if (strcmp(*argv,"aes-256-ige") == 0) doit[D_IGE_256_AES]=1;
+                else
 #endif
 #ifndef OPENSSL_NO_CAMELLIA
                        if (strcmp(*argv,"camellia-128-cbc") == 0) doit[D_CBC_128_CML]=1;
@@ -1177,6 +1184,7 @@ int MAIN(int argc, char **argv)
 #endif
 #ifndef OPENSSL_NO_AES
                        BIO_printf(bio_err,"aes-128-cbc aes-192-cbc aes-256-cbc ");
+                       BIO_printf(bio_err,"aes-128-ige aes-192-ige aes-256-ige ");
 #endif
 #ifndef OPENSSL_NO_CAMELLIA
                        BIO_printf(bio_err,"\n");
@@ -1395,6 +1403,9 @@ int MAIN(int argc, char **argv)
        c[D_CBC_256_CML][0]=count;
        c[D_SHA256][0]=count;
        c[D_SHA512][0]=count;
+       c[D_IGE_128_AES][0]=count;
+       c[D_IGE_192_AES][0]=count;
+       c[D_IGE_256_AES][0]=count;
 
        for (i=1; i<SIZE_NUM; i++)
                {
@@ -1429,6 +1440,9 @@ int MAIN(int argc, char **argv)
                c[D_CBC_128_CML][i]=c[D_CBC_128_CML][i-1]*l0/l1;
                c[D_CBC_192_CML][i]=c[D_CBC_192_CML][i-1]*l0/l1;
                c[D_CBC_256_CML][i]=c[D_CBC_256_CML][i-1]*l0/l1;
+               c[D_IGE_128_AES][i]=c[D_IGE_128_AES][i-1]*l0/l1;
+               c[D_IGE_192_AES][i]=c[D_IGE_192_AES][i-1]*l0/l1;
+               c[D_IGE_256_AES][i]=c[D_IGE_256_AES][i-1]*l0/l1;
                }
 #ifndef OPENSSL_NO_RSA
        rsa_c[R_RSA_512][0]=count/2000;
@@ -1822,6 +1836,48 @@ int MAIN(int argc, char **argv)
                        }
                }
 
+       if (doit[D_IGE_128_AES])
+               {
+               for (j=0; j<SIZE_NUM; j++)
+                       {
+                       print_message(names[D_IGE_128_AES],c[D_IGE_128_AES][j],lengths[j]);
+                       Time_F(START);
+                       for (count=0,run=1; COND(c[D_IGE_128_AES][j]); count++)
+                               AES_ige_encrypt(buf,buf,
+                                       (unsigned long)lengths[j],&aes_ks1,
+                                       iv,AES_ENCRYPT);
+                       d=Time_F(STOP);
+                       print_result(D_IGE_128_AES,j,count,d);
+                       }
+               }
+       if (doit[D_IGE_192_AES])
+               {
+               for (j=0; j<SIZE_NUM; j++)
+                       {
+                       print_message(names[D_IGE_192_AES],c[D_IGE_192_AES][j],lengths[j]);
+                       Time_F(START);
+                       for (count=0,run=1; COND(c[D_IGE_192_AES][j]); count++)
+                               AES_ige_encrypt(buf,buf,
+                                       (unsigned long)lengths[j],&aes_ks2,
+                                       iv,AES_ENCRYPT);
+                       d=Time_F(STOP);
+                       print_result(D_IGE_192_AES,j,count,d);
+                       }
+               }
+       if (doit[D_IGE_256_AES])
+               {
+               for (j=0; j<SIZE_NUM; j++)
+                       {
+                       print_message(names[D_IGE_256_AES],c[D_IGE_256_AES][j],lengths[j]);
+                       Time_F(START);
+                       for (count=0,run=1; COND(c[D_IGE_256_AES][j]); count++)
+                               AES_ige_encrypt(buf,buf,
+                                       (unsigned long)lengths[j],&aes_ks3,
+                                       iv,AES_ENCRYPT);
+                       d=Time_F(STOP);
+                       print_result(D_IGE_256_AES,j,count,d);
+                       }
+               }
 #endif
 #ifndef OPENSSL_NO_CAMELLIA
        if (doit[D_CBC_128_CML])
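diff --git a/crypto/aes/aes_ige.c b/crypto/aes/aes_ige.c
index 2082d060cf94f880fe00a6f3beebc75bab823a21..5e4736827fbc10be0ce72e6dbaa3d8bf3d6d8ebe 100644 (file)
--- a/crypto/aes/aes_ige.c
+++ b/crypto/aes/aes_ige.c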
@@ -70,6 +70,24 @@ static void hexdump(FILE *f,const char *title,const unsigned char *s,int l)
     }
 */
 
+#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
+typedef struct {
+        unsigned long data[N_WORDS];
+} aes_block_t;
+
+// XXX: probably some better way to do this
+#if defined(__i386__) || defined(__x86_64__)
+#define UNALIGNED_MEMOPS_ARE_FAST 1
+#endif
+
+#ifdef UNALIGNED_MEMOPS_ARE_FAST
+#define load_block(d, s)        (d) = *(const aes_block_t *)(s)
+#define store_block(d, s)       *(aes_block_t *)(d) = (s)
+#else
+#define load_block(d, s)        memcpy((d).data, (s), AES_BLOCK_SIZE)
+#define store_block(d, s)       memcpy((d), (s).data, AES_BLOCK_SIZE)
+#endif
+
 /* N.B. The IV for this mode is _twice_ the block size */
 
 void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
@@ -77,68 +95,73 @@ void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
                                         unsigned char *ivec, const int enc)
        {
        unsigned long n;
-       unsigned long len = length;
-       unsigned char tmp[AES_BLOCK_SIZE];
-       unsigned char tmp2[AES_BLOCK_SIZE];
-       unsigned char prev[AES_BLOCK_SIZE];
-       const unsigned char *iv = ivec;
-       const unsigned char *iv2 = ivec + AES_BLOCK_SIZE;
+       unsigned long len;
+       aes_block_t tmp, tmp2;
+       aes_block_t iv;
+       aes_block_t iv2;
 
        OPENSSL_assert(in && out && key && ivec);
        OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
        OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
 
+       len = length / AES_BLOCK_SIZE;
+       load_block(iv, ivec);
+       load_block(iv2, ivec + AES_BLOCK_SIZE);
+
        if (AES_ENCRYPT == enc)
                {
                /* XXX: Do a separate case for when in != out (strictly should
                   check for overlap, too) */
-               while (len >= AES_BLOCK_SIZE)
+               while (len)
                        {
+                       load_block(tmp, in);
                        /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
                        /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
-                               out[n] = in[n] ^ iv[n];
+                       for(n=0 ; n < N_WORDS; ++n)
+                               tmp2.data[n] = tmp.data[n] ^ iv.data[n];
                        /*                      hexdump(stdout, "in ^ iv", out, AES_BLOCK_SIZE); */
-                       AES_encrypt(out, out, key);
+                       AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
                        /*                      hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
                        /*                      hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
-                               out[n] ^= iv2[n];
+                       for(n=0 ; n < N_WORDS; ++n)
+                               tmp2.data[n] ^= iv2.data[n];
+                       store_block(out, tmp2);
                        /*                      hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
-                       iv = out;
-                       memcpy(prev, in, AES_BLOCK_SIZE);
-                       iv2 = prev;
-                       len -= AES_BLOCK_SIZE;
+                       iv = tmp2;
+                       iv2 = tmp;
+                       --len;
                        in += AES_BLOCK_SIZE;
                        out += AES_BLOCK_SIZE;
                        }
-               memcpy(ivec, iv, AES_BLOCK_SIZE);
-               memcpy(ivec + AES_BLOCK_SIZE, iv2, AES_BLOCK_SIZE);
+               memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+               memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                }
        else
                {
-               while (len >= AES_BLOCK_SIZE)
+               while (len)
                        {
-                       memcpy(tmp, in, AES_BLOCK_SIZE);
-                       memcpy(tmp2, in, AES_BLOCK_SIZE);
+                       load_block(tmp, in);
+                       tmp2 = tmp;
                        /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
                        /*                      hexdump(stdout, "iv2", iv2, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
-                               tmp[n] ^= iv2[n];
+                       for(n=0 ; n < N_WORDS; ++n)
+                               tmp.data[n] ^= iv2.data[n];
                        /*                      hexdump(stdout, "in ^ iv2", tmp, AES_BLOCK_SIZE); */
-                       AES_decrypt(tmp, out, key);
+                       AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
                        /*                      hexdump(stdout, "dec", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", ivec, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
-                               out[n] ^= ivec[n];
+                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
+                       for(n=0 ; n < N_WORDS; ++n)
+                               tmp.data[n] ^= iv.data[n];
+                       store_block(out, tmp);
                        /*                      hexdump(stdout, "out", out, AES_BLOCK_SIZE); */
-                       memcpy(ivec, tmp2, AES_BLOCK_SIZE);
-                       iv2 = out;
-                       len -= AES_BLOCK_SIZE;
+                       iv = tmp2;
+                       iv2 = tmp;
+                       --len;
                        in += AES_BLOCK_SIZE;
                        out += AES_BLOCK_SIZE;
                        }
-               memcpy(ivec + AES_BLOCK_SIZE, iv2, AES_BLOCK_SIZE);
+               memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+               memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                }
        }
 
diff --git a/test/igetest.c b/test/igetest.c
index d93428f42baa2ef84b237b701bd827c8cc5a19fa..ecdeb906033050f4905e3fbd13bb137c0a330701 100644 (file)
--- a/test/igetest.c
+++ b/test/igetest.c
@@ -218,6 +218,23 @@ static int run_test_vectors(void)
                        hexdump(stdout, "expected", v->out, v->length);
                        hexdump(stdout, "got", buf, v->length);
 
+                       ++errs;
+                       }
+
+               // try with in == out
+               memcpy(iv, v->iv, sizeof iv);
+               memcpy(buf, v->in, v->length);
+               AES_ige_encrypt(buf, buf, v->length, &key, iv, v->encrypt);
+
+               if(memcmp(v->out, buf, v->length))
+                       {
+                       printf("IGE test vector %d failed (with in == out)\n", n);
+                       hexdump(stdout, "key", v->key, sizeof v->key);
+                       hexdump(stdout, "iv", v->iv, sizeof v->iv);
+                       hexdump(stdout, "in", v->in, v->length);
+                       hexdump(stdout, "expected", v->out, v->length);
+                       hexdump(stdout, "got", buf, v->length);
+
                        ++errs;
                        }
                }