Engage GHASH for PowerISA 2.07.
[oweals/openssl.git] / crypto / modes / gcm128.c
index ae5fab1b46c7d36aff57be9dc2164e8fa368e88c..c8c906c8211632ea52729e18129df08e429fe0bc 100644 (file)
@@ -645,7 +645,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
 
 #endif
 
-#if    TABLE_BITS==4 && defined(GHASH_ASM)
+#if    TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
 # if   !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
@@ -658,7 +658,7 @@ void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 
-#if defined(__i386) || defined(__i386__)
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
 # define gcm_init_avx  gcm_init_clmul
 # define gcm_gmult_avx gcm_gmult_clmul
 # define gcm_ghash_avx gcm_ghash_clmul
@@ -676,13 +676,21 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
-# elif defined(__arm__) || defined(__arm)
+# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #  include "arm_arch.h"
 #  if __ARM_ARCH__>=7
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
+#   define PMULL_CAPABLE       (OPENSSL_armcap_P & ARMV8_PMULL)
+#   if defined(__arm__) || defined(__arm)
+#    define NEON_CAPABLE       (OPENSSL_armcap_P & ARMV7_NEON)
+#   endif
+void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 #  endif
 # elif defined(__sparc__) || defined(__sparc)
 #  include "sparc_arch.h"
@@ -692,6 +700,13 @@ extern unsigned int OPENSSL_sparcv9cap_P[];
 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
+#  define GHASH_ASM_PPC
+#  define GCM_FUNCREF_4BIT
+extern unsigned int OPENSSL_ppccap_P[];
+void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
 # endif
 #endif
 
@@ -766,10 +781,21 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
        ctx->ghash = gcm_ghash_4bit;
 #  endif
 # elif defined(GHASH_ASM_ARM)
-       if (OPENSSL_armcap_P & ARMV7_NEON) {
+#  ifdef PMULL_CAPABLE
+       if (PMULL_CAPABLE) {
+               gcm_init_v8(ctx->Htable,ctx->H.u);
+               ctx->gmult = gcm_gmult_v8;
+               ctx->ghash = gcm_ghash_v8;
+       } else
+#  endif
+#  ifdef NEON_CAPABLE
+       if (NEON_CAPABLE) {
+               gcm_init_neon(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
-       } else {
+       } else
+#  endif
+       {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
@@ -784,6 +810,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
+# elif defined(GHASH_ASM_PPC)
+       if (OPENSSL_ppccap_P[0] & (1<<2)) {
+               gcm_init_p8(ctx->Htable,ctx->H.u);
+               ctx->gmult = gcm_gmult_p8;
+               ctx->ghash = gcm_ghash_p8;
+       } else {
+               gcm_init_4bit(ctx->Htable,ctx->H.u);
+               ctx->gmult = gcm_gmult_4bit;
+               ctx->ghash = gcm_ghash_4bit;
+       }
 # else
        gcm_init_4bit(ctx->Htable,ctx->H.u);
 # endif
@@ -847,7 +883,11 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
                GCM_MUL(ctx,Yi);
 
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                        ctr = GETU32(ctx->Yi.c+12);
+#endif
                else
                        ctr = ctx->Yi.d[3];
        }
@@ -855,7 +895,11 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
        (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
        ++ctr;
        if (is_endian.little)
+#ifdef BSWAP4
+               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                PUTU32(ctx->Yi.c+12,ctr);
+#endif
        else
                ctx->Yi.d[3] = ctr;
 }
@@ -950,7 +994,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -984,7 +1032,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1006,7 +1058,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1025,7 +1081,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1041,7 +1101,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
@@ -1059,7 +1123,11 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
@@ -1103,7 +1171,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1140,7 +1212,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1160,7 +1236,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
@@ -1178,7 +1258,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i) {
@@ -1196,7 +1280,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
@@ -1217,7 +1305,11 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
+#ifdef BSWAP4
+                               ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                                PUTU32(ctx->Yi.c+12,ctr);
+#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
@@ -1262,7 +1354,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1284,7 +1380,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                GHASH(ctx,out,GHASH_CHUNK);
@@ -1299,7 +1399,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                in  += i;
@@ -1319,7 +1423,11 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
@@ -1361,7 +1469,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
        }
 
        if (is_endian.little)
+#ifdef BSWAP4
+               ctr = BSWAP4(ctx->Yi.d[3]);
+#else
                ctr = GETU32(ctx->Yi.c+12);
+#endif
        else
                ctr = ctx->Yi.d[3];
 
@@ -1386,7 +1498,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += GHASH_CHUNK;
@@ -1412,7 +1528,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += i;
@@ -1423,7 +1543,11 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
+#ifdef BSWAP4
+                       ctx->Yi.d[3] = BSWAP4(ctr);
+#else
                        PUTU32(ctx->Yi.c+12,ctr);
+#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {