InvCipher(in, out, rk, key->rounds);
}
-
-# ifndef OPENSSL_SMALL_FOOTPRINT
-void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
- size_t blocks, const AES_KEY *key,
- const unsigned char *ivec);
-
-static void RawToBits(const u8 raw[64], u64 bits[8])
-{
- int i, j;
- u64 in, out;
-
- memset(bits, 0, 64);
- for (i = 0; i < 8; i++) {
- in = 0;
- for (j = 0; j < 8; j++)
- in |= ((u64)raw[i * 8 + j]) << (8 * j);
- out = in & 0xF0F0F0F00F0F0F0FuLL;
- out |= (in & 0x0F0F0F0F00000000uLL) >> 28;
- out |= (in & 0x00000000F0F0F0F0uLL) << 28;
- in = out & 0xCCCC3333CCCC3333uLL;
- in |= (out & 0x3333000033330000uLL) >> 14;
- in |= (out & 0x0000CCCC0000CCCCuLL) << 14;
- out = in & 0xAA55AA55AA55AA55uLL;
- out |= (in & 0x5500550055005500uLL) >> 7;
- out |= (in & 0x00AA00AA00AA00AAuLL) << 7;
- for (j = 0; j < 8; j++) {
- bits[j] |= (out & 0xFFuLL) << (8 * i);
- out = out >> 8;
- }
- }
-}
-
-static void BitsToRaw(const u64 bits[8], u8 raw[64])
-{
- int i, j;
- u64 in, out;
-
- for (i = 0; i < 8; i++) {
- in = 0;
- for (j = 0; j < 8; j++)
- in |= ((bits[j] >> (8 * i)) & 0xFFuLL) << (8 * j);
- out = in & 0xF0F0F0F00F0F0F0FuLL;
- out |= (in & 0x0F0F0F0F00000000uLL) >> 28;
- out |= (in & 0x00000000F0F0F0F0uLL) << 28;
- in = out & 0xCCCC3333CCCC3333uLL;
- in |= (out & 0x3333000033330000uLL) >> 14;
- in |= (out & 0x0000CCCC0000CCCCuLL) << 14;
- out = in & 0xAA55AA55AA55AA55uLL;
- out |= (in & 0x5500550055005500uLL) >> 7;
- out |= (in & 0x00AA00AA00AA00AAuLL) << 7;
- for (j = 0; j < 8; j++) {
- raw[i * 8 + j] = (u8)out;
- out = out >> 8;
- }
- }
-}
-
-static void BitsXtime(u64 state[8])
-{
- u64 b;
-
- b = state[7];
- state[7] = state[6];
- state[6] = state[5];
- state[5] = state[4];
- state[4] = state[3] ^ b;
- state[3] = state[2] ^ b;
- state[2] = state[1];
- state[1] = state[0] ^ b;
- state[0] = b;
-}
-
-/*
- * This S-box implementation follows a circuit described in
- * Boyar and Peralta: "A new combinational logic minimization
- * technique with applications to cryptology."
- * https://eprint.iacr.org/2009/191.pdf
- *
- * The math is similar to above, in that it uses
- * a tower field of GF(2^2^2^2) but with a different
- * basis representation, that is better suited to
- * logic designs.
- */
-static void BitsSub(u64 state[8])
-{
- u64 x0, x1, x2, x3, x4, x5, x6, x7;
- u64 y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11;
- u64 y12, y13, y14, y15, y16, y17, y18, y19, y20, y21;
- u64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;
- u64 t12, t13, t14, t15, t16, t17, t18, t19, t20, t21;
- u64 t22, t23, t24, t25, t26, t27, t28, t29, t30, t31;
- u64 t32, t33, t34, t35, t36, t37, t38, t39, t40, t41;
- u64 t42, t43, t44, t45, t46, t47, t48, t49, t50, t51;
- u64 t52, t53, t54, t55, t56, t57, t58, t59, t60, t61;
- u64 t62, t63, t64, t65, t66, t67;
- u64 z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11;
- u64 z12, z13, z14, z15, z16, z17;
- u64 s0, s1, s2, s3, s4, s5, s6, s7;
-
- x7 = state[0];
- x6 = state[1];
- x5 = state[2];
- x4 = state[3];
- x3 = state[4];
- x2 = state[5];
- x1 = state[6];
- x0 = state[7];
- y14 = x3 ^ x5;
- y13 = x0 ^ x6;
- y9 = x0 ^ x3;
- y8 = x0 ^ x5;
- t0 = x1 ^ x2;
- y1 = t0 ^ x7;
- y4 = y1 ^ x3;
- y12 = y13 ^ y14;
- y2 = y1 ^ x0;
- y5 = y1 ^ x6;
- y3 = y5 ^ y8;
- t1 = x4 ^ y12;
- y15 = t1 ^ x5;
- y20 = t1 ^ x1;
- y6 = y15 ^ x7;
- y10 = y15 ^ t0;
- y11 = y20 ^ y9;
- y7 = x7 ^ y11;
- y17 = y10 ^ y11;
- y19 = y10 ^ y8;
- y16 = t0 ^ y11;
- y21 = y13 ^ y16;
- y18 = x0 ^ y16;
- t2 = y12 & y15;
- t3 = y3 & y6;
- t4 = t3 ^ t2;
- t5 = y4 & x7;
- t6 = t5 ^ t2;
- t7 = y13 & y16;
- t8 = y5 & y1;
- t9 = t8 ^ t7;
- t10 = y2 & y7;
- t11 = t10 ^ t7;
- t12 = y9 & y11;
- t13 = y14 & y17;
- t14 = t13 ^ t12;
- t15 = y8 & y10;
- t16 = t15 ^ t12;
- t17 = t4 ^ t14;
- t18 = t6 ^ t16;
- t19 = t9 ^ t14;
- t20 = t11 ^ t16;
- t21 = t17 ^ y20;
- t22 = t18 ^ y19;
- t23 = t19 ^ y21;
- t24 = t20 ^ y18;
- t25 = t21 ^ t22;
- t26 = t21 & t23;
- t27 = t24 ^ t26;
- t28 = t25 & t27;
- t29 = t28 ^ t22;
- t30 = t23 ^ t24;
- t31 = t22 ^ t26;
- t32 = t31 & t30;
- t33 = t32 ^ t24;
- t34 = t23 ^ t33;
- t35 = t27 ^ t33;
- t36 = t24 & t35;
- t37 = t36 ^ t34;
- t38 = t27 ^ t36;
- t39 = t29 & t38;
- t40 = t25 ^ t39;
- t41 = t40 ^ t37;
- t42 = t29 ^ t33;
- t43 = t29 ^ t40;
- t44 = t33 ^ t37;
- t45 = t42 ^ t41;
- z0 = t44 & y15;
- z1 = t37 & y6;
- z2 = t33 & x7;
- z3 = t43 & y16;
- z4 = t40 & y1;
- z5 = t29 & y7;
- z6 = t42 & y11;
- z7 = t45 & y17;
- z8 = t41 & y10;
- z9 = t44 & y12;
- z10 = t37 & y3;
- z11 = t33 & y4;
- z12 = t43 & y13;
- z13 = t40 & y5;
- z14 = t29 & y2;
- z15 = t42 & y9;
- z16 = t45 & y14;
- z17 = t41 & y8;
- t46 = z15 ^ z16;
- t47 = z10 ^ z11;
- t48 = z5 ^ z13;
- t49 = z9 ^ z10;
- t50 = z2 ^ z12;
- t51 = z2 ^ z5;
- t52 = z7 ^ z8;
- t53 = z0 ^ z3;
- t54 = z6 ^ z7;
- t55 = z16 ^ z17;
- t56 = z12 ^ t48;
- t57 = t50 ^ t53;
- t58 = z4 ^ t46;
- t59 = z3 ^ t54;
- t60 = t46 ^ t57;
- t61 = z14 ^ t57;
- t62 = t52 ^ t58;
- t63 = t49 ^ t58;
- t64 = z4 ^ t59;
- t65 = t61 ^ t62;
- t66 = z1 ^ t63;
- s0 = t59 ^ t63;
- s6 = ~(t56 ^ t62);
- s7 = ~(t48 ^ t60);
- t67 = t64 ^ t65;
- s3 = t53 ^ t66;
- s4 = t51 ^ t66;
- s5 = t47 ^ t65;
- s1 = ~(t64 ^ s3);
- s2 = ~(t55 ^ t67);
- state[0] = s7;
- state[1] = s6;
- state[2] = s5;
- state[3] = s4;
- state[4] = s3;
- state[5] = s2;
- state[6] = s1;
- state[7] = s0;
-}
-
-static void BitsShiftRows(u64 state[8])
-{
- u64 s, s0;
- int i;
-
- for (i = 0; i < 8; i++) {
- s = state[i];
- s0 = s & 0x1111111111111111uLL;
- s0 |= ((s & 0x2220222022202220uLL) >> 4) | ((s & 0x0002000200020002uLL) << 12);
- s0 |= ((s & 0x4400440044004400uLL) >> 8) | ((s & 0x0044004400440044uLL) << 8);
- s0 |= ((s & 0x8000800080008000uLL) >> 12) | ((s & 0x0888088808880888uLL) << 4);
- state[i] = s0;
- }
-}
-
-static void BitsMixColumns(u64 state[8])
-{
- u64 s1, s;
- u64 s0[8];
- int i;
-
- for (i = 0; i < 8; i++) {
- s1 = state[i];
- s = s1;
- s ^= ((s & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((s & 0x3333333333333333uLL) << 2);
- s ^= ((s & 0xAAAAAAAAAAAAAAAAuLL) >> 1) | ((s & 0x5555555555555555uLL) << 1);
- s ^= s1;
- s0[i] = s;
- }
- BitsXtime(state);
- for (i = 0; i < 8; i++) {
- s1 = state[i];
- s = s0[i];
- s ^= s1;
- s ^= ((s1 & 0xEEEEEEEEEEEEEEEEuLL) >> 1) | ((s1 & 0x1111111111111111uLL) << 3);
- state[i] = s;
- }
-}
-
-static void BitsAddRoundKey(u64 state[8], const u64 key[8])
-{
- int i;
-
- for (i = 0; i < 8; i++)
- state[i] ^= key[i];
-}
-
-void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
- size_t blocks, const AES_KEY *key,
- const unsigned char *ivec)
-{
- struct {
- u8 cipher[64];
- u64 state[8];
- u64 rd_key[AES_MAXNR + 1][8];
- } *bs;
- u32 ctr32;
- int i;
-
- ctr32 = GETU32(ivec + 12);
- if (blocks >= 4
- && (bs = OPENSSL_malloc(sizeof(*bs)))) {
- for (i = 0; i < key->rounds + 1; i++) {
- memcpy(bs->cipher + 0, &key->rd_key[4 * i], 16);
- memcpy(bs->cipher + 16, bs->cipher, 16);
- memcpy(bs->cipher + 32, bs->cipher, 32);
- RawToBits(bs->cipher, bs->rd_key[i]);
- }
- while (blocks) {
- memcpy(bs->cipher, ivec, 12);
- PUTU32(bs->cipher + 12, ctr32);
- ctr32++;
- memcpy(bs->cipher + 16, ivec, 12);
- PUTU32(bs->cipher + 28, ctr32);
- ctr32++;
- memcpy(bs->cipher + 32, ivec, 12);
- PUTU32(bs->cipher + 44, ctr32);
- ctr32++;
- memcpy(bs->cipher + 48, ivec, 12);
- PUTU32(bs->cipher + 60, ctr32);
- ctr32++;
- RawToBits(bs->cipher, bs->state);
- BitsAddRoundKey(bs->state, bs->rd_key[0]);
- for (i = 1; i < key->rounds; i++) {
- BitsSub(bs->state);
- BitsShiftRows(bs->state);
- BitsMixColumns(bs->state);
- BitsAddRoundKey(bs->state, bs->rd_key[i]);
- }
- BitsSub(bs->state);
- BitsShiftRows(bs->state);
- BitsAddRoundKey(bs->state, bs->rd_key[key->rounds]);
- BitsToRaw(bs->state, bs->cipher);
- for (i = 0; i < 64 && blocks; i++) {
- out[i] = in[i] ^ bs->cipher[i];
- if ((i & 15) == 15)
- blocks--;
- }
- in += i;
- out += i;
- }
- OPENSSL_clear_free(bs, sizeof(*bs));
- } else {
- unsigned char cipher[16];
-
- while (blocks) {
- memcpy(cipher, ivec, 12);
- PUTU32(cipher + 12, ctr32);
- AES_encrypt(cipher, cipher, key);
- for (i = 0; i < 16; i++)
- out[i] = in[i] ^ cipher[i];
- in += 16;
- out += 16;
- ctr32++;
- blocks--;
- }
- }
-}
-# endif
#elif !defined(AES_ASM)
/*-
Te0[x] = S [x].[02, 01, 01, 03];