2 * Copyright (c) 2007, Cameron Rich
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of the axTLS project nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * AES implementation - this is a small code version. There are much faster
33 * versions around but they are much larger in size (i.e. they use large
40 /* all commented out in skeleton mode */
41 #ifndef CONFIG_SSL_SKELETON_MODE
/*
 * 32-bit rotate helpers: rot1, rot2 and rot3 rotate x right by 8, 16 and 24
 * bits respectively (equivalently, left by 24, 16 and 8 bits).
 *
 * The operand is converted to uint32_t before it is shifted, so the result is
 * a well-defined 32-bit rotation even when x is a signed or narrower
 * expression (shifting a signed int into or past the sign bit is undefined).
 * x is still evaluated twice, so avoid arguments with side effects.
 */
#define rot1(x) ((((uint32_t)(x)) << 24) | (((uint32_t)(x)) >> 8))
#define rot2(x) ((((uint32_t)(x)) << 16) | (((uint32_t)(x)) >> 16))
#define rot3(x) ((((uint32_t)(x)) << 8) | (((uint32_t)(x)) >> 24))
48 * This cute trick does 4 'mul by two' at once. Stolen from
49 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
50 * a standard graphics trick
51 * The key to this is that we need to xor with 0x1b if the top bit is set.
52 * a 1xxx xxxx 0xxx 0xxx First we mask the 7bit,
53 * b 1000 0000 0000 0000 then we shift right by 7 putting the 7bit in 0bit,
54 * c 0000 0001 0000 0000 we then subtract (c) from (b)
55 * d 0111 1111 0000 0000 and now we and with our mask
56 * e 0001 1011 0000 0000
/*
 * mul2(x, t): double all four bytes packed in the 32-bit word x in one go.
 *
 * t is a caller-supplied scratch lvalue; it is overwritten with (x & mt).
 * The value of the whole expression is the doubled word: x + x masked with
 * mh, XORed with a per-byte correction derived from t and masked with mm.
 * x is evaluated more than once, so it must be free of side effects.
 *
 * NOTE(review): mt, mh and mm are mask constants defined elsewhere in this
 * file - presumably the per-byte top-bit mask, the mask that discards carries
 * between bytes, and the replicated 0x1b reduction constant; confirm.
 */
#define mul2(x,t) \
    ((t) = (mt & (x)), \
     ((mh & ((x) + (x))) ^ (mm & ((t) - ((t) >> 7)))))
/*
 * inv_mix_col(x, f2, f4, f8, f9): expression macro built from a sequence of
 * comma-separated steps. f2, f4, f8 and f9 are caller-supplied scratch
 * lvalues that the steps overwrite (the step shown stores f2^f4^f8 into f8).
 * AES_convert_key assigns the macro's value back to the word it passed as x.
 *
 * NOTE(review): only part of the expansion is shown here; judging by the
 * name it applies the AES inverse MixColumn transform to a packed column
 * word - confirm against the full definition.
 */
65 #define inv_mix_col(x,f2,f4,f8,f9) (\
70 (f8)=((f2)^(f4)^(f8)), \
/*
 * AES forward substitution box: 256 byte values, indexed by the input byte.
 * Used by the key expansion in AES_set_key and by the ByteSub step of
 * AES_encrypt. aes_isbox is the matching inverse table.
 */
80 static const uint8_t aes_sbox[256] =
82 0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
83 0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
84 0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
85 0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
86 0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
87 0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
88 0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
89 0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
90 0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
91 0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
92 0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
93 0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
94 0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
95 0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
96 0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
97 0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
98 0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
99 0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
100 0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
101 0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
102 0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
103 0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
104 0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
105 0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
106 0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
107 0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
108 0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
109 0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
110 0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
111 0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
112 0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
113 0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
/*
 * AES inverse substitution box: 256 byte values, indexed by the input byte,
 * undoing aes_sbox (for example aes_sbox[0x00] is 0x63 and entry 0x63 here is
 * 0x00). Used by the ByteSub step of AES_decrypt.
 */
119 static const uint8_t aes_isbox[256] =
121 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
122 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
123 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
124 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
125 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
126 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
127 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
128 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
129 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
130 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
131 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
132 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
133 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
134 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
135 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
136 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
137 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
138 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
139 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
140 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
141 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
142 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
143 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
144 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
145 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
146 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
147 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
148 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
149 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
150 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
151 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
152 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
/*
 * Round constants for the key expansion: 30 bytes starting at 0x01, each one
 * the GF(2^8) double of its predecessor (0x80 is followed by 0x1b, and so on).
 *
 * NOTE(review): AES_set_key consumes constants through its local pointer
 * `ip`; presumably ip is pointed at this table - the assignment is not shown
 * next to the expansion loop, so confirm.
 */
155 static const unsigned char Rcon[30]=
157 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
158 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
159 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
160 0xb3,0x7d,0xfa,0xef,0xc5,0x91,
163 /* ----- static functions ----- */
/* Forward declarations of the file-local single-block primitives. Each takes
 * the cipher context (read-only) and a pointer to the block held as 32-bit
 * words, and overwrites those words with the result. */
164 static void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
165 static void AES_decrypt(const AES_CTX *ctx, uint32_t *data);
/*
 * Multiply a byte by two (the "xtime" operation) in the Galois Field GF(2^8)
 * defined by the irreducible polynomial x^8+x^4+x^3+x+1.
 *
 * Only bits 0-7 of x matter: bit 7 decides whether the reduction constant
 * 0x1b is folded in, and the result is truncated to eight bits, so any
 * higher bits in x are ignored.
 *
 * Returns the doubled value as a single byte.
 */
static unsigned char AES_xtime(uint32_t x)
{
    uint32_t doubled = x << 1;

    if (x & 0x80)
    {
        doubled ^= 0x1b;    /* reduce modulo the field polynomial */
    }

    return (unsigned char)(doubled & 0xff);
}
175 * Set up AES with the key/iv and cipher size.
/*
 * Prepare an AES context: expand `key` into the round-key schedule and store
 * a copy of the initialisation vector.
 *
 * ctx  - context to fill in; key_size and iv are written here, and rounds is
 *        read back to size the schedule.
 * key  - raw key bytes, packed most-significant byte first into 32-bit words.
 * iv   - 16 bytes copied verbatim into ctx->iv; must not be NULL because the
 *        copy is unconditional.
 * mode - selects the key length. An unrecognised mode takes the `default`
 *        branch, which is annotated "fail silently".
 *
 * NOTE(review): the statements that derive `words` and ctx->rounds from
 * `mode`, and that initialise `W` and `ip`, are not shown alongside the lines
 * below - presumably W points at the context's key schedule and ip at Rcon;
 * confirm before relying on this description.
 */
177 void AES_set_key(AES_CTX *ctx, const uint8_t *key,
178 const uint8_t *iv, AES_MODE mode)
181 uint32_t *W, tmp, tmp2;
182 const unsigned char *ip;
197 default: /* fail silently */
202 ctx->key_size = words;
/* Load the key as big-endian 32-bit words, two words per pass. Every pass
 * reads key[0..7], so `key` is presumably advanced by 8 inside the loop
 * body - TODO confirm. */
204 for (i = 0; i < words; i+=2)
206 W[i+0]= ((uint32_t)key[ 0]<<24)|
207 ((uint32_t)key[ 1]<<16)|
208 ((uint32_t)key[ 2]<< 8)|
209 ((uint32_t)key[ 3] );
210 W[i+1]= ((uint32_t)key[ 4]<<24)|
211 ((uint32_t)key[ 5]<<16)|
212 ((uint32_t)key[ 6]<< 8)|
213 ((uint32_t)key[ 7] );
/* ii is the total schedule length in words: four per round key, and there
 * are rounds+1 round keys. Expansion continues from where the raw key ended. */
218 ii = 4 * (ctx->rounds+1);
219 for (i = words; i<ii; i++)
/* First word of each key-sized group: pass every byte of tmp through the
 * S-box while rotating the word left by one byte, then XOR the current round
 * constant (*ip) into the most significant byte. */
223 if ((i % words) == 0)
225 tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8;
226 tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
227 tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
228 tmp2|=(uint32_t)aes_sbox[(tmp>>24) ];
229 tmp=tmp2^(((unsigned int)*ip)<<24);
/* 8-word keys only: at the mid-point of each group, pass every byte through
 * the S-box in place (no rotation). */
233 if ((words == 8) && ((i % words) == 4))
235 tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ;
236 tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
237 tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
238 tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24;
245 /* copy the iv across */
246 memcpy(ctx->iv, iv, 16);
250 * Change a key for decryption.
/*
 * Convert the key held in ctx for use by decryption (per the file's own
 * description of this routine).
 *
 * The loop counts i down from ctx->rounds*4 to 5, i.e. rounds*4 - 4 passes,
 * and pushes a word w through inv_mix_col using t1..t4 as scratch.
 *
 * NOTE(review): how w is loaded from and stored back through k is not shown
 * alongside these lines - confirm which schedule words are transformed.
 */
252 void AES_convert_key(AES_CTX *ctx)
255 uint32_t *k,w,t1,t2,t3,t4;
260 for (i= ctx->rounds*4; i > 4; i--)
263 w = inv_mix_col(w,t1,t2,t3,t4);
269 * Encrypt a byte sequence (with a block size 16) using the AES cipher.
/*
 * CBC-mode encryption of `length` bytes from msg into out.
 *
 * ctx    - cipher context; ctx->iv supplies the initial chaining value and is
 *          overwritten with the final one, so consecutive calls continue the
 *          same chain.
 * msg    - plaintext input.
 * out    - ciphertext output; one AES_BLOCKSIZE block is written per input
 *          block consumed.
 * length - byte count. Only whole blocks are processed: the loop ends as soon
 *          as fewer than AES_BLOCKSIZE bytes remain, so a trailing partial
 *          block is silently ignored.
 */
271 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
274 uint32_t tin[4], tout[4], iv[4];
/* Load the chaining value. Words are converted with ntohl on the way in and
 * htonl on the way out, around every memcpy. */
276 memcpy(iv, ctx->iv, AES_IV_SIZE);
277 for (i = 0; i < 4; i++)
278 tout[i] = ntohl(iv[i]);
280 for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
/* Stage the block through a local word buffer with memcpy, so msg needs no
 * particular alignment. (msg_32 and out_32 are presumably local uint32_t[4]
 * buffers; their declarations are not shown here.) */
284 memcpy(msg_32, msg, AES_BLOCKSIZE);
285 msg += AES_BLOCKSIZE;
/* CBC chaining: XOR the plaintext with tout, which holds the IV on the first
 * pass. NOTE(review): tout is presumably refreshed from the encrypted tin
 * further down - that assignment is not shown; confirm. */
287 for (i = 0; i < 4; i++)
288 tin[i] = ntohl(msg_32[i])^tout[i];
/* Encrypt the chained block in place. */
290 AES_encrypt(ctx, tin);
292 for (i = 0; i < 4; i++)
295 out_32[i] = htonl(tout[i]);
298 memcpy(out, out_32, AES_BLOCKSIZE);
299 out += AES_BLOCKSIZE;
/* Persist the last chaining value back into the context. */
302 for (i = 0; i < 4; i++)
303 iv[i] = htonl(tout[i]);
304 memcpy(ctx->iv, iv, AES_IV_SIZE);
308 * Decrypt a byte sequence (with a block size 16) using the AES cipher.
/*
 * CBC-mode decryption of `length` bytes from msg into out.
 *
 * ctx    - cipher context; ctx->iv supplies the initial chaining value and is
 *          overwritten on exit with the value held in `xor`.
 * msg    - ciphertext input.
 * out    - plaintext output; one block is written per input block consumed.
 * length - byte count. Only whole 16-byte blocks are processed; a trailing
 *          partial block is silently ignored.
 *
 * NOTE(review): the loop header uses the literal 16 while the copies use
 * AES_BLOCKSIZE - this assumes AES_BLOCKSIZE is 16; confirm.
 */
310 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
313 uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
/* Load the chaining value into xor. */
315 memcpy(iv, ctx->iv, AES_IV_SIZE);
316 for (i = 0; i < 4; i++)
317 xor[i] = ntohl(iv[i]);
319 for (length -= 16; length >= 0; length -= 16)
/* Stage the ciphertext block through a local word buffer with memcpy, so msg
 * needs no particular alignment. (msg_32 and out_32 are presumably local
 * uint32_t[4] buffers; their declarations are not shown here.) */
323 memcpy(msg_32, msg, AES_BLOCKSIZE);
324 msg += AES_BLOCKSIZE;
326 for (i = 0; i < 4; i++)
328 tin[i] = ntohl(msg_32[i]);
/* NOTE(review): data is presumably a copy of tin made in the loop above, so
 * that the ciphertext survives to become the next chaining value - the copy
 * is not shown; confirm. */
332 AES_decrypt(ctx, data);
/* Undo the CBC chaining: XOR the decrypted block with the previous
 * ciphertext block (the IV on the first pass). NOTE(review): xor is
 * presumably then replaced by tin for the next pass - not shown; confirm. */
334 for (i = 0; i < 4; i++)
336 tout[i] = data[i]^xor[i];
338 out_32[i] = htonl(tout[i]);
341 memcpy(out, out_32, AES_BLOCKSIZE);
342 out += AES_BLOCKSIZE;
/* Persist the last chaining value back into the context. */
345 for (i = 0; i < 4; i++)
346 iv[i] = htonl(xor[i]);
347 memcpy(ctx->iv, iv, AES_IV_SIZE);
351 * Encrypt a single block (16 bytes) of data
/*
 * Encrypt one block in place.
 *
 * ctx  - supplies the round count (ctx->rounds) and the key schedule
 *        (ctx->ks), which is read forwards one word at a time.
 * data - the block as four 32-bit words; overwritten with the result.
 */
353 static void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
355 /* To make this code smaller, generate the sbox entries on the fly.
356 * This will have a really heavy effect upon performance.
359 uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
361 int rounds = ctx->rounds;
362 const uint32_t *k = ctx->ks;
364 /* Pre-round key addition */
365 for (row = 0; row < 4; row++)
368 /* Encrypt one block. */
369 for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
371 /* Perform ByteSub and ShiftRow operations together */
372 for (row = 0; row < 4; row++)
/* Output word `row` takes its four bytes from four different input words
 * (row, row+1, row+2, row+3 mod 4) - that diagonal read is the ShiftRow -
 * and each byte goes through the S-box, which is the ByteSub. */
374 a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
375 a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
376 a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
377 a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
379 /* Perform MixColumn iff not last round */
380 if (curr_rnd < (rounds - 1))
/* MixColumn on the bytes a0..a3: each byte is XORed with the sum of all four
 * and with the GF(2^8) double of itself XOR its neighbour.
 * NOTE(review): old_a0 is read on the a3 line but its assignment is not shown
 * here - presumably it captures a0 before a0 is modified; confirm. */
382 tmp1 = a0 ^ a1 ^ a2 ^ a3;
384 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
385 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
386 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
387 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
/* Repack the four bytes, a0 in the most significant position. */
390 tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
393 /* KeyAddition - note that it is vital that this loop is separate from
394 the MixColumn operation, which must be atomic...*/
395 for (row = 0; row < 4; row++)
396 data[row] = tmp[row] ^ *(k++);
401 * Decrypt a single block (16 bytes) of data
/*
 * Decrypt one block in place.
 *
 * ctx  - supplies the round count (ctx->rounds) and the key schedule
 *        (ctx->ks).
 * data - the block as four 32-bit words; overwritten with the result.
 *
 * The schedule is consumed backwards: k starts one word past the last of the
 * (rounds+1)*4 schedule words and is pre-decremented before every use.
 */
403 static void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
406 uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
407 uint32_t a0, a1, a2, a3, row;
409 int rounds = ctx->rounds;
410 const uint32_t *k = ctx->ks + ((rounds+1)*4);
412 /* pre-round key addition */
413 for (row=4; row > 0;row--)
414 data[row-1] ^= *(--k);
416 /* Decrypt one block */
417 for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
419 /* Perform ByteSub and ShiftRow operations together */
420 for (row = 4; row > 0; row--)
/* Output word row-1 gathers its bytes from input words row-1, row-2, row-3
 * and row (mod 4) - the diagonal runs the opposite way to AES_encrypt - and
 * each byte goes through the inverse S-box. */
422 a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
423 a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
424 a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
425 a3 = aes_isbox[(data[row%4])&0xFF];
427 /* Perform MixColumn iff not last round */
428 if (curr_rnd<(rounds-1))
430 /* The MDS coefficients (0x09, 0x0B, 0x0D, 0x0E)
431 are quite large compared to encryption; this
432 operation slows decryption down noticeably. */
/* Build the multiples by repeated doubling: xt0..xt3 are 2x the neighbouring
 * pair sums, xt4 = 4*(a0^a2), xt5 = 4*(a1^a3), xt6 = 8*(a0^a1^a2^a3). */
433 xt0 = AES_xtime(a0^a1);
434 xt1 = AES_xtime(a1^a2);
435 xt2 = AES_xtime(a2^a3);
436 xt3 = AES_xtime(a3^a0);
437 xt4 = AES_xtime(xt0^xt1);
438 xt5 = AES_xtime(xt1^xt2);
439 xt6 = AES_xtime(xt4^xt5);
/* Combine them so each output byte is the 0x0E/0x0B/0x0D/0x09 weighted sum
 * of a0..a3 (e.g. xt0 = 0x0E*a0 ^ 0x0B*a1 ^ 0x0D*a2 ^ 0x09*a3). */
441 xt0 ^= a1^a2^a3^xt4^xt6;
442 xt1 ^= a0^a2^a3^xt5^xt6;
443 xt2 ^= a0^a1^a3^xt4^xt6;
444 xt3 ^= a0^a1^a2^xt5^xt6;
445 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
/* Repack of the unmixed bytes. NOTE(review): presumably the else-branch of
 * the last-round test above (the `else` is not shown here); confirm. */
448 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
/* Key addition for this round, walking the schedule backwards. */
451 for (row = 4; row > 0; row--)
452 data[row-1] = tmp[row-1] ^ *(--k);