2 * Copyright (C) 2017 Denys Vlasenko
4 * Licensed under GPLv2, see file LICENSE in this source tree.
7 /* This AES implementation is derived from tiny-AES128-C code,
8 * which was put by its author into public domain:
10 * tiny-AES128-C/unlicense.txt, Dec 8, 2014
12 * This is free and unencumbered software released into the public domain.
14 * Anyone is free to copy, modify, publish, use, compile, sell, or
15 * distribute this software, either in source code form or as a compiled
16 * binary, for any purpose, commercial or non-commercial, and by any
19 * In jurisdictions that recognize copyright laws, the author or authors
20 * of this software dedicate any and all copyright interest in the
21 * software to the public domain. We make this dedication for the benefit
22 * of the public at large and to the detriment of our heirs and
23 * successors. We intend this dedication to be an overt act of
24 * relinquishment in perpetuity of all present and future rights to this
25 * software under copyright law.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
30 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
31 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
33 * OTHER DEALINGS IN THE SOFTWARE.
36 /* Note that only original tiny-AES128-C code is public domain.
37 * The derived code in this file has been expanded to also implement aes192
38 * and aes256 and use more efficient word-sized operations in many places,
39 * and put under GPLv2 license.
// Forward substitution table used by SubBytes() and Subword(),
// indexed by the input byte.
// The lookup tables are const so they can be placed in read-only storage
// instead of RAM. The values could instead be computed at run time, trading
// ROM for RAM - this can be useful in (embedded) bootloader applications,
// where ROM is often limited.
static const uint8_t sbox[256] = {
	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
// Reverse substitution table used by InvSubBytes(),
// indexed by the substituted byte.
static const uint8_t rsbox[256] = {
	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
116 // SubWord() is a function that takes a four-byte input word and
117 // applies the S-box to each of the four bytes to produce an output word.
118 static uint32_t Subword(uint32_t x)
120 return (sbox[(x >> 24) ] << 24)
121 | (sbox[(x >> 16) & 255] << 16)
122 | (sbox[(x >> 8 ) & 255] << 8 )
123 | (sbox[(x ) & 255] );
126 // This function produces Nb(Nr+1) round keys.
127 // The round keys are used in each round to decrypt the states.
128 static int KeyExpansion(uint32_t *RoundKey, const void *key, unsigned key_len)
130 // The round constant word array, Rcon[i], contains the values given by
131 // x to th e power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8).
132 // Note that i starts at 2, not 0.
133 static const uint8_t Rcon[] ALIGN1 = {
134 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
135 //..... 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6,...
136 // but aes256 only uses values up to 0x36
138 int rounds, words_key, words_RoundKey;
141 // key_len 16: aes128, rounds 10, words_key 4, words_RoundKey 44
142 // key_len 24: aes192, rounds 12, words_key 6, words_RoundKey 52
143 // key_len 32: aes256, rounds 14, words_key 8, words_RoundKey 60
144 words_key = key_len / 4;
145 rounds = 6 + (key_len / 4);
146 words_RoundKey = 28 + key_len;
148 // The first round key is the key itself.
149 for (i = 0; i < words_key; i++)
150 RoundKey[i] = get_unaligned_be32((uint32_t*)key + i);
151 // i == words_key now
153 // All other round keys are found from the previous round keys.
155 for (; i < words_RoundKey; i++) {
158 tempa = RoundKey[i - 1];
160 // RotWord(): rotates the 4 bytes in a word to the left once.
161 tempa = (tempa << 8) | (tempa >> 24);
162 tempa = Subword(tempa);
163 tempa ^= (uint32_t)Rcon[k] << 24;
164 } else if (words_key > 6 && j == 4) {
165 tempa = Subword(tempa);
167 RoundKey[i] = RoundKey[i - words_key] ^ tempa;
169 if (j == words_key) {
// This function adds the round key to the state.
// The round key is added to the state by an XOR function.
//
// astate:    16 state bytes, one byte value per array element.
// RoundKeys: four 32-bit round key words; the most significant byte of
//            each word is applied to the lowest-indexed state element.
static void AddRoundKey(unsigned astate[16], const uint32_t *RoundKeys)
{
	unsigned i;

	for (i = 0; i < 16; i += 4) {
		uint32_t n = *RoundKeys++;
		astate[i + 0] ^= (n >> 24);
		astate[i + 1] ^= (n >> 16) & 255;
		astate[i + 2] ^= (n >> 8) & 255;
		astate[i + 3] ^= n & 255;
	}
}
192 // The SubBytes Function Substitutes the values in the
193 // state matrix with values in an S-box.
194 static void SubBytes(unsigned astate[16])
198 for (i = 0; i < 16; i++)
199 astate[i] = sbox[astate[i]];
// Our code actually stores "columns" (in aes encryption terminology)
// of state in rows: the first 4 elements are "row 0, col 0", "row 1, col 0",
// "row 2, col 0", "row 3, col 0". The fifth element is "row 0, col 1",
// and so on.
// ASTATE(col,row) expects an array named astate to be in scope.
#define ASTATE(col,row) astate[(col)*4 + (row)]
208 // The ShiftRows() function shifts the rows in the state to the left.
209 // Each row is shifted with different offset.
210 // Offset = Row number. So the first row is not shifted.
211 static void ShiftRows(unsigned astate[16])
215 // Rotate first row 1 columns to left
217 ASTATE(0,1) = ASTATE(1,1);
218 ASTATE(1,1) = ASTATE(2,1);
219 ASTATE(2,1) = ASTATE(3,1);
222 // Rotate second row 2 columns to left
223 v = ASTATE(0,2); ASTATE(0,2) = ASTATE(2,2); ASTATE(2,2) = v;
224 v = ASTATE(1,2); ASTATE(1,2) = ASTATE(3,2); ASTATE(3,2) = v;
226 // Rotate third row 3 columns to left
228 ASTATE(3,3) = ASTATE(2,3);
229 ASTATE(2,3) = ASTATE(1,3);
230 ASTATE(1,3) = ASTATE(0,3);
// MixColumns function mixes the columns of the state matrix.
// Each group of four consecutive astate[] elements (a, b, c, d) is one
// column; it is replaced by
//   2a ^ 3b ^  c ^  d
//    a ^ 2b ^ 3c ^  d
//    a ^  b ^ 2c ^ 3d
//   3a ^  b ^  c ^ 2d
// with multiplication in GF(2^8) modulo 0x11b.
// Every astate[] element must be in the range 0..255.
static void MixColumns(unsigned astate[16])
{
	unsigned i;

	for (i = 0; i < 16; i += 4) {
		unsigned a, b, c, d;
		unsigned x, y, z, t;

		a = astate[i + 0];
		b = astate[i + 1];
		c = astate[i + 2];
		d = astate[i + 3];
		// Multiplication by 2 is a left shift, by 3 is (v << 1) ^ v.
		// The results are at most 9 bits wide at this point.
		x = (a << 1) ^ b ^ (b << 1) ^ c ^ d;
		y = a ^ (b << 1) ^ c ^ (c << 1) ^ d;
		z = a ^ b ^ (c << 1) ^ d ^ (d << 1);
		t = a ^ (a << 1) ^ b ^ c ^ (d << 1);
		// Reduce: (v >> 8) is 0 or 1, so -(int)(v >> 8) is 0 or all-ones,
		// i.e. 0x11b is xor'ed in only when bit 8 is set.
		astate[i + 0] = x ^ ((-(int)(x >> 8)) & 0x11b);
		astate[i + 1] = y ^ ((-(int)(y >> 8)) & 0x11b);
		astate[i + 2] = z ^ ((-(int)(z >> 8)) & 0x11b);
		astate[i + 3] = t ^ ((-(int)(t >> 8)) & 0x11b);
	}
}
258 // The SubBytes Function Substitutes the values in the
259 // state matrix with values in an S-box.
260 static void InvSubBytes(unsigned astate[16])
264 for (i = 0; i < 16; i++)
265 astate[i] = rsbox[astate[i]];
268 static void InvShiftRows(unsigned astate[16])
272 // Rotate first row 1 columns to right
274 ASTATE(3,1) = ASTATE(2,1);
275 ASTATE(2,1) = ASTATE(1,1);
276 ASTATE(1,1) = ASTATE(0,1);
279 // Rotate second row 2 columns to right
280 v = ASTATE(0,2); ASTATE(0,2) = ASTATE(2,2); ASTATE(2,2) = v;
281 v = ASTATE(1,2); ASTATE(1,2) = ASTATE(3,2); ASTATE(3,2) = v;
283 // Rotate third row 3 columns to right
285 ASTATE(0,3) = ASTATE(1,3);
286 ASTATE(1,3) = ASTATE(2,3);
287 ASTATE(2,3) = ASTATE(3,3);
291 static ALWAYS_INLINE unsigned Multiply(unsigned x)
296 return (x ^ y ^ (y << 1) ^ (y << 3) ^ (y << 4)) & 255;
// InvMixColumns function undoes MixColumns on the state matrix.
// Each group of four consecutive astate[] elements (a, b, c, d) is one
// column; it is replaced by
//   14a ^ 11b ^ 13c ^  9d
//    9a ^ 14b ^ 11c ^ 13d
//   13a ^  9b ^ 14c ^ 11d
//   11a ^ 13b ^  9c ^ 14d
// Each constant is expanded into shifts (14 = 8+4+2, 11 = 8+2+1,
// 13 = 8+4+1, 9 = 8+1), giving unreduced values up to 11 bits wide,
// which Multiply() then reduces back to a byte.
// Every astate[] element must be in the range 0..255.
static void InvMixColumns(unsigned astate[16])
{
	unsigned i;

	for (i = 0; i < 16; i += 4) {
		unsigned a, b, c, d;
		unsigned x, y, z, t;

		a = astate[i + 0];
		b = astate[i + 1];
		c = astate[i + 2];
		d = astate[i + 3];
		x = (a << 1) ^ (a << 2) ^ (a << 3) ^ b ^ (b << 1) ^ (b << 3)
		/***/ ^ c ^ (c << 2) ^ (c << 3) ^ d ^ (d << 3);
		y = a ^ (a << 3) ^ (b << 1) ^ (b << 2) ^ (b << 3)
		/***/ ^ c ^ (c << 1) ^ (c << 3) ^ d ^ (d << 2) ^ (d << 3);
		z = a ^ (a << 2) ^ (a << 3) ^ b ^ (b << 3)
		/***/ ^ (c << 1) ^ (c << 2) ^ (c << 3) ^ d ^ (d << 1) ^ (d << 3);
		t = a ^ (a << 1) ^ (a << 3) ^ b ^ (b << 2) ^ (b << 3)
		/***/ ^ c ^ (c << 3) ^ (d << 1) ^ (d << 2) ^ (d << 3);
		astate[i + 0] = Multiply(x);
		astate[i + 1] = Multiply(y);
		astate[i + 2] = Multiply(z);
		astate[i + 3] = Multiply(t);
	}
}
329 static void aes_encrypt_1(struct tls_aes *aes, unsigned astate[16])
331 unsigned rounds = aes->rounds;
332 const uint32_t *RoundKey = aes->key;
335 AddRoundKey(astate, RoundKey);
343 AddRoundKey(astate, RoundKey);
346 void FAST_FUNC aes_setkey(struct tls_aes *aes, const void *key, unsigned key_len)
348 aes->rounds = KeyExpansion(aes->key, key, key_len);
351 void FAST_FUNC aes_encrypt_one_block(struct tls_aes *aes, const void *data, void *dst)
356 const uint8_t *pt = data;
359 for (i = 0; i < 16; i++)
361 aes_encrypt_1(aes, astate);
362 for (i = 0; i < 16; i++)
366 void FAST_FUNC aes_cbc_encrypt(struct tls_aes *aes, void *iv, const void *data, size_t len, void *dst)
370 const uint8_t *pt = data;
376 /* almost aes_encrypt_one_block(rounds, RoundKey, pt, ct);
377 * but xor'ing of IV with plaintext[] is combined
378 * with plaintext[] -> astate[]
382 for (i = 0; i < 16; i++)
383 astate[i] = pt[i] ^ iv2[i];
384 aes_encrypt_1(aes, astate);
385 for (i = 0; i < 16; i++)
386 iv2[i] = ct[i] = astate[i];
394 static void aes_decrypt_1(struct tls_aes *aes, unsigned astate[16])
396 unsigned rounds = aes->rounds;
397 const uint32_t *RoundKey = aes->key;
399 RoundKey += rounds * 4;
400 AddRoundKey(astate, RoundKey);
402 InvShiftRows(astate);
405 AddRoundKey(astate, RoundKey);
408 InvMixColumns(astate);
// Decrypts the 16 bytes at data and writes the 16-byte result to dst.
// The input is copied into a local state array first.
// NOTE(review): no caller of this function is visible in this listing.
static void aes_decrypt_one_block(struct tls_aes *aes, const void *data, void *dst)
{
	unsigned astate[16];
	unsigned i;
	const uint8_t *ct = data;
	uint8_t *pt = dst;

	// Widen each ciphertext byte into one state element
	for (i = 0; i < 16; i++)
		astate[i] = ct[i];
	aes_decrypt_1(aes, astate);
	for (i = 0; i < 16; i++)
		pt[i] = astate[i];
}
431 void FAST_FUNC aes_cbc_decrypt(struct tls_aes *aes, void *iv, const void *data, size_t len, void *dst)
438 const uint8_t *ct = data;
441 ivbuf = memcpy(iv2, iv, 16);
443 ivnext = (ivbuf==iv2) ? iv3 : iv2;
445 /* almost aes_decrypt_one_block(rounds, RoundKey, ct, pt)
446 * but xor'ing of ivbuf is combined with astate[] -> plaintext[]
450 for (i = 0; i < 16; i++)
451 ivnext[i] = astate[i] = ct[i];
452 aes_decrypt_1(aes, astate);
453 for (i = 0; i < 16; i++)
454 pt[i] = astate[i] ^ ivbuf[i];