1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
61 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
62 /* redefine, because alignment is ensured */
64 # define GETU32(p) BSWAP4(*(const u32 *)(p))
66 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/*
 * PACK places a 16-bit constant in the top 16 bits of a size_t, so the
 * rem_4bit/rem_8bit reduction tables below work with both 32-bit and
 * 64-bit size_t.
 */
69 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V (V.hi:V.lo) right by one bit
 * and, when the bit shifted out was set, XOR in the constant
 * 0xE1 << 120 (visible as U64(0xe100000000000000) below) -- the GF(2^128)
 * reduction step used by GHASH.  NOTE(review): the "} else {" header and
 * the closing "} while(0)" lines of this macro are elided in this
 * extract; only the two arithmetic branches are visible.
 */
70 #define REDUCE1BIT(V) do { \
71 if (sizeof(size_t)==8) { \
72 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
73 V.lo = (V.hi<<63)|(V.lo>>1); \
74 V.hi = (V.hi>>1 )^T; \
77 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
78 V.lo = (V.hi<<63)|(V.lo>>1); \
79 V.hi = (V.hi>>1 )^((u64)T<<32); \
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows large enough free
109 * results in VM working set trimming, meaning that consequent
110 * malloc would immediately incur working set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
115 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: precompute the 256-entry multiplication table used by
 * the "8-bit" (TABLE_BITS==8) GHASH variant from the hash subkey H.
 * Power-of-two entries come from repeated halving (the i=64..1 loop);
 * all remaining entries are built as XOR combinations of already
 * computed ones (Hi[j] = H0 ^ Htable[j]).
 * NOTE(review): several lines of the original body (declarations of
 * V/i/j, loop interiors and closing braces) are elided in this extract.
 */
119 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
/*
 * gcm_gmult_8bit: multiply Xi by the hash subkey H in GF(2^128) using
 * the 256-entry table built by gcm_init_8bit, processing Xi one byte at
 * a time from the end (xi starts at byte 15).
 * NOTE(review): the main per-byte loop header and several structural
 * lines are elided in this extract; only fragments of the loop body and
 * the final byte-order fix-up are visible.
 */
143 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146 const u8 *xi = (const u8 *)Xi + 15;
/*
 * rem_8bit: reduction constants for the low byte shifted out at each
 * step, pre-shifted into the top 16 bits of a size_t via PACK so the
 * same table serves 32- and 64-bit builds.
 */
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
/* Per-byte step: fold table entry for current byte, shift Z right by 8
 * and reduce via rem_8bit (shifted for 32-bit size_t builds). */
220 Z.hi ^= Htable[n].hi;
221 Z.lo ^= Htable[n].lo;
228 rem = (size_t)Z.lo & 0xff;
229 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
231 if (sizeof(size_t) == 8)
232 Z.hi ^= rem_8bit[rem];
234 Z.hi ^= (u64)rem_8bit[rem] << 32;
/* Store result back into Xi in big-endian byte order. */
237 if (is_endian.little) {
239 Xi[0] = BSWAP8(Z.hi);
240 Xi[1] = BSWAP8(Z.lo);
244 v = (u32)(Z.hi >> 32);
248 v = (u32)(Z.lo >> 32);
259 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: precompute the 16-entry multiplication table for the
 * default "4-bit" GHASH variant from the hash subkey H.  The small-
 * footprint path builds entries with loops; the default path unrolls
 * the XOR combinations explicitly (Htable[3..15] below).  On ARM with
 * GHASH_ASM the dwords are additionally permuted to match the assembler
 * layout.  NOTE(review): declarations of V/i/j, the Htable[0..2]
 * seeding lines and several closing braces are elided in this extract.
 */
263 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
266 # if defined(OPENSSL_SMALL_FOOTPRINT)
275 # if defined(OPENSSL_SMALL_FOOTPRINT)
276 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
281 for (i = 2; i < 16; i <<= 1) {
282 u128 *Hi = Htable + i;
284 for (V = *Hi, j = 1; j < i; ++j) {
285 Hi[j].hi = V.hi ^ Htable[j].hi;
286 Hi[j].lo = V.lo ^ Htable[j].lo;
/* Unrolled variant: each entry is the XOR of two lower entries. */
297 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
299 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
300 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
301 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
303 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
304 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
305 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
306 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
307 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
308 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
309 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
311 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
313 * ARM assembler expects specific dword order in Htable.
322 if (is_endian.little)
323 for (j = 0; j < 16; ++j) {
328 for (j = 0; j < 16; ++j) {
330 Htable[j].hi = V.lo << 32 | V.lo >> 32;
331 Htable[j].lo = V.hi << 32 | V.hi >> 32;
/*
 * rem_4bit: reduction constants for the 4 bits shifted out per step of
 * the 4-bit GHASH loop, pre-shifted to the top 16 bits of a size_t via
 * PACK.  NOTE(review): the closing "};" of this initializer is elided
 * in this extract.
 */
338 static const size_t rem_4bit[16] = {
339 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
340 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
341 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
342 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
/*
 * gcm_gmult_4bit: multiply Xi by H in GF(2^128) using the 16-entry
 * table, consuming Xi one nibble at a time starting from byte 15
 * (low nibble nlo, high nibble nhi).  Each step shifts Z right by 4
 * and folds the shifted-out nibble back in via rem_4bit.  The result
 * is written back to Xi in big-endian order.
 * NOTE(review): the loop header, nibble-split lines (nhi = ...) and
 * closing braces are elided in this extract.
 */
345 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
349 size_t rem, nlo, nhi;
355 nlo = ((const u8 *)Xi)[15];
359 Z.hi = Htable[nlo].hi;
360 Z.lo = Htable[nlo].lo;
363 rem = (size_t)Z.lo & 0xf;
364 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
366 if (sizeof(size_t) == 8)
367 Z.hi ^= rem_4bit[rem];
369 Z.hi ^= (u64)rem_4bit[rem] << 32;
371 Z.hi ^= Htable[nhi].hi;
372 Z.lo ^= Htable[nhi].lo;
377 nlo = ((const u8 *)Xi)[cnt];
381 rem = (size_t)Z.lo & 0xf;
382 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
384 if (sizeof(size_t) == 8)
385 Z.hi ^= rem_4bit[rem];
387 Z.hi ^= (u64)rem_4bit[rem] << 32;
389 Z.hi ^= Htable[nlo].hi;
390 Z.lo ^= Htable[nlo].lo;
/* Write Z back to Xi big-endian (BSWAP8 on little-endian hosts). */
393 if (is_endian.little) {
395 Xi[0] = BSWAP8(Z.hi);
396 Xi[1] = BSWAP8(Z.lo);
400 v = (u32)(Z.hi >> 32);
404 v = (u32)(Z.lo >> 32);
415 # if !defined(OPENSSL_SMALL_FOOTPRINT)
417 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
418 * details... Compiler-generated code doesn't seem to give any
419 * performance improvement, at least not on x86[_64]. It's here
420 * mostly as reference and a placeholder for possible future
421 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed GHASH over len bytes of inp -- XOR each
 * 16-byte block into Xi and multiply by H using the 4-bit table.  The
 * 64-bit path additionally precomputes Hshr4/Hshl4 (Htable shifted by
 * 4 bits) and a shared 8-bit reduction table so two nibbles are handled
 * per iteration.  NOTE(review): many structural lines (loop headers,
 * nhi assignments, braces, #if/#else directives) are elided in this
 * extract; this documentation is limited to what the visible lines show.
 */
423 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
424 const u8 *inp, size_t len)
428 size_t rem, nlo, nhi;
437 nlo = ((const u8 *)Xi)[15];
442 Z.hi = Htable[nlo].hi;
443 Z.lo = Htable[nlo].lo;
446 rem = (size_t)Z.lo & 0xf;
447 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
449 if (sizeof(size_t) == 8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem] << 32;
454 Z.hi ^= Htable[nhi].hi;
455 Z.lo ^= Htable[nhi].lo;
460 nlo = ((const u8 *)Xi)[cnt];
465 rem = (size_t)Z.lo & 0xf;
466 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
468 if (sizeof(size_t) == 8)
469 Z.hi ^= rem_4bit[rem];
471 Z.hi ^= (u64)rem_4bit[rem] << 32;
473 Z.hi ^= Htable[nlo].hi;
474 Z.lo ^= Htable[nlo].lo;
478 * Extra 256+16 bytes per-key plus 512 bytes shared tables
479 * [should] give ~50% improvement... One could have PACK()-ed
480 * the rem_8bit even here, but the priority is to minimize
483 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
484 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
485 static const unsigned short rem_8bit[256] = {
486 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
487 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
488 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
489 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
490 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
491 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
492 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
493 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
494 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
495 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
496 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
497 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
498 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
499 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
500 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
501 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
502 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
503 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
504 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
505 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
506 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
507 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
508 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
509 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
510 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
511 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
512 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
513 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
514 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
515 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
516 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
517 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
520 * This pre-processing phase slows down procedure by approximately
521 * same time as it makes each loop spin faster. In other words
522 * single block performance is approximately same as straightforward
523 * "4-bit" implementation, and then it goes only faster...
525 for (cnt = 0; cnt < 16; ++cnt) {
526 Z.hi = Htable[cnt].hi;
527 Z.lo = Htable[cnt].lo;
528 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
529 Hshr4[cnt].hi = (Z.hi >> 4);
530 Hshl4[cnt] = (u8)(Z.lo << 4);
534 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
535 nlo = ((const u8 *)Xi)[cnt];
540 Z.hi ^= Htable[nlo].hi;
541 Z.lo ^= Htable[nlo].lo;
543 rem = (size_t)Z.lo & 0xff;
545 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
548 Z.hi ^= Hshr4[nhi].hi;
549 Z.lo ^= Hshr4[nhi].lo;
550 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
/* Final (cnt==0) byte is handled with an extra 4-bit step. */
553 nlo = ((const u8 *)Xi)[0];
558 Z.hi ^= Htable[nlo].hi;
559 Z.lo ^= Htable[nlo].lo;
561 rem = (size_t)Z.lo & 0xf;
563 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
566 Z.hi ^= Htable[nhi].hi;
567 Z.lo ^= Htable[nhi].lo;
568 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
/* Write Z back to Xi big-endian, then advance to the next block. */
571 if (is_endian.little) {
573 Xi[0] = BSWAP8(Z.hi);
574 Xi[1] = BSWAP8(Z.lo);
578 v = (u32)(Z.hi >> 32);
582 v = (u32)(Z.lo >> 32);
591 } while (inp += 16, len -= 16);
/*
 * Prototypes used when GHASH_ASM provides assembler implementations of
 * the 4-bit routines, plus the generic GCM_MUL/GHASH dispatch macros.
 * GHASH_CHUNK bounds how much data is hashed per pass (see comment).
 */
595 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
596 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
600 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
601 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
602 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
604 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
605 * effect. In other words idea is to hash data while it's still in L1 cache
606 * after encryption pass...
608 # define GHASH_CHUNK (3*1024)
611 #else /* TABLE_BITS */
611 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free bit-at-a-time GF(2^128) multiplication
 * (TABLE_BITS==1).  Xi is read one machine word at a time, converted to
 * big-endian bit order, and each bit conditionally folds V into Z
 * (mask M below) before V is halved.  NOTE(review): declarations of
 * X/i/M accumulation lines and closing braces are elided in this
 * extract.
 */
613 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
615 u128 V, Z = { 0, 0 };
618 const long *xi = (const long *)Xi;
624 V.hi = H[0]; /* H is in host byte order, no byte swapping */
627 for (j = 0; j < 16 / sizeof(long); ++j) {
628 if (is_endian.little) {
629 if (sizeof(long) == 8) {
631 X = (long)(BSWAP8(xi[j]));
633 const u8 *p = (const u8 *)(xi + j);
634 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
637 const u8 *p = (const u8 *)(xi + j);
643 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
644 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
/* Write Z back to Xi big-endian. */
652 if (is_endian.little) {
654 Xi[0] = BSWAP8(Z.hi);
655 Xi[1] = BSWAP8(Z.lo);
659 v = (u32)(Z.hi >> 32);
663 v = (u32)(Z.lo >> 32);
674 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Platform dispatch: when TABLE_BITS==4 and assembler/CPUID support is
 * available, declare the per-architecture GHASH implementations
 * (x86/x86_64 CLMUL/AVX/MMX, ARM NEON/PMULL, SPARC VIS3, PPC vcrypto)
 * and define GCM_FUNCREF_4BIT so GCM_MUL/GHASH go through the function
 * pointers (gcm_gmult_p/gcm_ghash_p) selected in CRYPTO_gcm128_init.
 * NOTE(review): several #else/#endif lines of this extract are elided.
 */
678 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
679 # if !defined(I386_ONLY) && \
680 (defined(__i386) || defined(__i386__) || \
681 defined(__x86_64) || defined(__x86_64__) || \
682 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
683 # define GHASH_ASM_X86_OR_64
684 # define GCM_FUNCREF_4BIT
685 extern unsigned int OPENSSL_ia32cap_P[];
687 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
688 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
689 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
692 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
693 # define gcm_init_avx gcm_init_clmul
694 # define gcm_gmult_avx gcm_gmult_clmul
695 # define gcm_ghash_avx gcm_ghash_clmul
697 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
698 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
703 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
704 # define GHASH_ASM_X86
705 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
706 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
709 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
710 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
713 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
714 # include "arm_arch.h"
715 # if __ARM_MAX_ARCH__>=7
716 # define GHASH_ASM_ARM
717 # define GCM_FUNCREF_4BIT
718 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
719 # if defined(__arm__) || defined(__arm)
720 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
722 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
723 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
724 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
726 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
727 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
728 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
731 # elif defined(__sparc__) || defined(__sparc)
732 # include "sparc_arch.h"
733 # define GHASH_ASM_SPARC
734 # define GCM_FUNCREF_4BIT
735 extern unsigned int OPENSSL_sparcv9cap_P[];
736 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
737 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
738 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
740 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
741 # include "ppc_arch.h"
742 # define GHASH_ASM_PPC
743 # define GCM_FUNCREF_4BIT
744 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
745 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
746 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
751 #ifdef GCM_FUNCREF_4BIT
753 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
756 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, derive the hash subkey
 * H = E_K(0^128) via the supplied block cipher, convert H to host byte
 * order, then select and initialize the fastest available GHASH
 * implementation for this CPU (CLMUL/AVX on x86, PMULL/NEON on ARM,
 * VIS3 on SPARC, vcrypto on PPC, else the generic 4-bit table code).
 * NOTE(review): ctx->block/ctx->key assignments and several braces and
 * #else/#endif lines are elided in this extract.
 */
760 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
767 memset(ctx, 0, sizeof(*ctx));
/* H = E_K(0^128): H.c was zeroed by the memset above. */
771 (*block) (ctx->H.c, ctx->H.c, key);
773 if (is_endian.little) {
774 /* H is stored in host byte order */
776 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
777 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
781 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
782 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
788 gcm_init_8bit(ctx->Htable, ctx->H.u);
791 # define CTX__GHASH(f) (ctx->ghash = (f))
793 # define CTX__GHASH(f) (ctx->ghash = NULL)
795 # if defined(GHASH_ASM_X86_OR_64)
796 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
797 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
798 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
799 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
800 gcm_init_avx(ctx->Htable, ctx->H.u);
801 ctx->gmult = gcm_gmult_avx;
802 CTX__GHASH(gcm_ghash_avx);
804 gcm_init_clmul(ctx->Htable, ctx->H.u);
805 ctx->gmult = gcm_gmult_clmul;
806 CTX__GHASH(gcm_ghash_clmul);
811 gcm_init_4bit(ctx->Htable, ctx->H.u);
812 # if defined(GHASH_ASM_X86) /* x86 only */
813 # if defined(OPENSSL_IA32_SSE2)
814 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
816 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
818 ctx->gmult = gcm_gmult_4bit_mmx;
819 CTX__GHASH(gcm_ghash_4bit_mmx);
821 ctx->gmult = gcm_gmult_4bit_x86;
822 CTX__GHASH(gcm_ghash_4bit_x86);
825 ctx->gmult = gcm_gmult_4bit;
826 CTX__GHASH(gcm_ghash_4bit);
828 # elif defined(GHASH_ASM_ARM)
829 # ifdef PMULL_CAPABLE
831 gcm_init_v8(ctx->Htable, ctx->H.u);
832 ctx->gmult = gcm_gmult_v8;
833 CTX__GHASH(gcm_ghash_v8);
838 gcm_init_neon(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_neon;
840 CTX__GHASH(gcm_ghash_neon);
844 gcm_init_4bit(ctx->Htable, ctx->H.u);
845 ctx->gmult = gcm_gmult_4bit;
846 CTX__GHASH(gcm_ghash_4bit);
848 # elif defined(GHASH_ASM_SPARC)
849 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
850 gcm_init_vis3(ctx->Htable, ctx->H.u);
851 ctx->gmult = gcm_gmult_vis3;
852 CTX__GHASH(gcm_ghash_vis3);
854 gcm_init_4bit(ctx->Htable, ctx->H.u);
855 ctx->gmult = gcm_gmult_4bit;
856 CTX__GHASH(gcm_ghash_4bit);
858 # elif defined(GHASH_ASM_PPC)
859 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
860 gcm_init_p8(ctx->Htable, ctx->H.u);
861 ctx->gmult = gcm_gmult_p8;
862 CTX__GHASH(gcm_ghash_p8);
864 gcm_init_4bit(ctx->Htable, ctx->H.u);
865 ctx->gmult = gcm_gmult_4bit;
866 CTX__GHASH(gcm_ghash_4bit);
869 gcm_init_4bit(ctx->Htable, ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset the per-message state (AAD/message length
 * counters) and derive the initial counter block Yi.  A 12-byte IV is
 * copied directly (with counter 1 implied per GCM); any other length is
 * GHASH'ed in 16-byte chunks together with the IV bit-length len0.
 * Finally EK0 = E_K(Yi) is computed for the tag, and the 32-bit counter
 * word is extracted from Yi and re-stored incremented.
 * NOTE(review): Xi/Yi zeroing lines, the GCM_MUL calls between the IV
 * chunks, and several braces are elided in this extract.
 */
875 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
883 #ifdef GCM_FUNCREF_4BIT
884 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
891 ctx->len.u[0] = 0; /* AAD length */
892 ctx->len.u[1] = 0; /* message length */
/* Fast path: 96-bit IV is used verbatim as Yi[0..11]. */
897 memcpy(ctx->Yi.c, iv, 12);
905 for (i = 0; i < 16; ++i)
906 ctx->Yi.c[i] ^= iv[i];
912 for (i = 0; i < len; ++i)
913 ctx->Yi.c[i] ^= iv[i];
/* Fold the 64-bit IV bit-length len0 into the last 8 bytes of Yi. */
917 if (is_endian.little) {
919 ctx->Yi.u[1] ^= BSWAP8(len0);
921 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
922 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
923 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
924 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
925 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
926 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
927 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
928 ctx->Yi.c[15] ^= (u8)(len0);
931 ctx->Yi.u[1] ^= len0;
935 if (is_endian.little)
937 ctr = BSWAP4(ctx->Yi.d[3]);
939 ctr = GETU32(ctx->Yi.c + 12);
/* EK0 = E_K(Y0), kept for the final tag computation. */
945 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
947 if (is_endian.little)
949 ctx->Yi.d[3] = BSWAP4(ctr);
951 PUTU32(ctx->Yi.c + 12, ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into the
 * GHASH state Xi.  Returns non-zero (error) when the accumulated AAD
 * length exceeds 2^61 bytes or overflows (alen check below).  Handles
 * a partial block left from a previous call (byte-wise XOR), then whole
 * 16-byte blocks (via GHASH/GCM_MUL, elided here), then a trailing
 * partial block.  NOTE(review): the GHASH/GCM_MUL invocations, return
 * statements and braces are elided in this extract.
 */
957 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
962 u64 alen = ctx->len.u[0];
963 #ifdef GCM_FUNCREF_4BIT
964 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
966 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
967 const u8 *inp, size_t len) = ctx->ghash;
975 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
977 ctx->len.u[0] = alen;
982 ctx->Xi.c[n] ^= *(aad++);
994 if ((i = (len & (size_t)-16))) {
1001 for (i = 0; i < 16; ++i)
1002 ctx->Xi.c[i] ^= aad[i];
1009 n = (unsigned int)len;
1010 for (i = 0; i < len; ++i)
1011 ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-mode encrypt len bytes from in to out
 * while folding the produced ciphertext into the GHASH state Xi.
 * Enforces the GCM message-length limit (2^36 - 32 bytes, mlen check
 * below).  The non-small-footprint path processes GHASH_CHUNK-sized
 * runs word-at-a-time and hashes them in bulk via GHASH; STRICT_ALIGNMENT
 * builds fall back to byte-wise processing for misaligned buffers.
 * NOTE(review): partial-block handling, GCM_MUL calls, returns and many
 * braces/#endif lines are elided in this extract.
 */
1018 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1019 const unsigned char *in, unsigned char *out,
1025 } is_endian = { 1 };
1026 unsigned int n, ctr;
1028 u64 mlen = ctx->len.u[1];
1029 block128_f block = ctx->block;
1030 void *key = ctx->key;
1031 #ifdef GCM_FUNCREF_4BIT
1032 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1033 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1034 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1035 const u8 *inp, size_t len) = ctx->ghash;
1040 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1042 ctx->len.u[1] = mlen;
1045 /* First call to encrypt finalizes GHASH(AAD) */
1050 if (is_endian.little)
1052 ctr = BSWAP4(ctx->Yi.d[3]);
1054 ctr = GETU32(ctx->Yi.c + 12);
1060 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1061 if (16 % sizeof(size_t) == 0) { /* always true actually */
1065 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1076 # if defined(STRICT_ALIGNMENT)
1077 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1081 # if defined(GHASH_CHUNK)
1082 while (len >= GHASH_CHUNK) {
1083 size_t j = GHASH_CHUNK;
1086 size_t *out_t = (size_t *)out;
1087 const size_t *in_t = (const size_t *)in;
1089 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1091 if (is_endian.little)
1093 ctx->Yi.d[3] = BSWAP4(ctr);
1095 PUTU32(ctx->Yi.c + 12, ctr);
1099 for (i = 0; i < 16 / sizeof(size_t); ++i)
1100 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Hash the whole chunk of ciphertext just produced. */
1105 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1109 if ((i = (len & (size_t)-16))) {
1113 size_t *out_t = (size_t *)out;
1114 const size_t *in_t = (const size_t *)in;
1116 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1118 if (is_endian.little)
1120 ctx->Yi.d[3] = BSWAP4(ctr);
1122 PUTU32(ctx->Yi.c + 12, ctr);
1126 for (i = 0; i < 16 / sizeof(size_t); ++i)
1127 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1132 GHASH(ctx, out - j, j);
/* No bulk GHASH available: fold ciphertext into Xi per block. */
1136 size_t *out_t = (size_t *)out;
1137 const size_t *in_t = (const size_t *)in;
1139 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1141 if (is_endian.little)
1143 ctx->Yi.d[3] = BSWAP4(ctr);
1145 PUTU32(ctx->Yi.c + 12, ctr);
1149 for (i = 0; i < 16 / sizeof(size_t); ++i)
1150 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1158 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1160 if (is_endian.little)
1162 ctx->Yi.d[3] = BSWAP4(ctr);
1164 PUTU32(ctx->Yi.c + 12, ctr);
1169 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/* Small-footprint byte-wise fallback path. */
1179 for (i = 0; i < len; ++i) {
1181 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1183 if (is_endian.little)
1185 ctx->Yi.d[3] = BSWAP4(ctr);
1187 PUTU32(ctx->Yi.c + 12, ctr);
1192 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: CTR-mode decrypt len bytes from in to out.
 * Mirrors CRYPTO_gcm128_encrypt, except the GHASH input is the
 * CIPHERTEXT, so chunks are hashed from `in` BEFORE decryption
 * (GHASH(ctx, in, ...) below) and byte paths save the ciphertext byte
 * in `c` before overwriting.  Enforces the same 2^36 - 32 byte message
 * limit.  NOTE(review): partial-block handling, GCM_MUL calls, the
 * `c = in[...]` / Xi update lines, returns and many braces/#endif lines
 * are elided in this extract.
 */
1202 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1203 const unsigned char *in, unsigned char *out,
1209 } is_endian = { 1 };
1210 unsigned int n, ctr;
1212 u64 mlen = ctx->len.u[1];
1213 block128_f block = ctx->block;
1214 void *key = ctx->key;
1215 #ifdef GCM_FUNCREF_4BIT
1216 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1217 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1218 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1219 const u8 *inp, size_t len) = ctx->ghash;
1224 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1226 ctx->len.u[1] = mlen;
1229 /* First call to decrypt finalizes GHASH(AAD) */
1234 if (is_endian.little)
1236 ctr = BSWAP4(ctx->Yi.d[3]);
1238 ctr = GETU32(ctx->Yi.c + 12);
1244 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1245 if (16 % sizeof(size_t) == 0) { /* always true actually */
1250 *(out++) = c ^ ctx->EKi.c[n];
1262 # if defined(STRICT_ALIGNMENT)
1263 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1267 # if defined(GHASH_CHUNK)
1268 while (len >= GHASH_CHUNK) {
1269 size_t j = GHASH_CHUNK;
/* Hash the ciphertext chunk before it is decrypted in place below. */
1271 GHASH(ctx, in, GHASH_CHUNK);
1273 size_t *out_t = (size_t *)out;
1274 const size_t *in_t = (const size_t *)in;
1276 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1278 if (is_endian.little)
1280 ctx->Yi.d[3] = BSWAP4(ctr);
1282 PUTU32(ctx->Yi.c + 12, ctr);
1286 for (i = 0; i < 16 / sizeof(size_t); ++i)
1287 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1295 if ((i = (len & (size_t)-16))) {
1298 size_t *out_t = (size_t *)out;
1299 const size_t *in_t = (const size_t *)in;
1301 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1303 if (is_endian.little)
1305 ctx->Yi.d[3] = BSWAP4(ctr);
1307 PUTU32(ctx->Yi.c + 12, ctr);
1311 for (i = 0; i < 16 / sizeof(size_t); ++i)
1312 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1320 size_t *out_t = (size_t *)out;
1321 const size_t *in_t = (const size_t *)in;
1323 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1325 if (is_endian.little)
1327 ctx->Yi.d[3] = BSWAP4(ctr);
1329 PUTU32(ctx->Yi.c + 12, ctr);
1333 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1335 out[i] = c ^ ctx->EKi.t[i];
1345 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1347 if (is_endian.little)
1349 ctx->Yi.d[3] = BSWAP4(ctr);
1351 PUTU32(ctx->Yi.c + 12, ctr);
1358 out[n] = c ^ ctx->EKi.c[n];
/* Small-footprint byte-wise fallback path. */
1368 for (i = 0; i < len; ++i) {
1371 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373 if (is_endian.little)
1375 ctx->Yi.d[3] = BSWAP4(ctr);
1377 PUTU32(ctx->Yi.c + 12, ctr);
1383 out[i] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt but drives a
 * caller-supplied ctr128_f `stream` routine that encrypts many counter
 * blocks per call (GHASH_CHUNK/16 or j = len/16 blocks at a time),
 * hashing the produced ciphertext afterwards.  Falls back to
 * CRYPTO_gcm128_encrypt entirely in small-footprint builds.  Enforces
 * the 2^36 - 32 byte message limit.  NOTE(review): partial-block
 * handling, GCM_MUL calls, returns and several braces/#endif lines are
 * elided in this extract.
 */
1394 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1395 const unsigned char *in, unsigned char *out,
1396 size_t len, ctr128_f stream)
1398 #if defined(OPENSSL_SMALL_FOOTPRINT)
1399 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1404 } is_endian = { 1 };
1405 unsigned int n, ctr;
1407 u64 mlen = ctx->len.u[1];
1408 void *key = ctx->key;
1409 # ifdef GCM_FUNCREF_4BIT
1410 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1412 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1413 const u8 *inp, size_t len) = ctx->ghash;
1418 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1420 ctx->len.u[1] = mlen;
1423 /* First call to encrypt finalizes GHASH(AAD) */
1428 if (is_endian.little)
1430 ctr = BSWAP4(ctx->Yi.d[3]);
1432 ctr = GETU32(ctx->Yi.c + 12);
1440 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1451 # if defined(GHASH) && defined(GHASH_CHUNK)
1452 while (len >= GHASH_CHUNK) {
1453 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1454 ctr += GHASH_CHUNK / 16;
1455 if (is_endian.little)
1457 ctx->Yi.d[3] = BSWAP4(ctr);
1459 PUTU32(ctx->Yi.c + 12, ctr);
/* Ciphertext was written to out; hash it in one bulk GHASH call. */
1463 GHASH(ctx, out, GHASH_CHUNK)
1469 if ((i = (len & (size_t)-16))) {
1472 (*stream) (in, out, j, key, ctx->Yi.c);
1473 ctr += (unsigned int)j;
1474 if (is_endian.little)
1476 ctx->Yi.d[3] = BSWAP4(ctr);
1478 PUTU32(ctx->Yi.c + 12, ctr);
1489 for (i = 0; i < 16; ++i)
1490 ctx->Xi.c[i] ^= out[i];
/* Trailing partial block: single-block encrypt via ctx->block. */
1497 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1499 if (is_endian.little)
1501 ctx->Yi.d[3] = BSWAP4(ctr);
1503 PUTU32(ctx->Yi.c + 12, ctr);
1508 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32 — GCM decryption using a caller-supplied
 * ctr128_f "stream" routine.  Mirrors the encrypt variant, except that
 * GHASH is applied to the ciphertext *before* it is decrypted.
 *
 * NOTE(review): non-contiguous excerpt; comments cover visible lines only.
 */
1518 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1519 const unsigned char *in, unsigned char *out,
1520 size_t len, ctr128_f stream)
/* Small-footprint builds fall back to the generic one-block path. */
1522 #if defined(OPENSSL_SMALL_FOOTPRINT)
1523 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1528 } is_endian = { 1 };
1529 unsigned int n, ctr;
1531 u64 mlen = ctx->len.u[1];
1532 void *key = ctx->key;
1533 # ifdef GCM_FUNCREF_4BIT
1534 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1536 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1537 const u8 *inp, size_t len) = ctx->ghash;
/* Same 2^36-32 byte message-length cap as the encrypt path. */
1542 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1544 ctx->len.u[1] = mlen;
1547 /* First call to decrypt finalizes GHASH(AAD) */
/* Load the 32-bit big-endian counter from the last word of Yi. */
1552 if (is_endian.little)
1554 ctr = BSWAP4(ctx->Yi.d[3]);
1556 ctr = GETU32(ctx->Yi.c + 12);
/* Drain leftover keystream bytes from a previous partial block. */
1565 *(out++) = c ^ ctx->EKi.c[n];
1577 # if defined(GHASH) && defined(GHASH_CHUNK)
/* Bulk path: hash the ciphertext first, then decrypt it in
 * GHASH_CHUNK-sized pieces via the stream routine. */
1578 while (len >= GHASH_CHUNK) {
1579 GHASH(ctx, in, GHASH_CHUNK);
1580 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1581 ctr += GHASH_CHUNK / 16;
1582 if (is_endian.little)
1584 ctx->Yi.d[3] = BSWAP4(ctr);
1586 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1595 if ((i = (len & (size_t)-16))) {
/* Without bulk GHASH, fold each ciphertext block into Xi by hand
 * before it is decrypted. */
1603 for (k = 0; k < 16; ++k)
1604 ctx->Xi.c[k] ^= in[k];
1611 (*stream) (in, out, j, key, ctx->Yi.c);
1612 ctr += (unsigned int)j;
1613 if (is_endian.little)
1615 ctx->Yi.d[3] = BSWAP4(ctr);
1617 PUTU32(ctx->Yi.c + 12, ctr);
/* Final partial block handled with the one-block cipher. */
1626 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1628 if (is_endian.little)
1630 ctx->Yi.d[3] = BSWAP4(ctr);
1632 PUTU32(ctx->Yi.c + 12, ctr);
1639 out[n] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish — finalize GCM: fold the bit lengths of the AAD
 * and ciphertext into GHASH, XOR with E(K, Y0), and optionally compare
 * the result against an expected tag.  Returns 0 when the tag matches.
 *
 * NOTE(review): non-contiguous excerpt; comments cover visible lines only.
 */
1649 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1655 } is_endian = { 1 };
/* Lengths are accumulated in bytes; GCM's length block wants bits. */
1656 u64 alen = ctx->len.u[0] << 3;
1657 u64 clen = ctx->len.u[1] << 3;
1658 #ifdef GCM_FUNCREF_4BIT
1659 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* Flush any buffered partial-block state before finalizing. */
1662 if (ctx->mres || ctx->ares)
/* Convert the length words to big-endian for the GHASH length block. */
1665 if (is_endian.little) {
1667 alen = BSWAP8(alen);
1668 clen = BSWAP8(clen);
1672 ctx->len.u[0] = alen;
1673 ctx->len.u[1] = clen;
/* Fallback without BSWAP8: assemble the 64-bit values from 32-bit
 * big-endian loads. */
1675 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1676 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* Xi ^= len(A) || len(C), followed by one more GHASH multiply. */
1680 ctx->Xi.u[0] ^= alen;
1681 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH result XOR E(K, Y0). */
1684 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1685 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* Constant-time compare; non-zero return signals tag mismatch. */
1687 if (tag && len <= sizeof(ctx->Xi))
1688 return CRYPTO_memcmp(ctx->Xi.c, tag, len);
/*
 * CRYPTO_gcm128_tag — finalize (with no expected tag to verify) and copy
 * up to len bytes of the computed authentication tag into the caller's
 * buffer.
 */
1693 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1695 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* The copy is clamped to the 16-byte tag held in ctx->Xi. */
1696 memcpy(tag, ctx->Xi.c,
1697 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new — heap-allocate a GCM128_CONTEXT and initialize it
 * with the given cipher key and block function.  On allocation failure
 * the context is left NULL (initialization is skipped).
 */
1700 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1702 GCM128_CONTEXT *ret;
1704 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1705 CRYPTO_gcm128_init(ret, key, block);
/*
 * CRYPTO_gcm128_release — scrub and free a context allocated by
 * CRYPTO_gcm128_new.  clear_free is used because the context holds
 * key-derived material (EKi/EK0/Htable).
 */
1710 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1712 OPENSSL_clear_free(ctx, sizeof(*ctx));
1715 #if defined(SELFTEST)
1717 # include <openssl/aes.h>
/*
 * GCM selftest vectors, sets 1-6 (128-bit AES keys).  Naming convention,
 * per TEST_CASE(): Kn = key, Pn = plaintext, An = AAD, IVn = IV,
 * Cn = expected ciphertext, Tn = expected tag.  NULL or implicitly
 * zero-initialized members mean "empty"/"all zeroes" for that test.
 * NOTE(review): these appear to be the standard GCM specification test
 * cases — confirm against NIST SP 800-38D before relying on that.
 */
/* Set 1: zero key, empty plaintext/AAD, zero IV. */
1720 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1721 static const u8 T1[] = {
1722 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1723 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* Set 2: one all-zero plaintext block. */
1730 static const u8 P2[16];
1731 static const u8 C2[] = {
1732 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1733 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1736 static const u8 T2[] = {
1737 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1738 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* Set 3: 64-byte plaintext, 96-bit IV. */
1743 static const u8 K3[] = {
1744 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1745 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1748 static const u8 P3[] = {
1749 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1750 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1751 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1752 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1753 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1754 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1755 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1756 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1759 static const u8 IV3[] = {
1760 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1761 0xde, 0xca, 0xf8, 0x88
1764 static const u8 C3[] = {
1765 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1766 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1767 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1768 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1769 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1770 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1771 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1772 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1775 static const u8 T3[] = {
1776 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1777 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/* Set 4: truncated (60-byte) plaintext plus 20-byte AAD. */
1783 static const u8 P4[] = {
1784 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1785 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1786 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1787 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1788 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1789 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1790 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1791 0xba, 0x63, 0x7b, 0x39
1794 static const u8 A4[] = {
1795 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1796 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1797 0xab, 0xad, 0xda, 0xd2
1800 static const u8 C4[] = {
1801 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1802 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1803 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1804 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1805 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1806 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1807 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1808 0x3d, 0x58, 0xe0, 0x91
1811 static const u8 T4[] = {
1812 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1813 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* Set 5: short (8-byte) IV. */
1820 static const u8 IV5[] = {
1821 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1824 static const u8 C5[] = {
1825 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1826 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1827 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1828 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1829 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1830 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1831 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1832 0xc2, 0x3f, 0x45, 0x98
1835 static const u8 T5[] = {
1836 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1837 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* Set 6: long (60-byte) IV. */
1844 static const u8 IV6[] = {
1845 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1846 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1847 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1848 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1849 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1850 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1851 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1852 0xa6, 0x37, 0xb3, 0x9b
1855 static const u8 C6[] = {
1856 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1857 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1858 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1859 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1860 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1861 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1862 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1863 0x4c, 0x34, 0xae, 0xe5
1866 static const u8 T6[] = {
1867 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1868 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/*
 * GCM selftest vectors, sets 7-12 (192-bit AES keys: K7[24], K9 is
 * 24 bytes).  Same naming convention as sets 1-6.
 */
/* Set 7: zero key, empty inputs. */
1872 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1873 static const u8 T7[] = {
1874 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1875 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* Set 8: one all-zero plaintext block. */
1882 static const u8 P8[16];
1883 static const u8 C8[] = {
1884 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1885 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1888 static const u8 T8[] = {
1889 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1890 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* Set 9: 64-byte plaintext, 96-bit IV. */
1895 static const u8 K9[] = {
1896 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1897 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1898 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1901 static const u8 P9[] = {
1902 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1903 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1904 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1905 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1906 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1907 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1908 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1909 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1912 static const u8 IV9[] = {
1913 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1914 0xde, 0xca, 0xf8, 0x88
1917 static const u8 C9[] = {
1918 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1919 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1920 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1921 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1922 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1923 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1924 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1925 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1928 static const u8 T9[] = {
1929 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1930 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* Set 10: 60-byte plaintext plus 20-byte AAD. */
1936 static const u8 P10[] = {
1937 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1938 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1939 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1940 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1941 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1942 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1943 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1944 0xba, 0x63, 0x7b, 0x39
1947 static const u8 A10[] = {
1948 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1949 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1950 0xab, 0xad, 0xda, 0xd2
1953 static const u8 C10[] = {
1954 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1955 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1956 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1957 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1958 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1959 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1960 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1961 0xcc, 0xda, 0x27, 0x10
1964 static const u8 T10[] = {
1965 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1966 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* Set 11: short (8-byte) IV. */
1973 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1975 static const u8 C11[] = {
1976 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1977 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1978 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1979 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1980 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1981 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1982 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1983 0xa0, 0xf0, 0x62, 0xf7
1986 static const u8 T11[] = {
1987 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1988 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* Set 12: long (60-byte) IV. */
1995 static const u8 IV12[] = {
1996 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1997 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1998 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1999 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2000 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2001 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2002 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2003 0xa6, 0x37, 0xb3, 0x9b
2006 static const u8 C12[] = {
2007 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2008 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2009 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2010 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2011 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2012 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2013 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2014 0xe9, 0xb7, 0x37, 0x3b
2017 static const u8 T12[] = {
2018 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2019 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/*
 * GCM selftest vectors, sets 13-19 (256-bit AES keys: K13[32], K15 is
 * 32 bytes).  Same naming convention as the earlier sets.
 */
/* Set 13: zero key, empty inputs. */
2023 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2024 static const u8 T13[] = {
2025 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2026 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* Set 14: one all-zero plaintext block, zero IV. */
2032 static const u8 P14[16], IV14[12];
2033 static const u8 C14[] = {
2034 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2035 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2038 static const u8 T14[] = {
2039 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2040 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* Set 15: 64-byte plaintext, 96-bit IV. */
2045 static const u8 K15[] = {
2046 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2047 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2048 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2049 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2052 static const u8 P15[] = {
2053 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2054 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2055 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2056 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2057 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2058 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2059 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2060 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2063 static const u8 IV15[] = {
2064 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2065 0xde, 0xca, 0xf8, 0x88
2068 static const u8 C15[] = {
2069 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2070 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2071 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2072 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2073 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2074 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2075 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2076 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2079 static const u8 T15[] = {
2080 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2081 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* Set 16: 60-byte plaintext plus 20-byte AAD. */
2087 static const u8 P16[] = {
2088 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2089 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2090 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2091 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2092 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2093 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2094 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2095 0xba, 0x63, 0x7b, 0x39
2098 static const u8 A16[] = {
2099 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2100 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2101 0xab, 0xad, 0xda, 0xd2
2104 static const u8 C16[] = {
2105 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2106 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2107 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2108 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2109 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2110 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2111 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2112 0xbc, 0xc9, 0xf6, 0x62
2115 static const u8 T16[] = {
2116 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2117 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* Set 17: short (8-byte) IV. */
2124 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2126 static const u8 C17[] = {
2127 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2128 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2129 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2130 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2131 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2132 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2133 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2134 0xf4, 0x7c, 0x9b, 0x1f
2137 static const u8 T17[] = {
2138 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2139 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* Set 18: long (60-byte) IV. */
2146 static const u8 IV18[] = {
2147 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2148 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2149 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2150 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2151 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2152 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2153 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2154 0xa6, 0x37, 0xb3, 0x9b
2157 static const u8 C18[] = {
2158 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2159 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2160 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2161 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2162 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2163 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2164 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2165 0x44, 0xae, 0x7e, 0x3f
2168 static const u8 T18[] = {
2169 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2170 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* Set 19: 128-byte AAD-only vectors (the other members of this set
 * are elided from this excerpt). */
2178 static const u8 A19[] = {
2179 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2180 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2181 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2182 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2183 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2184 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2185 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2186 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2187 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2188 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2189 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2190 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2191 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2192 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2193 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2194 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2197 static const u8 T19[] = {
2198 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2199 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/*
 * GCM selftest vectors, set 20: a 64-byte IV (exercises the non-96-bit
 * IV derivation path) and a 288-byte all-zero plaintext.
 */
2205 /* this results in 0xff in counter LSB */
2206 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2208 static const u8 P20[288];
2209 static const u8 C20[] = {
2210 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2211 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2212 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2213 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2214 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2215 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2216 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2217 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2218 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2219 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2220 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2221 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2222 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2223 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2224 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2225 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2226 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2227 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2228 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2229 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2230 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2231 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2232 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2233 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2234 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2235 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2236 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2237 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2238 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2239 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2240 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2241 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2242 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2243 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2244 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2245 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2248 static const u8 T20[] = {
2249 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2250 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * TEST_CASE(n): run selftest vector set n in both directions.
 * Encrypt P##n and check the ciphertext against C##n and the tag
 * against T##n, then decrypt C##n and check the recovered plaintext
 * against P##n with the same tag; each failure increments `ret` and
 * prints a diagnostic.  NULL vector members (empty plaintext/AAD)
 * skip the corresponding step.  CRYPTO_gcm128_finish returning
 * non-zero means the 16-byte tag did not verify.
 * NOTE(review): the macro body continues past this excerpt; no
 * comments are placed inside it because a line without a trailing
 * backslash would terminate the #define.
 */
2253 # define TEST_CASE(n) do { \
2254 u8 out[sizeof(P##n)]; \
2255 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2256 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2257 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2258 memset(out,0,sizeof(out)); \
2259 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2260 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2261 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2262 (C##n && memcmp(out,C##n,sizeof(out)))) \
2263 ret++, printf ("encrypt test#%d failed.\n",n); \
2264 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2265 memset(out,0,sizeof(out)); \
2266 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2267 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2268 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2269 (P##n && memcmp(out,P##n,sizeof(out)))) \
2270 ret++, printf ("decrypt test#%d failed.\n",n); \
2300 # ifdef OPENSSL_CPUID_OBJ
2302 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2309 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2310 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2311 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2313 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2314 start = OPENSSL_rdtsc();
2315 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2316 gcm_t = OPENSSL_rdtsc() - start;
2318 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2319 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2320 (block128_f) AES_encrypt);
2321 start = OPENSSL_rdtsc();
2322 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2323 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2324 (block128_f) AES_encrypt);
2325 ctr_t = OPENSSL_rdtsc() - start;
2327 printf("%.2f-%.2f=%.2f\n",
2328 gcm_t / (double)sizeof(buf),
2329 ctr_t / (double)sizeof(buf),
2330 (gcm_t - ctr_t) / (double)sizeof(buf));
2333 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2334 const u8 *inp, size_t len) = ctx.ghash;
2336 GHASH((&ctx), buf.c, sizeof(buf));
2337 start = OPENSSL_rdtsc();
2338 for (i = 0; i < 100; ++i)
2339 GHASH((&ctx), buf.c, sizeof(buf));
2340 gcm_t = OPENSSL_rdtsc() - start;
2341 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);