1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
/*
 * Byte-order helpers and the one-bit GHASH reduction macro.
 * NOTE(review): this extract is a sparse listing (original line numbers
 * prefix each line; many lines are elided), so the matching #else/#endif
 * lines of these conditionals are not visible here.
 */
54 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
55 /* redefine, because alignment is ensured */
57 # define GETU32(p) BSWAP4(*(const u32 *)(p))
59 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK() places a 16-bit constant into the top 16 bits of a size_t,
 * so rem tables work for both 32- and 64-bit size_t. */
62 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V right by one bit and, when the
 * bit shifted out was set, XOR in the GCM polynomial constant 0xE1 in the
 * top byte.  sizeof(size_t) selects the 64-bit or 32-bit arithmetic path
 * at compile time.  NOTE(review): the "} else {" and closing lines of the
 * macro are elided in this extract.
 */
63 #define REDUCE1BIT(V) do { \
64 if (sizeof(size_t)==8) { \
65 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
66 V.lo = (V.hi<<63)|(V.lo>>1); \
67 V.hi = (V.hi>>1 )^T; \
70 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
71 V.lo = (V.hi<<63)|(V.lo>>1); \
72 V.hi = (V.hi>>1 )^((u64)T<<32); \
77 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
78 * never be set to 8. 8 is effectively reserved for testing purposes.
79 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
80 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
81 * whole spectrum of possible table driven implementations. Why? In
82 * non-"Shoup's" case memory access pattern is segmented in such manner,
83 * that it's trivial to see that cache timing information can reveal
84 * fair portion of intermediate hash value. Given that ciphertext is
85 * always available to attacker, it's possible for him to attempt to
86 * deduce secret parameter H and if successful, tamper with messages
87 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
88 * not as trivial, but there is no reason to believe that it's resistant
89 * to cache-timing attack. And the thing about "8-bit" implementation is
90 * that it consumes 16 (sixteen) times more memory, 4KB per individual
91 * key + 1KB shared. Well, on pros side it should be twice as fast as
92 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
93 * was observed to run ~75% faster, closer to 100% for commercial
94 * compilers... Yet "4-bit" procedure is preferred, because it's
95 * believed to provide better security-performance balance and adequate
96 * all-round performance. "All-round" refers to things like:
98 * - shorter setup time effectively improves overall timing for
99 * handling short messages;
100 * - larger table allocation can become unbearable because of VM
101 * subsystem penalties (for example on Windows large enough free
102 * results in VM working set trimming, meaning that consequent
103 * malloc would immediately incur working set expansion);
104 * - larger table has larger cache footprint, which can affect
105 * performance of other code paths (not necessarily even from same
106 * thread in Hyper-Threading world);
108 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit: precompute the 256-entry "8-bit Shoup" multiplication
 * table Htable[] from the hash subkey H.  NOTE(review): declarations and
 * the REDUCE1BIT halving-loop body are elided in this extract; only the
 * table-fill skeleton is visible.
 */
112 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
127 for (i = 2; i < 256; i <<= 1) {
128 u128 *Hi = Htable + i, H0 = *Hi;
129 for (j = 1; j < i; ++j) {
/* Htable[i + j] = Htable[i] ^ Htable[j]: addition in GF(2^128) is XOR */
130 Hi[j].hi = H0.hi ^ Htable[j].hi;
131 Hi[j].lo = H0.lo ^ Htable[j].lo;
/*
 * gcm_gmult_8bit: multiply Xi by H in GF(2^128) using the 8-bit table,
 * processing Xi one byte at a time from the least-significant end.
 * NOTE(review): the surrounding loop structure and the non-BSWAP8 output
 * path are partially elided in this extract.
 */
136 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
139 const u8 *xi = (const u8 *)Xi + 15;
/*
 * rem_8bit: reduction constants for the bits shifted out when the
 * accumulator is moved right by 8; PACK() positions each 16-bit value in
 * the top bits of a size_t.
 */
145 static const size_t rem_8bit[256] = {
146 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
147 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
148 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
149 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
150 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
151 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
152 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
153 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
154 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
155 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
156 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
157 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
158 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
159 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
160 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
161 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
162 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
163 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
164 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
165 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
166 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
167 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
168 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
169 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
170 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
171 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
172 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
173 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
174 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
175 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
176 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
177 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
178 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
179 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
180 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
181 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
182 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
183 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
184 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
185 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
186 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
187 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
188 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
189 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
190 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
191 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
192 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
193 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
194 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
195 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
196 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
197 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
198 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
199 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
200 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
201 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
202 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
203 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
204 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
205 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
206 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
207 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
208 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
209 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
/* accumulate the table entry selected by the current byte of Xi */
213 Z.hi ^= Htable[n].hi;
214 Z.lo ^= Htable[n].lo;
/* shift the accumulator right by 8 bits and fold the dropped byte back
 * in via rem_8bit; the 32-bit size_t path XORs at bit 32 instead. */
221 rem = (size_t)Z.lo & 0xff;
222 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
224 if (sizeof(size_t) == 8)
225 Z.hi ^= rem_8bit[rem];
227 Z.hi ^= (u64)rem_8bit[rem] << 32;
/* write Z back to Xi in big-endian byte order */
230 if (is_endian.little) {
232 Xi[0] = BSWAP8(Z.hi);
233 Xi[1] = BSWAP8(Z.lo);
237 v = (u32)(Z.hi >> 32);
241 v = (u32)(Z.lo >> 32);
252 # define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: precompute the 16-entry 4-bit table Htable[] from the
 * hash subkey H.  Small-footprint builds use a loop; the default build
 * unrolls the XOR combinations explicitly.  NOTE(review): declarations,
 * the halving loop body and several #else/#endif lines are elided in
 * this extract.
 */
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259 # if defined(OPENSSL_SMALL_FOOTPRINT)
268 # if defined(OPENSSL_SMALL_FOOTPRINT)
269 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
274 for (i = 2; i < 16; i <<= 1) {
275 u128 *Hi = Htable + i;
277 for (V = *Hi, j = 1; j < i; ++j) {
278 Hi[j].hi = V.hi ^ Htable[j].hi;
279 Hi[j].lo = V.lo ^ Htable[j].lo;
/* unrolled variant: Htable[i^j] = Htable[i] ^ Htable[j] */
290 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
292 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
293 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
294 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
296 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
297 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
298 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
299 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
300 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
301 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
302 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
/* ARM assembler variants consume Htable with a different dword layout,
 * so rearrange the words on little-endian ARM builds. */
304 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 * ARM assembler expects specific dword order in Htable.
315 if (is_endian.little)
316 for (j = 0; j < 16; ++j) {
321 for (j = 0; j < 16; ++j) {
323 Htable[j].hi = V.lo << 32 | V.lo >> 32;
324 Htable[j].lo = V.hi << 32 | V.hi >> 32;
/* rem_4bit: reduction constants for a 4-bit right shift of the
 * accumulator, PACK()-ed into the top bits of a size_t. */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
/*
 * gcm_gmult_4bit: multiply Xi by H in GF(2^128) using the 16-entry
 * table, consuming Xi one nibble at a time from byte 15 downwards.
 * NOTE(review): loop headers and several closing braces are elided in
 * this extract.
 */
338 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
342 size_t rem, nlo, nhi;
348 nlo = ((const u8 *)Xi)[15];
352 Z.hi = Htable[nlo].hi;
353 Z.lo = Htable[nlo].lo;
/* shift right 4 bits and fold the dropped nibble back in via rem_4bit */
356 rem = (size_t)Z.lo & 0xf;
357 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
359 if (sizeof(size_t) == 8)
360 Z.hi ^= rem_4bit[rem];
362 Z.hi ^= (u64)rem_4bit[rem] << 32;
364 Z.hi ^= Htable[nhi].hi;
365 Z.lo ^= Htable[nhi].lo;
370 nlo = ((const u8 *)Xi)[cnt];
374 rem = (size_t)Z.lo & 0xf;
375 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
377 if (sizeof(size_t) == 8)
378 Z.hi ^= rem_4bit[rem];
380 Z.hi ^= (u64)rem_4bit[rem] << 32;
382 Z.hi ^= Htable[nlo].hi;
383 Z.lo ^= Htable[nlo].lo;
/* store Z back into Xi in big-endian byte order */
386 if (is_endian.little) {
388 Xi[0] = BSWAP8(Z.hi);
389 Xi[1] = BSWAP8(Z.lo);
393 v = (u32)(Z.hi >> 32);
397 v = (u32)(Z.lo >> 32);
408 # if !defined(OPENSSL_SMALL_FOOTPRINT)
410 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
411 * details... Compiler-generated code doesn't seem to give any
412 * performance improvement, at least not on x86[_64]. It's here
413 * mostly as reference and a placeholder for possible future
414 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: hash `len` bytes of `inp` (a multiple of 16) into Xi.
 * One compile-time variant re-uses the per-nibble loop of
 * gcm_gmult_4bit; the other builds per-key Hshr4/Hshl4 helper tables and
 * uses a shared 8-bit reduction table.  NOTE(review): the #if selecting
 * between the two variants, plus loop headers and closing braces, are
 * elided in this extract.
 */
416 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
417 const u8 *inp, size_t len)
421 size_t rem, nlo, nhi;
430 nlo = ((const u8 *)Xi)[15];
435 Z.hi = Htable[nlo].hi;
436 Z.lo = Htable[nlo].lo;
439 rem = (size_t)Z.lo & 0xf;
440 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
442 if (sizeof(size_t) == 8)
443 Z.hi ^= rem_4bit[rem];
445 Z.hi ^= (u64)rem_4bit[rem] << 32;
447 Z.hi ^= Htable[nhi].hi;
448 Z.lo ^= Htable[nhi].lo;
453 nlo = ((const u8 *)Xi)[cnt];
458 rem = (size_t)Z.lo & 0xf;
459 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
461 if (sizeof(size_t) == 8)
462 Z.hi ^= rem_4bit[rem];
464 Z.hi ^= (u64)rem_4bit[rem] << 32;
466 Z.hi ^= Htable[nlo].hi;
467 Z.lo ^= Htable[nlo].lo;
471 * Extra 256+16 bytes per-key plus 512 bytes shared tables
472 * [should] give ~50% improvement... One could have PACK()-ed
473 * the rem_8bit even here, but the priority is to minimize
/* per-invocation helper tables derived from Htable (see loop below) */
476 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
477 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
/* shared 8-bit reduction table (un-PACK()-ed unsigned short form) */
478 static const unsigned short rem_8bit[256] = {
479 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
480 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
481 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
482 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
483 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
484 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
485 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
486 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
487 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
488 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
489 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
490 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
491 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
492 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
493 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
494 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
495 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
496 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
497 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
498 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
499 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
500 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
501 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
502 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
503 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
504 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
505 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
506 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
507 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
508 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
509 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
510 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
513 * This pre-processing phase slows down procedure by approximately
514 * same time as it makes each loop spin faster. In other words
515 * single block performance is approximately same as straightforward
516 * "4-bit" implementation, and then it goes only faster...
518 for (cnt = 0; cnt < 16; ++cnt) {
519 Z.hi = Htable[cnt].hi;
520 Z.lo = Htable[cnt].lo;
521 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
522 Hshr4[cnt].hi = (Z.hi >> 4);
523 Hshl4[cnt] = (u8)(Z.lo << 4);
/* main loop: one byte of Xi^inp per iteration, bytes 15..1 */
527 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
528 nlo = ((const u8 *)Xi)[cnt];
533 Z.hi ^= Htable[nlo].hi;
534 Z.lo ^= Htable[nlo].lo;
536 rem = (size_t)Z.lo & 0xff;
538 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
541 Z.hi ^= Hshr4[nhi].hi;
542 Z.lo ^= Hshr4[nhi].lo;
543 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
/* final (most significant) byte uses a 4-bit tail step */
546 nlo = ((const u8 *)Xi)[0];
551 Z.hi ^= Htable[nlo].hi;
552 Z.lo ^= Htable[nlo].lo;
554 rem = (size_t)Z.lo & 0xf;
556 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
559 Z.hi ^= Htable[nhi].hi;
560 Z.lo ^= Htable[nhi].lo;
561 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
/* store Z back into Xi big-endian, then advance to the next block */
564 if (is_endian.little) {
566 Xi[0] = BSWAP8(Z.hi);
567 Xi[1] = BSWAP8(Z.lo);
571 v = (u32)(Z.hi >> 32);
575 v = (u32)(Z.lo >> 32);
584 } while (inp += 16, len -= 16);
/* assembler builds provide these externally instead */
588 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
589 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
593 # define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
594 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
595 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
597 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
598 * effect. In other words idea is to hash data while it's still in L1 cache
599 * after encryption pass...
601 # define GHASH_CHUNK (3*1024)
604 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free bit-by-bit GF(2^128) multiplication of Xi
 * by H (TABLE_BITS==1 build).  Walks Xi one `long` at a time, one bit
 * per inner iteration.  NOTE(review): the accumulation statements of the
 * inner loop and several closing braces are elided in this extract.
 */
606 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
608 u128 V, Z = { 0, 0 };
611 const long *xi = (const long *)Xi;
617 V.hi = H[0]; /* H is in host byte order, no byte swapping */
620 for (j = 0; j < 16 / sizeof(long); ++j) {
/* load the next `long` of Xi as a big-endian value */
621 if (is_endian.little) {
622 if (sizeof(long) == 8) {
624 X = (long)(BSWAP8(xi[j]));
626 const u8 *p = (const u8 *)(xi + j);
627 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
630 const u8 *p = (const u8 *)(xi + j);
/* M is an all-ones/all-zeros mask from the top bit of X */
636 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
637 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
/* write Z back to Xi in big-endian byte order */
645 if (is_endian.little) {
647 Xi[0] = BSWAP8(Z.hi);
648 Xi[1] = BSWAP8(Z.lo);
652 v = (u32)(Z.hi >> 32);
656 v = (u32)(Z.lo >> 32);
667 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Platform dispatch: declare assembler GHASH implementations (CLMUL/AVX
 * on x86, NEON/PMULL on ARM, VIS3 on SPARC, POWER8 on PPC) and define
 * GCM_FUNCREF_4BIT so GCM_MUL/GHASH go through function pointers chosen
 * at runtime in CRYPTO_gcm128_init.  NOTE(review): matching #endif lines
 * are elided in this extract.
 */
671 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
672 # if !defined(I386_ONLY) && \
673 (defined(__i386) || defined(__i386__) || \
674 defined(__x86_64) || defined(__x86_64__) || \
675 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
676 # define GHASH_ASM_X86_OR_64
677 # define GCM_FUNCREF_4BIT
678 extern unsigned int OPENSSL_ia32cap_P[];
680 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
681 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
682 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* 32-bit x86 has no separate AVX path; alias it to the CLMUL one */
685 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
686 # define gcm_init_avx gcm_init_clmul
687 # define gcm_gmult_avx gcm_gmult_clmul
688 # define gcm_ghash_avx gcm_ghash_clmul
690 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
691 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
692 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
696 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
697 # define GHASH_ASM_X86
698 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
699 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
702 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
703 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
706 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
707 # include "arm_arch.h"
708 # if __ARM_MAX_ARCH__>=7
709 # define GHASH_ASM_ARM
710 # define GCM_FUNCREF_4BIT
711 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
712 # if defined(__arm__) || defined(__arm)
713 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
715 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
716 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
717 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
719 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
720 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
721 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
724 # elif defined(__sparc__) || defined(__sparc)
725 # include "sparc_arch.h"
726 # define GHASH_ASM_SPARC
727 # define GCM_FUNCREF_4BIT
728 extern unsigned int OPENSSL_sparcv9cap_P[];
729 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
730 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
731 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
733 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
734 # include "ppc_arch.h"
735 # define GHASH_ASM_PPC
736 # define GCM_FUNCREF_4BIT
737 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
738 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
739 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
/* when function references are in use, GCM_MUL/GHASH call through the
 * locally cached gcm_gmult_p/gcm_ghash_p pointers */
744 #ifdef GCM_FUNCREF_4BIT
746 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
749 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, derive the hash subkey
 * H = E_K(0^128) via `block`, convert it to host byte order, then build
 * the multiplication table and select gmult/ghash implementations based
 * on runtime CPU capabilities.  NOTE(review): several #if/#else/#endif
 * and closing braces are elided in this extract.
 */
753 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
760 memset(ctx, 0, sizeof(*ctx));
/* H = E_K(0^128); ctx->H.c was zeroed by the memset above */
764 (*block) (ctx->H.c, ctx->H.c, key);
766 if (is_endian.little) {
767 /* H is stored in host byte order */
769 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
770 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
774 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
775 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
781 gcm_init_8bit(ctx->Htable, ctx->H.u);
784 # define CTX__GHASH(f) (ctx->ghash = (f))
786 # define CTX__GHASH(f) (ctx->ghash = NULL)
788 # if defined(GHASH_ASM_X86_OR_64)
789 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
/* runtime CPU feature selection: AVX+MOVBE > CLMUL > 4-bit fallbacks */
790 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
791 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
792 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
793 gcm_init_avx(ctx->Htable, ctx->H.u);
794 ctx->gmult = gcm_gmult_avx;
795 CTX__GHASH(gcm_ghash_avx);
797 gcm_init_clmul(ctx->Htable, ctx->H.u);
798 ctx->gmult = gcm_gmult_clmul;
799 CTX__GHASH(gcm_ghash_clmul);
804 gcm_init_4bit(ctx->Htable, ctx->H.u);
805 # if defined(GHASH_ASM_X86) /* x86 only */
806 # if defined(OPENSSL_IA32_SSE2)
807 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
809 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
811 ctx->gmult = gcm_gmult_4bit_mmx;
812 CTX__GHASH(gcm_ghash_4bit_mmx);
814 ctx->gmult = gcm_gmult_4bit_x86;
815 CTX__GHASH(gcm_ghash_4bit_x86);
818 ctx->gmult = gcm_gmult_4bit;
819 CTX__GHASH(gcm_ghash_4bit);
821 # elif defined(GHASH_ASM_ARM)
822 # ifdef PMULL_CAPABLE
824 gcm_init_v8(ctx->Htable, ctx->H.u);
825 ctx->gmult = gcm_gmult_v8;
826 CTX__GHASH(gcm_ghash_v8);
831 gcm_init_neon(ctx->Htable, ctx->H.u);
832 ctx->gmult = gcm_gmult_neon;
833 CTX__GHASH(gcm_ghash_neon);
837 gcm_init_4bit(ctx->Htable, ctx->H.u);
838 ctx->gmult = gcm_gmult_4bit;
839 CTX__GHASH(gcm_ghash_4bit);
841 # elif defined(GHASH_ASM_SPARC)
842 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
843 gcm_init_vis3(ctx->Htable, ctx->H.u);
844 ctx->gmult = gcm_gmult_vis3;
845 CTX__GHASH(gcm_ghash_vis3);
847 gcm_init_4bit(ctx->Htable, ctx->H.u);
848 ctx->gmult = gcm_gmult_4bit;
849 CTX__GHASH(gcm_ghash_4bit);
851 # elif defined(GHASH_ASM_PPC)
852 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
853 gcm_init_p8(ctx->Htable, ctx->H.u);
854 ctx->gmult = gcm_gmult_p8;
855 CTX__GHASH(gcm_ghash_p8);
857 gcm_init_4bit(ctx->Htable, ctx->H.u);
858 ctx->gmult = gcm_gmult_4bit;
859 CTX__GHASH(gcm_ghash_4bit);
862 gcm_init_4bit(ctx->Htable, ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset per-message state and derive the initial
 * counter block Yi from the IV.  A 96-bit IV is copied directly (with
 * counter set elsewhere); other lengths are GHASHed 16 bytes at a time,
 * then the IV bit length len0 is folded in.  Also precomputes
 * EK0 = E_K(Yi) for the final tag.  NOTE(review): loop/branch headers
 * and closing braces are elided in this extract.
 */
868 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
876 #ifdef GCM_FUNCREF_4BIT
877 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
884 ctx->len.u[0] = 0; /* AAD length */
885 ctx->len.u[1] = 0; /* message length */
/* 96-bit IV fast path: Yi = IV || counter */
890 memcpy(ctx->Yi.c, iv, 12);
/* otherwise GHASH the IV: full 16-byte chunks, then the remainder */
898 for (i = 0; i < 16; ++i)
899 ctx->Yi.c[i] ^= iv[i];
905 for (i = 0; i < len; ++i)
906 ctx->Yi.c[i] ^= iv[i];
/* fold in the IV length in bits (len0), big-endian */
910 if (is_endian.little) {
912 ctx->Yi.u[1] ^= BSWAP8(len0);
914 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
915 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
916 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
917 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
918 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
919 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
920 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
921 ctx->Yi.c[15] ^= (u8)(len0);
924 ctx->Yi.u[1] ^= len0;
/* extract the 32-bit counter from the last word of Yi */
928 if (is_endian.little)
930 ctr = BSWAP4(ctx->Yi.d[3]);
932 ctr = GETU32(ctx->Yi.c + 12);
/* EK0 = E_K(Y0), used when computing the authentication tag */
938 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
/* store the (incremented) counter back into Yi */
940 if (is_endian.little)
942 ctx->Yi.d[3] = BSWAP4(ctr);
944 PUTU32(ctx->Yi.c + 12, ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into Xi.
 * Returns non-zero on error (AAD length limit exceeded — see the check
 * below).  NOTE(review): branch headers, GHASH/GCM_MUL call sites and
 * closing braces are elided in this extract.
 */
950 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
955 u64 alen = ctx->len.u[0];
956 #ifdef GCM_FUNCREF_4BIT
957 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
959 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
960 const u8 *inp, size_t len) = ctx->ghash;
/* enforce the GCM AAD length limit (2^64 bits = 2^61 bytes) and detect
 * 64-bit wrap-around of the accumulated length */
968 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
970 ctx->len.u[0] = alen;
/* finish a partially-filled Xi block left over from a previous call */
975 ctx->Xi.c[n] ^= *(aad++);
/* bulk path: i = length rounded down to a multiple of 16 */
987 if ((i = (len & (size_t)-16))) {
994 for (i = 0; i < 16; ++i)
995 ctx->Xi.c[i] ^= aad[i];
/* buffer the trailing partial block in Xi for the next call */
1002 n = (unsigned int)len;
1003 for (i = 0; i < len; ++i)
1004 ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt `in` to `out` while GHASHing the
 * ciphertext into Xi.  Returns non-zero on error (message length limit).
 * Fast paths process GHASH_CHUNK-sized strides and whole 16-byte blocks
 * word-at-a-time; the small-footprint path works byte-by-byte.
 * NOTE(review): loop/branch headers, the STRICT_ALIGNMENT fallback and
 * closing braces are elided in this extract.
 */
1011 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
1012 const unsigned char *in, unsigned char *out,
1018 } is_endian = { 1 };
1019 unsigned int n, ctr;
1021 u64 mlen = ctx->len.u[1];
1022 block128_f block = ctx->block;
1023 void *key = ctx->key;
1024 #ifdef GCM_FUNCREF_4BIT
1025 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1026 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1027 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1028 const u8 *inp, size_t len) = ctx->ghash;
/* enforce NIST's plaintext limit of 2^36 - 32 bytes and detect wrap */
1033 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1035 ctx->len.u[1] = mlen;
1038 /* First call to encrypt finalizes GHASH(AAD) */
/* load the 32-bit counter from the last word of Yi */
1043 if (is_endian.little)
1045 ctr = BSWAP4(ctx->Yi.d[3]);
1047 ctr = GETU32(ctx->Yi.c + 12);
1053 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1054 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* drain a partial keystream block left over from a previous call */
1058 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1069 # if defined(STRICT_ALIGNMENT)
1070 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1074 # if defined(GHASH_CHUNK)
/* chunked path: encrypt GHASH_CHUNK bytes, then hash them while the
 * ciphertext is presumably still in L1 cache */
1075 while (len >= GHASH_CHUNK) {
1076 size_t j = GHASH_CHUNK;
1079 size_t *out_t = (size_t *)out;
1080 const size_t *in_t = (const size_t *)in;
1082 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1084 if (is_endian.little)
1086 ctx->Yi.d[3] = BSWAP4(ctr);
1088 PUTU32(ctx->Yi.c + 12, ctr);
1092 for (i = 0; i < 16 / sizeof(size_t); ++i)
1093 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1098 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
/* remaining whole blocks: i = len rounded down to a multiple of 16 */
1102 if ((i = (len & (size_t)-16))) {
1106 size_t *out_t = (size_t *)out;
1107 const size_t *in_t = (const size_t *)in;
1109 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1111 if (is_endian.little)
1113 ctx->Yi.d[3] = BSWAP4(ctr);
1115 PUTU32(ctx->Yi.c + 12, ctr);
1119 for (i = 0; i < 16 / sizeof(size_t); ++i)
1120 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1125 GHASH(ctx, out - j, j);
/* no-GHASH() variant: fold ciphertext into Xi inline, word by word */
1129 size_t *out_t = (size_t *)out;
1130 const size_t *in_t = (const size_t *)in;
1132 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1134 if (is_endian.little)
1136 ctx->Yi.d[3] = BSWAP4(ctr);
1138 PUTU32(ctx->Yi.c + 12, ctr);
1142 for (i = 0; i < 16 / sizeof(size_t); ++i)
1143 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* trailing partial block: generate one keystream block, use n bytes */
1151 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1153 if (is_endian.little)
1155 ctx->Yi.d[3] = BSWAP4(ctr);
1157 PUTU32(ctx->Yi.c + 12, ctr);
1162 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT byte-at-a-time path */
1172 for (i = 0; i < len; ++i) {
1174 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1176 if (is_endian.little)
1178 ctx->Yi.d[3] = BSWAP4(ctr);
1180 PUTU32(ctx->Yi.c + 12, ctr);
1185 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt — GHASH the
 * ciphertext *before* (or while) CTR-decrypting it, since GCM
 * authenticates ciphertext.  Returns non-zero on error (length limit).
 * NOTE(review): loop/branch headers, the STRICT_ALIGNMENT fallback and
 * closing braces are elided in this extract.
 */
1195 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1196 const unsigned char *in, unsigned char *out,
1202 } is_endian = { 1 };
1203 unsigned int n, ctr;
1205 u64 mlen = ctx->len.u[1];
1206 block128_f block = ctx->block;
1207 void *key = ctx->key;
1208 #ifdef GCM_FUNCREF_4BIT
1209 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1212 const u8 *inp, size_t len) = ctx->ghash;
/* enforce NIST's message limit of 2^36 - 32 bytes and detect wrap */
1217 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1219 ctx->len.u[1] = mlen;
1222 /* First call to decrypt finalizes GHASH(AAD) */
1227 if (is_endian.little)
1229 ctr = BSWAP4(ctx->Yi.d[3]);
1231 ctr = GETU32(ctx->Yi.c + 12);
1237 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1238 if (16 % sizeof(size_t) == 0) { /* always true actually */
/* drain a partial keystream block; c holds the ciphertext byte */
1243 *(out++) = c ^ ctx->EKi.c[n];
1255 # if defined(STRICT_ALIGNMENT)
1256 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1260 # if defined(GHASH_CHUNK)
/* chunked path: hash the ciphertext first, then decrypt it */
1261 while (len >= GHASH_CHUNK) {
1262 size_t j = GHASH_CHUNK;
1264 GHASH(ctx, in, GHASH_CHUNK);
1266 size_t *out_t = (size_t *)out;
1267 const size_t *in_t = (const size_t *)in;
1269 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1271 if (is_endian.little)
1273 ctx->Yi.d[3] = BSWAP4(ctr);
1275 PUTU32(ctx->Yi.c + 12, ctr);
1279 for (i = 0; i < 16 / sizeof(size_t); ++i)
1280 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* remaining whole blocks: i = len rounded down to a multiple of 16 */
1288 if ((i = (len & (size_t)-16))) {
1291 size_t *out_t = (size_t *)out;
1292 const size_t *in_t = (const size_t *)in;
1294 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1296 if (is_endian.little)
1298 ctx->Yi.d[3] = BSWAP4(ctr);
1300 PUTU32(ctx->Yi.c + 12, ctr);
1304 for (i = 0; i < 16 / sizeof(size_t); ++i)
1305 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* no-GHASH() variant: fold ciphertext into Xi inline while decrypting */
1313 size_t *out_t = (size_t *)out;
1314 const size_t *in_t = (const size_t *)in;
1316 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1318 if (is_endian.little)
1320 ctx->Yi.d[3] = BSWAP4(ctr);
1322 PUTU32(ctx->Yi.c + 12, ctr);
1326 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1328 out[i] = c ^ ctx->EKi.t[i];
/* trailing partial block */
1338 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1340 if (is_endian.little)
1342 ctx->Yi.d[3] = BSWAP4(ctr);
1344 PUTU32(ctx->Yi.c + 12, ctr);
1351 out[n] = c ^ ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT byte-at-a-time path */
1361 for (i = 0; i < len; ++i) {
1364 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1366 if (is_endian.little)
1368 ctx->Yi.d[3] = BSWAP4(ctr);
1370 PUTU32(ctx->Yi.c + 12, ctr);
1376 out[i] = c ^ ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt but drives a
 * hardware/asm ctr128_f `stream` routine that encrypts many blocks per
 * call; small-footprint builds just delegate to CRYPTO_gcm128_encrypt.
 * NOTE(review): loop/branch headers and closing braces are elided in
 * this extract.
 */
1387 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1388 const unsigned char *in, unsigned char *out,
1389 size_t len, ctr128_f stream)
1391 #if defined(OPENSSL_SMALL_FOOTPRINT)
1392 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1397 } is_endian = { 1 };
1398 unsigned int n, ctr;
1400 u64 mlen = ctx->len.u[1];
1401 void *key = ctx->key;
1402 # ifdef GCM_FUNCREF_4BIT
1403 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1405 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1406 const u8 *inp, size_t len) = ctx->ghash;
/* enforce NIST's plaintext limit of 2^36 - 32 bytes and detect wrap */
1411 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1413 ctx->len.u[1] = mlen;
1416 /* First call to encrypt finalizes GHASH(AAD) */
1421 if (is_endian.little)
1423 ctr = BSWAP4(ctx->Yi.d[3]);
1425 ctr = GETU32(ctx->Yi.c + 12);
/* drain a partial keystream block left over from a previous call */
1433 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
/* chunked path: stream-encrypt GHASH_CHUNK bytes, then hash them */
1444 # if defined(GHASH) && defined(GHASH_CHUNK)
1445 while (len >= GHASH_CHUNK) {
1446 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1447 ctr += GHASH_CHUNK / 16;
1448 if (is_endian.little)
1450 ctx->Yi.d[3] = BSWAP4(ctr);
1452 PUTU32(ctx->Yi.c + 12, ctr);
1456 GHASH(ctx, out, GHASH_CHUNK);
/* remaining whole blocks: i = len rounded down to a multiple of 16 */
1462 if ((i = (len & (size_t)-16))) {
1465 (*stream) (in, out, j, key, ctx->Yi.c);
1466 ctr += (unsigned int)j;
1467 if (is_endian.little)
1469 ctx->Yi.d[3] = BSWAP4(ctr);
1471 PUTU32(ctx->Yi.c + 12, ctr);
/* no-GHASH() variant folds ciphertext into Xi byte by byte */
1482 for (i = 0; i < 16; ++i)
1483 ctx->Xi.c[i] ^= out[i];
/* trailing partial block: single block cipher call */
1490 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1492 if (is_endian.little)
1494 ctx->Yi.d[3] = BSWAP4(ctr);
1496 PUTU32(ctx->Yi.c + 12, ctr);
1501 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
/*
 * Decrypt and authenticate `len` bytes under GCM using an accelerated
 * counter-mode routine `stream`.  Mirrors the _encrypt_ctr32 path except
 * that GHASH is computed over the incoming CIPHERTEXT, i.e. before (or
 * independent of) decryption.  Returns 0 on success.
 */
1511 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1512 const unsigned char *in, unsigned char *out,
1513 size_t len, ctr128_f stream)
1515 #if defined(OPENSSL_SMALL_FOOTPRINT)
1516 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
/* Run-time endianness probe: first union member initialized to 1. */
1521 } is_endian = { 1 };
1522 unsigned int n, ctr;
1524 u64 mlen = ctx->len.u[1];
1525 void *key = ctx->key;
1526 # ifdef GCM_FUNCREF_4BIT
/* Indirect GHASH entry points so platform-specific code can be plugged in. */
1527 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1529 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1530 const u8 *inp, size_t len) = ctx->ghash;
/* Reject once the running message length exceeds 2^36 - 32 bytes
 * (the GCM limit), or on 64-bit size_t overflow of the accumulator. */
1535 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1537 ctx->len.u[1] = mlen;
1540 /* First call to decrypt finalizes GHASH(AAD) */
/* Load the 32-bit invocation counter from the last word of Yi in host order. */
1545 if (is_endian.little)
1547 ctr = BSWAP4(ctx->Yi.d[3]);
1549 ctr = GETU32(ctx->Yi.c + 12);
/* Drain leftover keystream bytes from a previous partial block. */
1558 *(out++) = c ^ ctx->EKi.c[n];
1570 # if defined(GHASH) && defined(GHASH_CHUNK)
/* Bulk path: hash the ciphertext chunk FIRST, then decrypt it in one
 * stream call. */
1571 while (len >= GHASH_CHUNK) {
1572 GHASH(ctx, in, GHASH_CHUNK);
1573 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1574 ctr += GHASH_CHUNK / 16;
1575 if (is_endian.little)
1577 ctx->Yi.d[3] = BSWAP4(ctr);
1579 PUTU32(ctx->Yi.c + 12, ctr);
/* Remaining whole blocks: len rounded down to a multiple of 16. */
1588 if ((i = (len & (size_t)-16))) {
/* No GHASH macro available: fold each ciphertext block into Xi by hand
 * before decrypting. */
1596 for (k = 0; k < 16; ++k)
1597 ctx->Xi.c[k] ^= in[k];
1604 (*stream) (in, out, j, key, ctx->Yi.c);
1605 ctr += (unsigned int)j;
1606 if (is_endian.little)
1608 ctx->Yi.d[3] = BSWAP4(ctr);
1610 PUTU32(ctx->Yi.c + 12, ctr);
/* Trailing partial block: one more keystream block via the scalar cipher. */
1619 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1621 if (is_endian.little)
1623 ctx->Yi.d[3] = BSWAP4(ctr);
1625 PUTU32(ctx->Yi.c + 12, ctr);
1632 out[n] = c ^ ctx->EKi.c[n];
/*
 * Finalize the GCM computation: fold the bit lengths of AAD and text into
 * GHASH, XOR with E(K, Y0) to obtain the tag, and - when a caller tag is
 * supplied - compare it in constant time (0 means match).
 */
1642 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1648 } is_endian = { 1 };
/* Byte counts -> bit counts, as required for the final len(A)||len(C) block. */
1649 u64 alen = ctx->len.u[0] << 3;
1650 u64 clen = ctx->len.u[1] << 3;
1651 #ifdef GCM_FUNCREF_4BIT
1652 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
/* Flush any pending partial AAD (ares) or text (mres) residue first. */
1655 if (ctx->mres || ctx->ares)
/* Little-endian host: byte-swap the length words to big-endian. */
1658 if (is_endian.little) {
1660 alen = BSWAP8(alen);
1661 clen = BSWAP8(clen);
1665 ctx->len.u[0] = alen;
1666 ctx->len.u[1] = clen;
/* No BSWAP8 available: assemble big-endian 64-bit values from 32-bit loads. */
1668 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1669 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
/* Fold the length block into the running hash. */
1673 ctx->Xi.u[0] ^= alen;
1674 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH(...) XOR E(K, Y0); EK0 was precomputed at setiv time --
 * NOTE(review): EK0 origin not visible in this chunk, confirm. */
1677 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1678 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* Constant-time comparison; CRYPTO_memcmp returns 0 on equality. */
1680 if (tag && len <= sizeof(ctx->Xi))
1681 return CRYPTO_memcmp(ctx->Xi.c, tag, len);
/*
 * Finalize without verifying (tag == NULL) and copy the computed tag out,
 * truncated to at most sizeof(ctx->Xi.c) bytes.
 */
1686 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1688 CRYPTO_gcm128_finish(ctx, NULL, 0);
1689 memcpy(tag, ctx->Xi.c,
1690 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
/*
 * Allocate and initialize a GCM context for `key`/`block`.
 * Returns NULL on allocation failure; caller owns the context and must
 * release it with CRYPTO_gcm128_release().
 */
1693 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1695 GCM128_CONTEXT *ret;
/* Only initialize when malloc succeeded; NULL propagates to the caller. */
1697 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1698 CRYPTO_gcm128_init(ret, key, block);
/*
 * Free a context allocated by CRYPTO_gcm128_new().  clear_free zeroizes
 * the memory first so derived key material does not linger on the heap.
 */
1703 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1705 OPENSSL_clear_free(ctx, sizeof(*ctx));
1708 #if defined(SELFTEST)
1710 # include <openssl/aes.h>
/*
 * Reference vectors matching the published GCM test cases (McGrew-Viega
 * GCM submission / NIST SP 800-38D).  Naming: K=key, P=plaintext, A=AAD,
 * IV=nonce, C=ciphertext, T=tag.  Key width selects the AES variant:
 * K*[16] = AES-128, K*[24] = AES-192, K*[32] = AES-256.  NULL pointers
 * and zero-filled arrays denote empty/all-zero components.
 */
/* Test Case 1: AES-128, zero key/IV, no plaintext, no AAD. */
1713 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1714 static const u8 T1[] = {
1715 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1716 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
/* Test Case 2: single all-zero 16-byte block. */
1723 static const u8 P2[16];
1724 static const u8 C2[] = {
1725 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1726 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1729 static const u8 T2[] = {
1730 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1731 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
/* Test Case 3: 64-byte plaintext, 96-bit IV, no AAD. */
1736 static const u8 K3[] = {
1737 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1738 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1741 static const u8 P3[] = {
1742 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1743 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1744 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1745 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1746 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1747 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1748 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1749 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1752 static const u8 IV3[] = {
1753 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1754 0xde, 0xca, 0xf8, 0x88
1757 static const u8 C3[] = {
1758 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1759 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1760 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1761 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1762 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1763 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1764 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1765 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1768 static const u8 T3[] = {
1769 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1770 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
/* Test Case 4: 60-byte plaintext with 20 bytes of AAD. */
1776 static const u8 P4[] = {
1777 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1778 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1779 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1780 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1781 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1782 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1783 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1784 0xba, 0x63, 0x7b, 0x39
1787 static const u8 A4[] = {
1788 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1789 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1790 0xab, 0xad, 0xda, 0xd2
1793 static const u8 C4[] = {
1794 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1795 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1796 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1797 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1798 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1799 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1800 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1801 0x3d, 0x58, 0xe0, 0x91
1804 static const u8 T4[] = {
1805 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1806 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
/* Test Case 5: short (8-byte) IV variant. */
1813 static const u8 IV5[] = {
1814 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1817 static const u8 C5[] = {
1818 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1819 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1820 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1821 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1822 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1823 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1824 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1825 0xc2, 0x3f, 0x45, 0x98
1828 static const u8 T5[] = {
1829 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1830 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
/* Test Case 6: long (60-byte) IV variant, exercising the GHASH IV path. */
1837 static const u8 IV6[] = {
1838 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1839 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1840 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1841 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1842 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1843 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1844 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1845 0xa6, 0x37, 0xb3, 0x9b
1848 static const u8 C6[] = {
1849 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1850 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1851 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1852 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1853 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1854 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1855 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1856 0x4c, 0x34, 0xae, 0xe5
1859 static const u8 T6[] = {
1860 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1861 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
/* Test Case 7: AES-192 (24-byte key), empty plaintext/AAD. */
1865 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1866 static const u8 T7[] = {
1867 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1868 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
/* Test Case 8: AES-192, single all-zero block. */
1875 static const u8 P8[16];
1876 static const u8 C8[] = {
1877 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1878 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1881 static const u8 T8[] = {
1882 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1883 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
/* Test Case 9: AES-192, 64-byte plaintext, 96-bit IV. */
1888 static const u8 K9[] = {
1889 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1890 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1891 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1894 static const u8 P9[] = {
1895 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1896 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1897 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1898 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1899 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1900 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1901 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1902 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1905 static const u8 IV9[] = {
1906 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1907 0xde, 0xca, 0xf8, 0x88
1910 static const u8 C9[] = {
1911 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1912 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1913 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1914 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1915 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1916 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1917 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1918 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1921 static const u8 T9[] = {
1922 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1923 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
/* Test Case 10: AES-192, 60-byte plaintext with 20 bytes of AAD. */
1929 static const u8 P10[] = {
1930 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1931 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1932 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1933 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1934 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1935 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1936 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1937 0xba, 0x63, 0x7b, 0x39
1940 static const u8 A10[] = {
1941 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1942 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1943 0xab, 0xad, 0xda, 0xd2
1946 static const u8 C10[] = {
1947 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1948 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1949 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1950 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1951 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1952 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1953 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1954 0xcc, 0xda, 0x27, 0x10
1957 static const u8 T10[] = {
1958 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1959 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
/* Test Case 11: AES-192, short (8-byte) IV variant. */
1966 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1968 static const u8 C11[] = {
1969 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1970 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1971 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1972 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1973 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1974 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1975 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1976 0xa0, 0xf0, 0x62, 0xf7
1979 static const u8 T11[] = {
1980 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1981 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
/* Test Case 12: AES-192, long (60-byte) IV variant. */
1988 static const u8 IV12[] = {
1989 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1990 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1991 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1992 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1993 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1994 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1995 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1996 0xa6, 0x37, 0xb3, 0x9b
1999 static const u8 C12[] = {
2000 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2001 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2002 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2003 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2004 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2005 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2006 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2007 0xe9, 0xb7, 0x37, 0x3b
2010 static const u8 T12[] = {
2011 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2012 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
/* Test Case 13: AES-256 (32-byte key), empty plaintext/AAD. */
2016 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2017 static const u8 T13[] = {
2018 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2019 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
/* Test Case 14: AES-256, single all-zero block. */
2025 static const u8 P14[16], IV14[12];
2026 static const u8 C14[] = {
2027 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2028 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2031 static const u8 T14[] = {
2032 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2033 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
/* Test Case 15: AES-256, 64-byte plaintext, 96-bit IV. */
2038 static const u8 K15[] = {
2039 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2040 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2041 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2042 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2045 static const u8 P15[] = {
2046 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2047 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2048 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2049 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2050 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2051 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2052 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2053 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2056 static const u8 IV15[] = {
2057 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2058 0xde, 0xca, 0xf8, 0x88
2061 static const u8 C15[] = {
2062 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2063 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2064 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2065 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2066 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2067 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2068 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2069 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2072 static const u8 T15[] = {
2073 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2074 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
/* Test Case 16: AES-256, 60-byte plaintext with 20 bytes of AAD. */
2080 static const u8 P16[] = {
2081 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2082 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2083 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2084 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2085 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2086 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2087 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2088 0xba, 0x63, 0x7b, 0x39
2091 static const u8 A16[] = {
2092 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2093 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2094 0xab, 0xad, 0xda, 0xd2
2097 static const u8 C16[] = {
2098 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2099 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2100 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2101 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2102 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2103 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2104 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2105 0xbc, 0xc9, 0xf6, 0x62
2108 static const u8 T16[] = {
2109 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2110 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
/* Test Case 17: AES-256, short (8-byte) IV variant. */
2117 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2119 static const u8 C17[] = {
2120 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2121 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2122 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2123 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2124 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2125 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2126 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2127 0xf4, 0x7c, 0x9b, 0x1f
2130 static const u8 T17[] = {
2131 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2132 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
/* Test Case 18: AES-256, long (60-byte) IV variant. */
2139 static const u8 IV18[] = {
2140 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2141 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2142 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2143 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2144 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2145 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2146 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2147 0xa6, 0x37, 0xb3, 0x9b
2150 static const u8 C18[] = {
2151 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2152 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2153 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2154 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2155 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2156 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2157 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2158 0x44, 0xae, 0x7e, 0x3f
2161 static const u8 T18[] = {
2162 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2163 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
/* Test Case 19: authentication-only -- 128 bytes of AAD, no plaintext
 * (key/IV definitions for this case are not visible in this chunk). */
2171 static const u8 A19[] = {
2172 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2173 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2174 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2175 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2176 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2177 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2178 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2179 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2180 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2181 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2182 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2183 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2184 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2185 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2186 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2187 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2190 static const u8 T19[] = {
2191 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2192 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
/* Test Case 20: 64-byte IV and 288-byte zero plaintext, chosen to stress
 * counter handling (see the original comment below). */
2198 /* this results in 0xff in counter LSB */
2199 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2201 static const u8 P20[288];
2202 static const u8 C20[] = {
2203 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2204 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2205 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2206 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2207 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2208 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2209 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2210 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2211 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2212 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2213 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2214 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2215 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2216 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2217 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2218 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2219 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2220 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2221 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2222 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2223 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2224 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2225 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2226 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2227 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2228 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2229 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2230 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2231 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2232 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2233 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2234 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2235 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2236 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2237 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2238 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2241 static const u8 T20[] = {
2242 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2243 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
/*
 * Exercise reference vector #n in both directions: encrypt P##n and check
 * the ciphertext C##n and 16-byte tag T##n, then decrypt C##n and check
 * the recovered plaintext and tag.  NULL P/A/C pointers denote absent
 * components; each failure increments `ret` and logs the direction.
 * (Kept as a macro so the K##n/P##n/... token pasting can select the
 * per-case arrays and their sizeof() at compile time.)
 */
2246 # define TEST_CASE(n) do { \
2247 u8 out[sizeof(P##n)]; \
2248 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2249 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2250 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2251 memset(out,0,sizeof(out)); \
2252 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2253 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2254 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2255 (C##n && memcmp(out,C##n,sizeof(out)))) \
2256 ret++, printf ("encrypt test#%d failed.\n",n); \
2257 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2258 memset(out,0,sizeof(out)); \
2259 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2260 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2261 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2262 (P##n && memcmp(out,P##n,sizeof(out)))) \
2263 ret++, printf ("decrypt test#%d failed.\n",n); \
2293 # ifdef OPENSSL_CPUID_OBJ
2295 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2302 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2303 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2304 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2306 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2307 start = OPENSSL_rdtsc();
2308 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2309 gcm_t = OPENSSL_rdtsc() - start;
2311 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2312 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2313 (block128_f) AES_encrypt);
2314 start = OPENSSL_rdtsc();
2315 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2316 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2317 (block128_f) AES_encrypt);
2318 ctr_t = OPENSSL_rdtsc() - start;
2320 printf("%.2f-%.2f=%.2f\n",
2321 gcm_t / (double)sizeof(buf),
2322 ctr_t / (double)sizeof(buf),
2323 (gcm_t - ctr_t) / (double)sizeof(buf));
2326 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2327 const u8 *inp, size_t len) = ctx.ghash;
2329 GHASH((&ctx), buf.c, sizeof(buf));
2330 start = OPENSSL_rdtsc();
2331 for (i = 0; i < 100; ++i)
2332 GHASH((&ctx), buf.c, sizeof(buf));
2333 gcm_t = OPENSSL_rdtsc() - start;
2334 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);