1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148 const u8 *xi = (const u8 *)Xi+15;
150 const union { long one; char little; } is_endian = {1};
151 static const size_t rem_8bit[256] = {
152 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
218 Z.hi ^= Htable[n].hi;
219 Z.lo ^= Htable[n].lo;
221 if ((u8 *)Xi==xi) break;
225 rem = (size_t)Z.lo&0xff;
226 Z.lo = (Z.hi<<56)|(Z.lo>>8);
228 if (sizeof(size_t)==8)
229 Z.hi ^= rem_8bit[rem];
231 Z.hi ^= (u64)rem_8bit[rem]<<32;
234 if (is_endian.little) {
236 Xi[0] = BSWAP8(Z.hi);
237 Xi[1] = BSWAP8(Z.lo);
241 v = (u32)(Z.hi>>32); PUTU32(p,v);
242 v = (u32)(Z.hi); PUTU32(p+4,v);
243 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244 v = (u32)(Z.lo); PUTU32(p+12,v);
252 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 for (Htable[8]=V, i=4; i>0; i>>=1) {
274 for (i=2; i<16; i<<=1) {
277 for (V=*Hi, j=1; j<i; ++j) {
278 Hi[j].hi = V.hi^Htable[j].hi;
279 Hi[j].lo = V.lo^Htable[j].lo;
290 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 * ARM assembler expects specific dword order in Htable.
310 const union { long one; char little; } is_endian = {1};
312 if (is_endian.little)
321 Htable[j].hi = V.lo<<32|V.lo>>32;
322 Htable[j].lo = V.hi<<32|V.hi>>32;
329 static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
339 size_t rem, nlo, nhi;
340 const union { long one; char little; } is_endian = {1};
342 nlo = ((const u8 *)Xi)[15];
346 Z.hi = Htable[nlo].hi;
347 Z.lo = Htable[nlo].lo;
350 rem = (size_t)Z.lo&0xf;
351 Z.lo = (Z.hi<<60)|(Z.lo>>4);
353 if (sizeof(size_t)==8)
354 Z.hi ^= rem_4bit[rem];
356 Z.hi ^= (u64)rem_4bit[rem]<<32;
358 Z.hi ^= Htable[nhi].hi;
359 Z.lo ^= Htable[nhi].lo;
363 nlo = ((const u8 *)Xi)[cnt];
367 rem = (size_t)Z.lo&0xf;
368 Z.lo = (Z.hi<<60)|(Z.lo>>4);
370 if (sizeof(size_t)==8)
371 Z.hi ^= rem_4bit[rem];
373 Z.hi ^= (u64)rem_4bit[rem]<<32;
375 Z.hi ^= Htable[nlo].hi;
376 Z.lo ^= Htable[nlo].lo;
379 if (is_endian.little) {
381 Xi[0] = BSWAP8(Z.hi);
382 Xi[1] = BSWAP8(Z.lo);
386 v = (u32)(Z.hi>>32); PUTU32(p,v);
387 v = (u32)(Z.hi); PUTU32(p+4,v);
388 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389 v = (u32)(Z.lo); PUTU32(p+12,v);
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
417 nlo = ((const u8 *)Xi)[15];
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
439 nlo = ((const u8 *)Xi)[cnt];
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
459 * the rem_8bit even here, but the priority is to minimize
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513 nlo = ((const u8 *)Xi)[cnt];
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
521 rem = (size_t)Z.lo&0xff;
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
531 nlo = ((const u8 *)Xi)[0];
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
539 rem = (size_t)Z.lo&0xf;
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
549 if (is_endian.little) {
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
566 } while (inp+=16, len-=16);
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
574 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578 * trashing effect. In other words idea is to hash data while it's
579 * still in L1 cache after encryption pass... */
580 #define GHASH_CHUNK (3*1024)
583 #else /* TABLE_BITS */
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
590 const long *xi = (const long *)Xi;
591 const union { long one; char little; } is_endian = {1};
593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
596 for (j=0; j<16/sizeof(long); ++j) {
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
600 X = (long)(BSWAP8(xi[j]));
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607 const u8 *p = (const u8 *)(xi+j);
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
623 if (is_endian.little) {
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
641 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
645 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
646 # if !defined(I386_ONLY) && \
647 (defined(__i386) || defined(__i386__) || \
648 defined(__x86_64) || defined(__x86_64__) || \
649 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650 # define GHASH_ASM_X86_OR_64
651 # define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define gcm_init_avx gcm_init_clmul
660 # define gcm_gmult_avx gcm_gmult_clmul
661 # define gcm_ghash_avx gcm_ghash_clmul
663 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
668 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
669 # define GHASH_ASM_X86
670 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
677 # include "arm_arch.h"
679 # define GHASH_ASM_ARM
680 # define GCM_FUNCREF_4BIT
681 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
682 # if defined(__arm__) || defined(__arm)
683 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
685 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
686 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
687 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
688 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
692 # elif defined(__sparc__) || defined(__sparc)
693 # include "sparc_arch.h"
694 # define GHASH_ASM_SPARC
695 # define GCM_FUNCREF_4BIT
696 extern unsigned int OPENSSL_sparcv9cap_P[];
697 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
698 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
699 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
703 #ifdef GCM_FUNCREF_4BIT
705 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
708 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
712 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
714 const union { long one; char little; } is_endian = {1};
716 memset(ctx,0,sizeof(*ctx));
720 (*block)(ctx->H.c,ctx->H.c,key);
722 if (is_endian.little) {
723 /* H is stored in host byte order */
725 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
726 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
730 hi = (u64)GETU32(p) <<32|GETU32(p+4);
731 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
738 gcm_init_8bit(ctx->Htable,ctx->H.u);
740 # if defined(GHASH_ASM_X86_OR_64)
741 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
742 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
743 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
744 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
745 gcm_init_avx(ctx->Htable,ctx->H.u);
746 ctx->gmult = gcm_gmult_avx;
747 ctx->ghash = gcm_ghash_avx;
749 gcm_init_clmul(ctx->Htable,ctx->H.u);
750 ctx->gmult = gcm_gmult_clmul;
751 ctx->ghash = gcm_ghash_clmul;
756 gcm_init_4bit(ctx->Htable,ctx->H.u);
757 # if defined(GHASH_ASM_X86) /* x86 only */
758 # if defined(OPENSSL_IA32_SSE2)
759 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
761 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
763 ctx->gmult = gcm_gmult_4bit_mmx;
764 ctx->ghash = gcm_ghash_4bit_mmx;
766 ctx->gmult = gcm_gmult_4bit_x86;
767 ctx->ghash = gcm_ghash_4bit_x86;
770 ctx->gmult = gcm_gmult_4bit;
771 ctx->ghash = gcm_ghash_4bit;
773 # elif defined(GHASH_ASM_ARM)
774 # ifdef PMULL_CAPABLE
776 gcm_init_v8(ctx->Htable,ctx->H.u);
777 ctx->gmult = gcm_gmult_v8;
778 ctx->ghash = gcm_ghash_v8;
783 gcm_init_neon(ctx->Htable,ctx->H.u);
784 ctx->gmult = gcm_gmult_neon;
785 ctx->ghash = gcm_ghash_neon;
789 gcm_init_4bit(ctx->Htable,ctx->H.u);
790 ctx->gmult = gcm_gmult_4bit;
791 ctx->ghash = gcm_ghash_4bit;
793 # elif defined(GHASH_ASM_SPARC)
794 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
795 gcm_init_vis3(ctx->Htable,ctx->H.u);
796 ctx->gmult = gcm_gmult_vis3;
797 ctx->ghash = gcm_ghash_vis3;
799 gcm_init_4bit(ctx->Htable,ctx->H.u);
800 ctx->gmult = gcm_gmult_4bit;
801 ctx->ghash = gcm_ghash_4bit;
804 gcm_init_4bit(ctx->Htable,ctx->H.u);
809 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
811 const union { long one; char little; } is_endian = {1};
813 #ifdef GCM_FUNCREF_4BIT
814 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
821 ctx->len.u[0] = 0; /* AAD length */
822 ctx->len.u[1] = 0; /* message length */
827 memcpy(ctx->Yi.c,iv,12);
836 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
842 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
846 if (is_endian.little) {
848 ctx->Yi.u[1] ^= BSWAP8(len0);
850 ctx->Yi.c[8] ^= (u8)(len0>>56);
851 ctx->Yi.c[9] ^= (u8)(len0>>48);
852 ctx->Yi.c[10] ^= (u8)(len0>>40);
853 ctx->Yi.c[11] ^= (u8)(len0>>32);
854 ctx->Yi.c[12] ^= (u8)(len0>>24);
855 ctx->Yi.c[13] ^= (u8)(len0>>16);
856 ctx->Yi.c[14] ^= (u8)(len0>>8);
857 ctx->Yi.c[15] ^= (u8)(len0);
861 ctx->Yi.u[1] ^= len0;
865 if (is_endian.little)
867 ctr = BSWAP4(ctx->Yi.d[3]);
869 ctr = GETU32(ctx->Yi.c+12);
875 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
877 if (is_endian.little)
879 ctx->Yi.d[3] = BSWAP4(ctr);
881 PUTU32(ctx->Yi.c+12,ctr);
887 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
891 u64 alen = ctx->len.u[0];
892 #ifdef GCM_FUNCREF_4BIT
893 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
895 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
896 const u8 *inp,size_t len) = ctx->ghash;
900 if (ctx->len.u[1]) return -2;
903 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
905 ctx->len.u[0] = alen;
910 ctx->Xi.c[n] ^= *(aad++);
914 if (n==0) GCM_MUL(ctx,Xi);
922 if ((i = (len&(size_t)-16))) {
929 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
936 n = (unsigned int)len;
937 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
944 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
945 const unsigned char *in, unsigned char *out,
948 const union { long one; char little; } is_endian = {1};
951 u64 mlen = ctx->len.u[1];
952 block128_f block = ctx->block;
953 void *key = ctx->key;
954 #ifdef GCM_FUNCREF_4BIT
955 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
957 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
958 const u8 *inp,size_t len) = ctx->ghash;
963 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
966 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
968 ctx->len.u[1] = mlen;
971 /* First call to encrypt finalizes GHASH(AAD) */
976 if (is_endian.little)
978 ctr = BSWAP4(ctx->Yi.d[3]);
980 ctr = GETU32(ctx->Yi.c+12);
986 #if !defined(OPENSSL_SMALL_FOOTPRINT)
987 if (16%sizeof(size_t) == 0) do { /* always true actually */
990 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
994 if (n==0) GCM_MUL(ctx,Xi);
1000 #if defined(STRICT_ALIGNMENT)
1001 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1004 #if defined(GHASH) && defined(GHASH_CHUNK)
1005 while (len>=GHASH_CHUNK) {
1006 size_t j=GHASH_CHUNK;
1009 size_t *out_t=(size_t *)out;
1010 const size_t *in_t=(const size_t *)in;
1012 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1014 if (is_endian.little)
1016 ctx->Yi.d[3] = BSWAP4(ctr);
1018 PUTU32(ctx->Yi.c+12,ctr);
1022 for (i=0; i<16/sizeof(size_t); ++i)
1023 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1028 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1031 if ((i = (len&(size_t)-16))) {
1035 size_t *out_t=(size_t *)out;
1036 const size_t *in_t=(const size_t *)in;
1038 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1040 if (is_endian.little)
1042 ctx->Yi.d[3] = BSWAP4(ctr);
1044 PUTU32(ctx->Yi.c+12,ctr);
1048 for (i=0; i<16/sizeof(size_t); ++i)
1049 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1058 size_t *out_t=(size_t *)out;
1059 const size_t *in_t=(const size_t *)in;
1061 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1063 if (is_endian.little)
1065 ctx->Yi.d[3] = BSWAP4(ctr);
1067 PUTU32(ctx->Yi.c+12,ctr);
1071 for (i=0; i<16/sizeof(size_t); ++i)
1073 out_t[i] = in_t[i]^ctx->EKi.t[i];
1081 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1083 if (is_endian.little)
1085 ctx->Yi.d[3] = BSWAP4(ctr);
1087 PUTU32(ctx->Yi.c+12,ctr);
1092 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1101 for (i=0;i<len;++i) {
1103 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1105 if (is_endian.little)
1107 ctx->Yi.d[3] = BSWAP4(ctr);
1109 PUTU32(ctx->Yi.c+12,ctr);
1114 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1124 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1125 const unsigned char *in, unsigned char *out,
1128 const union { long one; char little; } is_endian = {1};
1129 unsigned int n, ctr;
1131 u64 mlen = ctx->len.u[1];
1132 block128_f block = ctx->block;
1133 void *key = ctx->key;
1134 #ifdef GCM_FUNCREF_4BIT
1135 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1137 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1138 const u8 *inp,size_t len) = ctx->ghash;
1143 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1145 ctx->len.u[1] = mlen;
1148 /* First call to decrypt finalizes GHASH(AAD) */
1153 if (is_endian.little)
1155 ctr = BSWAP4(ctx->Yi.d[3]);
1157 ctr = GETU32(ctx->Yi.c+12);
1163 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1164 if (16%sizeof(size_t) == 0) do { /* always true actually */
1168 *(out++) = c^ctx->EKi.c[n];
1173 if (n==0) GCM_MUL (ctx,Xi);
1179 #if defined(STRICT_ALIGNMENT)
1180 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1183 #if defined(GHASH) && defined(GHASH_CHUNK)
1184 while (len>=GHASH_CHUNK) {
1185 size_t j=GHASH_CHUNK;
1187 GHASH(ctx,in,GHASH_CHUNK);
1189 size_t *out_t=(size_t *)out;
1190 const size_t *in_t=(const size_t *)in;
1192 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1194 if (is_endian.little)
1196 ctx->Yi.d[3] = BSWAP4(ctr);
1198 PUTU32(ctx->Yi.c+12,ctr);
1202 for (i=0; i<16/sizeof(size_t); ++i)
1203 out_t[i] = in_t[i]^ctx->EKi.t[i];
1210 if ((i = (len&(size_t)-16))) {
1213 size_t *out_t=(size_t *)out;
1214 const size_t *in_t=(const size_t *)in;
1216 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1218 if (is_endian.little)
1220 ctx->Yi.d[3] = BSWAP4(ctr);
1222 PUTU32(ctx->Yi.c+12,ctr);
1226 for (i=0; i<16/sizeof(size_t); ++i)
1227 out_t[i] = in_t[i]^ctx->EKi.t[i];
1235 size_t *out_t=(size_t *)out;
1236 const size_t *in_t=(const size_t *)in;
1238 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1240 if (is_endian.little)
1242 ctx->Yi.d[3] = BSWAP4(ctr);
1244 PUTU32(ctx->Yi.c+12,ctr);
1248 for (i=0; i<16/sizeof(size_t); ++i) {
1250 out[i] = c^ctx->EKi.t[i];
1260 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1262 if (is_endian.little)
1264 ctx->Yi.d[3] = BSWAP4(ctr);
1266 PUTU32(ctx->Yi.c+12,ctr);
1273 out[n] = c^ctx->EKi.c[n];
1282 for (i=0;i<len;++i) {
1285 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1287 if (is_endian.little)
1289 ctx->Yi.d[3] = BSWAP4(ctr);
1291 PUTU32(ctx->Yi.c+12,ctr);
1297 out[i] = c^ctx->EKi.c[n];
1308 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1309 const unsigned char *in, unsigned char *out,
1310 size_t len, ctr128_f stream)
1312 const union { long one; char little; } is_endian = {1};
1313 unsigned int n, ctr;
1315 u64 mlen = ctx->len.u[1];
1316 void *key = ctx->key;
1317 #ifdef GCM_FUNCREF_4BIT
1318 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1320 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1321 const u8 *inp,size_t len) = ctx->ghash;
1326 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1328 ctx->len.u[1] = mlen;
1331 /* First call to encrypt finalizes GHASH(AAD) */
1336 if (is_endian.little)
1338 ctr = BSWAP4(ctx->Yi.d[3]);
1340 ctr = GETU32(ctx->Yi.c+12);
1348 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1352 if (n==0) GCM_MUL(ctx,Xi);
1358 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1359 while (len>=GHASH_CHUNK) {
1360 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1361 ctr += GHASH_CHUNK/16;
1362 if (is_endian.little)
1364 ctx->Yi.d[3] = BSWAP4(ctr);
1366 PUTU32(ctx->Yi.c+12,ctr);
1370 GHASH(ctx,out,GHASH_CHUNK);
1376 if ((i = (len&(size_t)-16))) {
1379 (*stream)(in,out,j,key,ctx->Yi.c);
1380 ctr += (unsigned int)j;
1381 if (is_endian.little)
1383 ctx->Yi.d[3] = BSWAP4(ctr);
1385 PUTU32(ctx->Yi.c+12,ctr);
1396 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1403 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1405 if (is_endian.little)
1407 ctx->Yi.d[3] = BSWAP4(ctr);
1409 PUTU32(ctx->Yi.c+12,ctr);
1414 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1423 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1424 const unsigned char *in, unsigned char *out,
1425 size_t len,ctr128_f stream)
1427 const union { long one; char little; } is_endian = {1};
1428 unsigned int n, ctr;
1430 u64 mlen = ctx->len.u[1];
1431 void *key = ctx->key;
1432 #ifdef GCM_FUNCREF_4BIT
1433 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1435 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1436 const u8 *inp,size_t len) = ctx->ghash;
1441 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1443 ctx->len.u[1] = mlen;
1446 /* First call to decrypt finalizes GHASH(AAD) */
1451 if (is_endian.little)
1453 ctr = BSWAP4(ctx->Yi.d[3]);
1455 ctr = GETU32(ctx->Yi.c+12);
1464 *(out++) = c^ctx->EKi.c[n];
1469 if (n==0) GCM_MUL (ctx,Xi);
1475 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1476 while (len>=GHASH_CHUNK) {
1477 GHASH(ctx,in,GHASH_CHUNK);
1478 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1479 ctr += GHASH_CHUNK/16;
1480 if (is_endian.little)
1482 ctx->Yi.d[3] = BSWAP4(ctr);
1484 PUTU32(ctx->Yi.c+12,ctr);
1493 if ((i = (len&(size_t)-16))) {
1501 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1508 (*stream)(in,out,j,key,ctx->Yi.c);
1509 ctr += (unsigned int)j;
1510 if (is_endian.little)
1512 ctx->Yi.d[3] = BSWAP4(ctr);
1514 PUTU32(ctx->Yi.c+12,ctr);
1523 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1525 if (is_endian.little)
1527 ctx->Yi.d[3] = BSWAP4(ctr);
1529 PUTU32(ctx->Yi.c+12,ctr);
1536 out[n] = c^ctx->EKi.c[n];
1545 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1548 const union { long one; char little; } is_endian = {1};
1549 u64 alen = ctx->len.u[0]<<3;
1550 u64 clen = ctx->len.u[1]<<3;
1551 #ifdef GCM_FUNCREF_4BIT
1552 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1555 if (ctx->mres || ctx->ares)
1558 if (is_endian.little) {
1560 alen = BSWAP8(alen);
1561 clen = BSWAP8(clen);
1565 ctx->len.u[0] = alen;
1566 ctx->len.u[1] = clen;
1568 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1569 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1573 ctx->Xi.u[0] ^= alen;
1574 ctx->Xi.u[1] ^= clen;
1577 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1578 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1580 if (tag && len<=sizeof(ctx->Xi))
1581 return memcmp(ctx->Xi.c,tag,len);
1586 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1588 CRYPTO_gcm128_finish(ctx, NULL, 0);
1589 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1592 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1594 GCM128_CONTEXT *ret;
1596 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1597 CRYPTO_gcm128_init(ret,key,block);
1602 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1605 OPENSSL_cleanse(ctx,sizeof(*ctx));
1610 #if defined(SELFTEST)
1612 #include <openssl/aes.h>
1615 static const u8 K1[16],
1620 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1626 static const u8 P2[16],
1627 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1628 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1632 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1633 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1634 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1635 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1636 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1637 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1638 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1639 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1640 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1641 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1642 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1647 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1648 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1649 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1650 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1651 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1652 0xab,0xad,0xda,0xd2},
1653 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1654 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1655 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1656 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1657 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1663 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1664 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1665 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1666 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1667 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1668 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1674 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1675 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1676 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1677 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1678 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1679 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1680 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1681 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1682 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1685 static const u8 K7[24],
1690 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1696 static const u8 P8[16],
1697 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1698 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1702 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1703 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1704 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1705 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1706 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1707 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1708 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1709 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1710 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1711 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1712 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1713 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1718 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1719 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1720 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1721 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1722 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1723 0xab,0xad,0xda,0xd2},
1724 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1725 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1726 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1727 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1728 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1734 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1735 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1736 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1737 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1738 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1739 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1745 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1746 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1747 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1748 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1749 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1750 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1751 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1752 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1753 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1756 static const u8 K13[32],
1761 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1766 static const u8 P14[16],
1768 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1769 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1773 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1774 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1775 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1776 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1777 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1778 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1779 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1780 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1781 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1782 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1783 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1784 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1789 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1790 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1791 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1792 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1793 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1794 0xab,0xad,0xda,0xd2},
1795 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1796 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1797 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1798 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1799 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1805 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1806 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1807 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1808 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1809 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1810 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1816 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1817 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1818 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1819 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1820 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1821 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1822 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1823 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1824 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1831 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1832 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1833 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1834 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1835 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1836 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1837 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1838 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1839 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1844 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1846 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1847 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1848 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1849 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1850 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1851 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1852 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1853 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1854 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1855 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1856 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1857 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1858 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1859 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1860 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1861 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1862 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1863 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1864 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1866 #define TEST_CASE(n) do { \
1867 u8 out[sizeof(P##n)]; \
1868 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1869 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1870 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1871 memset(out,0,sizeof(out)); \
1872 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1873 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1874 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1875 (C##n && memcmp(out,C##n,sizeof(out)))) \
1876 ret++, printf ("encrypt test#%d failed.\n",n); \
1877 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1878 memset(out,0,sizeof(out)); \
1879 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1880 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1881 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1882 (P##n && memcmp(out,P##n,sizeof(out)))) \
1883 ret++, printf ("decrypt test#%d failed.\n",n); \
1913 #ifdef OPENSSL_CPUID_OBJ
1915 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1916 union { u64 u; u8 c[1024]; } buf;
1919 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1920 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1921 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1923 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1924 start = OPENSSL_rdtsc();
1925 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1926 gcm_t = OPENSSL_rdtsc() - start;
1928 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1929 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1930 (block128_f)AES_encrypt);
1931 start = OPENSSL_rdtsc();
1932 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1933 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1934 (block128_f)AES_encrypt);
1935 ctr_t = OPENSSL_rdtsc() - start;
1937 printf("%.2f-%.2f=%.2f\n",
1938 gcm_t/(double)sizeof(buf),
1939 ctr_t/(double)sizeof(buf),
1940 (gcm_t-ctr_t)/(double)sizeof(buf));
1943 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1944 const u8 *inp,size_t len) = ctx.ghash;
1946 GHASH((&ctx),buf.c,sizeof(buf));
1947 start = OPENSSL_rdtsc();
1948 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1949 gcm_t = OPENSSL_rdtsc() - start;
1950 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);