1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
/*
 * When both BSWAP4 and STRICT_ALIGNMENT are defined, GETU32/PUTU32 are
 * defined as a single 32-bit load/store through a u32 pointer, with the
 * value passed through BSWAP4.
 * NOTE(review): the lines between these directives and the closing #endif
 * are not visible in this listing.
 */
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK(s): move the 16-bit constant s into the top 16 bits of a size_t. */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V (V.hi:V.lo) right by one bit.
 * T is an all-ones/all-zeros mask built from the bit that is about to be
 * shifted out of V.lo, ANDed with the constant 0xe1 positioned in the top
 * byte; it is XORed into V.hi after the shift.  Two variants are visible,
 * one using a 64-bit T and one using a 32-bit T widened with <<32; the
 * first is guarded by sizeof(size_t)==8.
 * No comments may be placed between the continuation lines below.
 */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to the attacker, it's possible for them to attempt to
95 * deduce the secret parameter H and, if successful, tamper with messages
96 * [which is trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. On the plus side, it should be twice as fast as the
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example, on Windows a large enough free
111 * results in VM working set trimming, meaning that a subsequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit - fill the 256-entry table Htable used by the "8-bit"
 * multiplication routine.
 *
 * Htable: output table of 256 u128 entries.
 * H:      second input; how V is derived from it is in lines that are not
 *         visible in this listing.
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Entry 128 is seeded with V, then i steps through 64, 32, ..., 1.  The
 * loop body is not visible here. */
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* For each power of two i in 2..128, fill the entries between i and 2*i:
 * Htable[i+j] = Htable[i] ^ Htable[j] for 1 <= j < i. */
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit - update the 16-byte value Xi in place using the
 * 256-entry table Htable, consuming Xi one byte at a time from its last
 * byte towards its first.
 *
 * Xi:     16-byte value, read and overwritten.
 * Htable: table of 256 u128 entries (read only).
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
/* xi starts at the last byte of Xi. */
148 const u8 *xi = (const u8 *)Xi+15;
/* is_endian.little is non-zero when the first byte of a long holding 1 is
 * non-zero, i.e. on a little-endian host. */
150 const union { long one; char little; } is_endian = {1};
/* 256 constants, each placed in the top 16 bits of a size_t by PACK();
 * indexed by the byte shifted out of Z below. */
151 static const size_t rem_8bit[256] = {
152 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Accumulate the table entry selected by n into Z.  The line that loads n
 * is not visible in this listing. */
218 Z.hi ^= Htable[n].hi;
219 Z.lo ^= Htable[n].lo;
/* Stop once xi has walked back to the first byte of Xi. */
221 if ((u8 *)Xi==xi) break;
/* Shift Z right by 8 bits; the byte that falls off is kept in rem. */
225 rem = (size_t)Z.lo&0xff;
226 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/* Fold the table constant for rem into Z.hi.  With an 8-byte size_t the
 * PACKed value is already in the top 16 bits; the other visible form
 * shifts the entry up by a further 32 bits. */
228 if (sizeof(size_t)==8)
229 Z.hi ^= rem_8bit[rem];
231 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Write Z back to Xi.  Visible forms: byte-swapped 64-bit words via
 * BSWAP8, or four 32-bit pieces via PUTU32 with the high half of Z.hi
 * first.  The conditions selecting between them are only partly visible. */
234 if (is_endian.little) {
236 Xi[0] = BSWAP8(Z.hi);
237 Xi[1] = BSWAP8(Z.lo);
241 v = (u32)(Z.hi>>32); PUTU32(p,v);
242 v = (u32)(Z.hi); PUTU32(p+4,v);
243 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 8-bit table variant: multiply ctx->Xi in place. */
252 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit - fill the 16-entry table Htable used by the "4-bit"
 * routines.
 *
 * Htable: output table of 16 u128 entries.
 * H:      second input; the lines deriving V from it are not visible in
 *         this listing.
 *
 * Two construction styles are visible: loops under
 * OPENSSL_SMALL_FOOTPRINT, and an unrolled sequence of XOR assignments.
 */
256 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
259 #if defined(OPENSSL_SMALL_FOOTPRINT)
268 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Loop form: seed entry 8 with V and step i through 4, 2, 1 (body not
 * visible); then for each power of two i in 2..8 set
 * Htable[i+j] = Htable[i] ^ Htable[j] for 1 <= j < i. */
269 for (Htable[8]=V, i=4; i>0; i>>=1) {
274 for (i=2; i<16; i<<=1) {
277 for (V=*Hi, j=1; j<i; ++j) {
278 Hi[j].hi = V.hi^Htable[j].hi;
279 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled form: each non-power-of-two entry is V XOR a lower entry.  The
 * value held in V for each group is assigned on lines not visible here. */
290 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 * ARM assembler expects specific dword order in Htable.
310 const union { long one; char little; } is_endian = {1};
312 if (is_endian.little)
/* Visible re-ordering for the ARM assembler: the hi and lo words of entry
 * j are exchanged and the 32-bit halves of each 64-bit word are swapped.
 * Which endianness branch these two lines belong to cannot be determined
 * from the visible lines. */
321 Htable[j].hi = V.lo<<32|V.lo>>32;
322 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * rem_4bit - 16 constants, each placed in the top 16 bits of a size_t by
 * PACK().  The 4-bit routines index it with the nibble shifted out of Z
 * and XOR the result into Z.hi.
 */
329 static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit - update the 16-byte value Xi in place using the
 * 16-entry table Htable, consuming Xi four bits at a time.
 *
 * Xi:     16-byte value, read and overwritten.
 * Htable: table of 16 u128 entries (read only).
 *
 * NOTE(review): the lines that split each byte into the nlo/nhi nibble
 * indices, and the loop control, are not visible in this listing.
 */
335 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
339 size_t rem, nlo, nhi;
/* is_endian.little is non-zero on a little-endian host. */
340 const union { long one; char little; } is_endian = {1};
/* Start from the last byte of Xi. */
342 nlo = ((const u8 *)Xi)[15];
346 Z.hi = Htable[nlo].hi;
347 Z.lo = Htable[nlo].lo;
/* Shift Z right by 4 bits; the nibble that falls off selects a rem_4bit
 * constant that is XORed into Z.hi (shifted up 32 more bits in the
 * second visible form). */
350 rem = (size_t)Z.lo&0xf;
351 Z.lo = (Z.hi<<60)|(Z.lo>>4);
353 if (sizeof(size_t)==8)
354 Z.hi ^= rem_4bit[rem];
356 Z.hi ^= (u64)rem_4bit[rem]<<32;
/* Accumulate the entry for the other nibble of the same byte. */
358 Z.hi ^= Htable[nhi].hi;
359 Z.lo ^= Htable[nhi].lo;
/* Fetch the next byte of Xi (index cnt) and repeat the shift/fold step. */
363 nlo = ((const u8 *)Xi)[cnt];
367 rem = (size_t)Z.lo&0xf;
368 Z.lo = (Z.hi<<60)|(Z.lo>>4);
370 if (sizeof(size_t)==8)
371 Z.hi ^= rem_4bit[rem];
373 Z.hi ^= (u64)rem_4bit[rem]<<32;
375 Z.hi ^= Htable[nlo].hi;
376 Z.lo ^= Htable[nlo].lo;
/* Write Z back to Xi: byte-swapped 64-bit words via BSWAP8, or four
 * 32-bit pieces via PUTU32 with the high half of Z.hi first. */
379 if (is_endian.little) {
381 Xi[0] = BSWAP8(Z.hi);
382 Xi[1] = BSWAP8(Z.lo);
386 v = (u32)(Z.hi>>32); PUTU32(p,v);
387 v = (u32)(Z.hi); PUTU32(p+4,v);
388 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389 v = (u32)(Z.lo); PUTU32(p+12,v);
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit - process len bytes at inp in 16-byte steps, updating Xi
 * in place with the 16-entry table Htable.
 *
 * Xi:     16-byte running value, read and overwritten.
 * Htable: table of 16 u128 entries (read only).
 * inp:    input bytes; advanced by 16 per iteration.
 * len:    byte count; the loop ends when len reaches 0 after repeated
 *         "len -= 16".
 *         NOTE(review): this appears to require a non-zero multiple of 16;
 *         confirm against callers.
 *
 * Two alternative bodies are visible: a plain nibble-at-a-time form, and
 * a form using the precomputed Hshr4/Hshl4 tables and a 256-entry
 * rem_8bit table.  The preprocessor condition separating them, and the
 * lines that mix inp into the nibble indices, are not visible here.
 */
406 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
/* First visible body: same shift-by-4 / rem_4bit fold as gcm_gmult_4bit. */
417 nlo = ((const u8 *)Xi)[15];
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
439 nlo = ((const u8 *)Xi)[cnt];
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
459 * the rem_8bit even here, but the priority is to minimize
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
513 nlo = ((const u8 *)Xi)[cnt];
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
521 rem = (size_t)Z.lo&0xff;
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
531 nlo = ((const u8 *)Xi)[0];
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
539 rem = (size_t)Z.lo&0xf;
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
549 if (is_endian.little) {
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
566 } while (inp+=16, len-=16);
/* Prototypes for the 4-bit routines (the surrounding preprocessor
 * condition is not visible in this listing). */
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* GCM_MUL: one multiplication of ctx->Xi in place using the 4-bit table. */
574 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH: absorb len bytes at in into ctx->Xi via gcm_ghash_4bit. */
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
578 * thrashing effect. In other words, the idea is to hash data while it's
579 * still in L1 cache after the encryption pass... */
580 #define GHASH_CHUNK (3*1024)
583 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit - table-free, bit-at-a-time update of the 16-byte value
 * Xi in place using H.
 *
 * Xi: 16-byte value, read one long at a time and overwritten.
 * H:  two 64-bit words in host byte order (see the comment on V.hi).
 *
 * NOTE(review): the per-bit update of Z and V inside the inner loop is
 * not visible in this listing.
 */
585 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
590 const long *xi = (const long *)Xi;
/* is_endian.little is non-zero on a little-endian host. */
591 const union { long one; char little; } is_endian = {1};
593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Walk Xi in long-sized words (16/sizeof(long) of them). */
596 for (j=0; j<16/sizeof(long); ++j) {
/* On little-endian hosts the word is loaded byte-reversed: BSWAP8 when a
 * long is 8 bytes, otherwise assembled with GETU32. */
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
600 X = (long)(BSWAP8(xi[j]));
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607 const u8 *p = (const u8 *)(xi+j);
/* One iteration per bit of X, most significant bit first (X is shifted
 * left each round).  M is taken from a right shift of the signed X by
 * (bits-1); presumably an all-ones/zero mask relying on arithmetic shift
 * - TODO confirm. */
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Write Z back to Xi: byte-swapped 64-bit words via BSWAP8, or four
 * 32-bit pieces via PUTU32 with the high half of Z.hi first. */
623 if (is_endian.little) {
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 1-bit variant: uses ctx->H directly, no table. */
641 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Platform-specific GHASH routines, declared only when TABLE_BITS==4 and
 * GHASH_ASM is defined.  Each platform section defines GCM_FUNCREF_4BIT
 * and declares its external routines:
 *   x86 / x86_64: *_clmul and *_avx (on 32-bit x86 the avx names are
 *                 aliased to the clmul ones), plus *_4bit_mmx and
 *                 *_4bit_x86 for 32-bit x86;
 *   ARM:          *_neon;
 *   SPARC:        *_vis3.
 * With GCM_FUNCREF_4BIT defined, GCM_MUL and GHASH call through the local
 * function pointers gcm_gmult_p and gcm_ghash_p.
 * NOTE(review): several #else/#endif lines of this section are not
 * visible in this listing.
 */
645 #if TABLE_BITS==4 && defined(GHASH_ASM)
646 # if !defined(I386_ONLY) && \
647 (defined(__i386) || defined(__i386__) || \
648 defined(__x86_64) || defined(__x86_64__) || \
649 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
650 # define GHASH_ASM_X86_OR_64
651 # define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define gcm_init_avx gcm_init_clmul
660 # define gcm_gmult_avx gcm_gmult_clmul
661 # define gcm_ghash_avx gcm_ghash_clmul
663 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
668 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
669 # define GHASH_ASM_X86
670 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
673 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 # elif defined(__arm__) || defined(__arm)
677 # include "arm_arch.h"
679 # define GHASH_ASM_ARM
680 # define GCM_FUNCREF_4BIT
681 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
682 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
684 # elif defined(__sparc__) || defined(__sparc)
685 # include "sparc_arch.h"
686 # define GHASH_ASM_SPARC
687 # define GCM_FUNCREF_4BIT
688 extern unsigned int OPENSSL_sparcv9cap_P[];
689 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
690 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
691 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
695 #ifdef GCM_FUNCREF_4BIT
697 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
700 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init - initialise a GCM context.
 *
 * ctx:   context to set up; it is zeroed first.
 * key:   opaque key handle passed to the block function.
 * block: block cipher function, called as block(in, out, key).
 *
 * Visible steps: zero ctx, run the block function over ctx->H in place
 * (ctx->H is all-zero at that point because of the memset), convert H to
 * host byte order, then build Htable and pick the gmult/ghash
 * implementation according to build options and CPU capability bits.
 * NOTE(review): the lines storing block/key into ctx and several
 * #else/#endif lines are not visible in this listing.
 */
704 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
706 const union { long one; char little; } is_endian = {1};
708 memset(ctx,0,sizeof(*ctx));
/* H = block(H) in place. */
712 (*block)(ctx->H.c,ctx->H.c,key);
714 if (is_endian.little) {
715 /* H is stored in host byte order */
717 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
718 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* Alternative form of the same conversion built from 32-bit loads. */
722 hi = (u64)GETU32(p) <<32|GETU32(p+4);
723 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* 8-bit table variant. */
730 gcm_init_8bit(ctx->Htable,ctx->H.u);
/* 4-bit table variant with assembler support: select by CPU features. */
732 # if defined(GHASH_ASM_X86_OR_64)
733 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
734 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
735 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
736 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
737 gcm_init_avx(ctx->Htable,ctx->H.u);
738 ctx->gmult = gcm_gmult_avx;
739 ctx->ghash = gcm_ghash_avx;
741 gcm_init_clmul(ctx->Htable,ctx->H.u);
742 ctx->gmult = gcm_gmult_clmul;
743 ctx->ghash = gcm_ghash_clmul;
/* No carry-less multiply path taken: build the generic 4-bit table and
 * choose between the MMX, plain x86 and C routines. */
748 gcm_init_4bit(ctx->Htable,ctx->H.u);
749 # if defined(GHASH_ASM_X86) /* x86 only */
750 # if defined(OPENSSL_IA32_SSE2)
751 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
753 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
755 ctx->gmult = gcm_gmult_4bit_mmx;
756 ctx->ghash = gcm_ghash_4bit_mmx;
758 ctx->gmult = gcm_gmult_4bit_x86;
759 ctx->ghash = gcm_ghash_4bit_x86;
762 ctx->gmult = gcm_gmult_4bit;
763 ctx->ghash = gcm_ghash_4bit;
/* ARM: NEON routines when the capability flag is set, else 4-bit C. */
765 # elif defined(GHASH_ASM_ARM)
766 if (OPENSSL_armcap_P & ARMV7_NEON) {
767 ctx->gmult = gcm_gmult_neon;
768 ctx->ghash = gcm_ghash_neon;
770 gcm_init_4bit(ctx->Htable,ctx->H.u);
771 ctx->gmult = gcm_gmult_4bit;
772 ctx->ghash = gcm_ghash_4bit;
/* SPARC: VIS3 routines when the capability flag is set, else 4-bit C. */
774 # elif defined(GHASH_ASM_SPARC)
775 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
776 gcm_init_vis3(ctx->Htable,ctx->H.u);
777 ctx->gmult = gcm_gmult_vis3;
778 ctx->ghash = gcm_ghash_vis3;
780 gcm_init_4bit(ctx->Htable,ctx->H.u);
781 ctx->gmult = gcm_gmult_4bit;
782 ctx->ghash = gcm_ghash_4bit;
/* Remaining configuration: plain 4-bit table, no function pointers. */
785 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv - start a new message with the given IV.
 *
 * ctx: initialised context.
 * iv:  IV bytes.
 * len: IV length in bytes.
 *
 * Visible steps: reset the AAD and message length counters, build the
 * initial counter block Yi (a straight 12-byte copy, or XOR-folding the IV
 * into Yi 16 bytes at a time followed by the length value len0), run the
 * block function on Yi to produce EK0, and store ctr back into the last
 * four bytes of Yi.
 * NOTE(review): the branch conditions, the GCM_MUL calls between folds,
 * the definition of len0 and the counter increment are not visible in
 * this listing.
 */
790 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
792 const union { long one; char little; } is_endian = {1};
794 #ifdef GCM_FUNCREF_4BIT
795 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
802 ctx->len.u[0] = 0; /* AAD length */
803 ctx->len.u[1] = 0; /* message length */
/* 12-byte IV: copied directly into the first 12 bytes of Yi. */
808 memcpy(ctx->Yi.c,iv,12);
/* Other lengths: fold full 16-byte blocks, then the remaining len bytes. */
817 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
823 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* XOR len0 into the last 8 bytes of Yi, most significant byte first. */
827 if (is_endian.little) {
829 ctx->Yi.u[1] ^= BSWAP8(len0);
831 ctx->Yi.c[8] ^= (u8)(len0>>56);
832 ctx->Yi.c[9] ^= (u8)(len0>>48);
833 ctx->Yi.c[10] ^= (u8)(len0>>40);
834 ctx->Yi.c[11] ^= (u8)(len0>>32);
835 ctx->Yi.c[12] ^= (u8)(len0>>24);
836 ctx->Yi.c[13] ^= (u8)(len0>>16);
837 ctx->Yi.c[14] ^= (u8)(len0>>8);
838 ctx->Yi.c[15] ^= (u8)(len0);
842 ctx->Yi.u[1] ^= len0;
/* Read the 32-bit counter from the last four bytes of Yi. */
846 if (is_endian.little)
847 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = block(Yi). */
852 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Store the counter back into Yi. */
854 if (is_endian.little)
855 PUTU32(ctx->Yi.c+12,ctr);
/*
 * CRYPTO_gcm128_aad - absorb additional authenticated data into Xi.
 *
 * ctx: context after setiv.
 * aad: AAD bytes.
 * len: number of AAD bytes.
 *
 * Returns -2 if message data has already been processed
 * (ctx->len.u[1] != 0).  Other return values are on lines not visible in
 * this listing.
 */
860 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
864 u64 alen = ctx->len.u[0];
865 #ifdef GCM_FUNCREF_4BIT
866 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
868 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
869 const u8 *inp,size_t len) = ctx->ghash;
/* AAD must come before any message data. */
873 if (ctx->len.u[1]) return -2;
/* Limit on the running AAD length (2^61 bytes); the second test looks
 * like a wrap-around check after adding len.  The addition and the
 * statement executed on failure are not visible here. */
876 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
878 ctx->len.u[0] = alen;
/* Complete a partially filled block byte by byte; multiply once it is
 * full (n wrapped to 0). */
883 ctx->Xi.c[n] ^= *(aad++);
887 if (n==0) GCM_MUL(ctx,Xi);
/* i = len rounded down to a multiple of 16: the whole-block part. */
895 if ((i = (len&(size_t)-16))) {
902 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: remember its size in n and XOR it into Xi. */
909 n = (unsigned int)len;
910 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
917 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
918 const unsigned char *in, unsigned char *out,
/*
 * CRYPTO_gcm128_encrypt - encrypt len bytes from in to out in counter
 * mode and fold the produced ciphertext into ctx->Xi.
 *
 * The keystream block EKi is produced by calling the block function on
 * Yi; output is in XOR EKi, and the ciphertext is absorbed either with
 * GHASH over whole chunks/blocks or byte by byte into Xi with GCM_MUL.
 * NOTE(review): the rest of the signature, return statements, counter
 * increments and most loop control are not visible in this listing.
 */
921 const union { long one; char little; } is_endian = {1};
924 u64 mlen = ctx->len.u[1];
925 block128_f block = ctx->block;
926 void *key = ctx->key;
927 #ifdef GCM_FUNCREF_4BIT
928 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
930 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
931 const u8 *inp,size_t len) = ctx->ghash;
936 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
/* Limit on the running message length (2^36 - 32 bytes); the second test
 * looks like a wrap-around check after adding len (addition not visible). */
939 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
941 ctx->len.u[1] = mlen;
944 /* First call to encrypt finalizes GHASH(AAD) */
949 if (is_endian.little)
950 ctr = GETU32(ctx->Yi.c+12);
955 #if !defined(OPENSSL_SMALL_FOOTPRINT)
956 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partially used keystream block one byte at a time. */
959 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
963 if (n==0) GCM_MUL(ctx,Xi);
/* Word-at-a-time paths below cast in/out to size_t pointers; under
 * STRICT_ALIGNMENT misaligned buffers are tested for first. */
969 #if defined(STRICT_ALIGNMENT)
970 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
973 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Encrypt GHASH_CHUNK bytes, then hash the chunk just written. */
974 while (len>=GHASH_CHUNK) {
975 size_t j=GHASH_CHUNK;
978 size_t *out_t=(size_t *)out;
979 const size_t *in_t=(const size_t *)in;
981 (*block)(ctx->Yi.c,ctx->EKi.c,key);
983 if (is_endian.little)
984 PUTU32(ctx->Yi.c+12,ctr);
987 for (i=0; i<16/sizeof(size_t); ++i)
988 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
993 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
/* Remaining whole 16-byte blocks (i = len rounded down to 16). */
996 if ((i = (len&(size_t)-16))) {
1000 size_t *out_t=(size_t *)out;
1001 const size_t *in_t=(const size_t *)in;
1003 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1005 if (is_endian.little)
1006 PUTU32(ctx->Yi.c+12,ctr);
1009 for (i=0; i<16/sizeof(size_t); ++i)
1010 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
/* Block loop variant without the GHASH macro. */
1019 size_t *out_t=(size_t *)out;
1020 const size_t *in_t=(const size_t *)in;
1022 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1024 if (is_endian.little)
1025 PUTU32(ctx->Yi.c+12,ctr);
1028 for (i=0; i<16/sizeof(size_t); ++i)
1030 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Trailing partial block: one more keystream block, used byte by byte. */
1038 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1040 if (is_endian.little)
1041 PUTU32(ctx->Yi.c+12,ctr);
1045 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Byte-at-a-time path over the whole input. */
1054 for (i=0;i<len;++i) {
1056 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1058 if (is_endian.little)
1059 PUTU32(ctx->Yi.c+12,ctr);
1063 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1073 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1074 const unsigned char *in, unsigned char *out,
/*
 * CRYPTO_gcm128_decrypt - decrypt len bytes from in to out in counter
 * mode and fold the consumed ciphertext into ctx->Xi.
 *
 * Mirrors the encrypt routine, except that the input (ciphertext) is what
 * gets hashed: GHASH is applied to in before the chunk is decrypted.
 * NOTE(review): the rest of the signature, return statements, counter
 * increments and most loop control are not visible in this listing.
 */
1077 const union { long one; char little; } is_endian = {1};
1078 unsigned int n, ctr;
1080 u64 mlen = ctx->len.u[1];
1081 block128_f block = ctx->block;
1082 void *key = ctx->key;
1083 #ifdef GCM_FUNCREF_4BIT
1084 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1086 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1087 const u8 *inp,size_t len) = ctx->ghash;
/* Limit on the running message length (2^36 - 32 bytes); the second test
 * looks like a wrap-around check after adding len (addition not visible). */
1092 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1094 ctx->len.u[1] = mlen;
1097 /* First call to decrypt finalizes GHASH(AAD) */
1102 if (is_endian.little)
1103 ctr = GETU32(ctx->Yi.c+12);
1108 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1109 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partially used keystream block one byte at a time; c is the
 * ciphertext byte (its load is not visible here). */
1113 *(out++) = c^ctx->EKi.c[n];
1118 if (n==0) GCM_MUL (ctx,Xi);
1124 #if defined(STRICT_ALIGNMENT)
1125 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1128 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Hash GHASH_CHUNK bytes of ciphertext first, then decrypt them. */
1129 while (len>=GHASH_CHUNK) {
1130 size_t j=GHASH_CHUNK;
1132 GHASH(ctx,in,GHASH_CHUNK);
1134 size_t *out_t=(size_t *)out;
1135 const size_t *in_t=(const size_t *)in;
1137 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1139 if (is_endian.little)
1140 PUTU32(ctx->Yi.c+12,ctr);
1143 for (i=0; i<16/sizeof(size_t); ++i)
1144 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Remaining whole 16-byte blocks (i = len rounded down to 16). */
1151 if ((i = (len&(size_t)-16))) {
1154 size_t *out_t=(size_t *)out;
1155 const size_t *in_t=(const size_t *)in;
1157 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1159 if (is_endian.little)
1160 PUTU32(ctx->Yi.c+12,ctr);
1163 for (i=0; i<16/sizeof(size_t); ++i)
1164 out_t[i] = in_t[i]^ctx->EKi.t[i];
/* Block loop variant without the GHASH macro. */
1172 size_t *out_t=(size_t *)out;
1173 const size_t *in_t=(const size_t *)in;
1175 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1177 if (is_endian.little)
1178 PUTU32(ctx->Yi.c+12,ctr);
/* NOTE(review): this word-wise loop stores through out[i], where out is
 * an unsigned char pointer and EKi.t[i] is a word, although out_t/in_t
 * are declared above and every sibling loop writes out_t[i].  That looks
 * like a defect (only one truncated byte per word is written); the line
 * defining c is not visible here - confirm and switch to in_t/out_t. */
1181 for (i=0; i<16/sizeof(size_t); ++i) {
1183 out[i] = c^ctx->EKi.t[i];
/* Trailing partial block: one more keystream block, used byte by byte. */
1193 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1195 if (is_endian.little)
1196 PUTU32(ctx->Yi.c+12,ctr);
1202 out[n] = c^ctx->EKi.c[n];
/* Byte-at-a-time path over the whole input. */
1211 for (i=0;i<len;++i) {
1214 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1216 if (is_endian.little)
1217 PUTU32(ctx->Yi.c+12,ctr);
1222 out[i] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32 - encrypt using a caller-supplied counter
 * mode routine for whole blocks.
 *
 * ctx:    context after setiv (and optional aad).
 * in/out: input and output buffers.
 * len:    number of bytes to process.
 * stream: called as stream(in, out, blocks, key, ivec) with ctx->Yi.c as
 *         the counter block; this function advances ctr by the number of
 *         blocks itself and writes it back into Yi.
 *
 * The produced ciphertext is folded into ctx->Xi with GHASH where that is
 * available, otherwise by XOR plus GCM_MUL.  The tail shorter than 16
 * bytes uses ctx->block directly.
 * NOTE(review): return statements, pointer/length updates and some loop
 * control are not visible in this listing.
 */
1233 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1234 const unsigned char *in, unsigned char *out,
1235 size_t len, ctr128_f stream)
1237 const union { long one; char little; } is_endian = {1};
1238 unsigned int n, ctr;
1240 u64 mlen = ctx->len.u[1];
1241 void *key = ctx->key;
1242 #ifdef GCM_FUNCREF_4BIT
1243 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1245 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1246 const u8 *inp,size_t len) = ctx->ghash;
/* Limit on the running message length (2^36 - 32 bytes); the second test
 * looks like a wrap-around check after adding len (addition not visible). */
1251 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1253 ctx->len.u[1] = mlen;
1256 /* First call to encrypt finalizes GHASH(AAD) */
1261 if (is_endian.little)
1262 ctr = GETU32(ctx->Yi.c+12);
/* Finish a partially used keystream block one byte at a time. */
1269 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1273 if (n==0) GCM_MUL(ctx,Xi);
1279 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Encrypt GHASH_CHUNK bytes via stream, bump ctr, then hash the output. */
1280 while (len>=GHASH_CHUNK) {
1281 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1282 ctr += GHASH_CHUNK/16;
1283 if (is_endian.little)
1284 PUTU32(ctx->Yi.c+12,ctr);
1287 GHASH(ctx,out,GHASH_CHUNK);
/* Remaining whole blocks: j blocks through stream (j is set on a line
 * not visible here). */
1293 if ((i = (len&(size_t)-16))) {
1296 (*stream)(in,out,j,key,ctx->Yi.c);
1297 ctr += (unsigned int)j;
1298 if (is_endian.little)
1299 PUTU32(ctx->Yi.c+12,ctr);
/* Without GHASH: XOR each ciphertext block into Xi. */
1309 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
/* Tail: one keystream block from ctx->block, consumed byte by byte. */
1316 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1318 if (is_endian.little)
1319 PUTU32(ctx->Yi.c+12,ctr);
1323 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32 - decrypt using a caller-supplied counter
 * mode routine for whole blocks.
 *
 * ctx:    context after setiv (and optional aad).
 * in/out: input (ciphertext) and output buffers.
 * len:    number of bytes to process.
 * stream: called as stream(in, out, blocks, key, ivec) with ctx->Yi.c as
 *         the counter block; this function advances ctr by the number of
 *         blocks itself and writes it back into Yi.
 *
 * The ciphertext is folded into ctx->Xi before it is decrypted (GHASH on
 * in, or XOR of in into Xi).  The tail shorter than 16 bytes uses
 * ctx->block directly.
 * NOTE(review): return statements, pointer/length updates and some loop
 * control are not visible in this listing.
 */
1332 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1333 const unsigned char *in, unsigned char *out,
1334 size_t len,ctr128_f stream)
1336 const union { long one; char little; } is_endian = {1};
1337 unsigned int n, ctr;
1339 u64 mlen = ctx->len.u[1];
1340 void *key = ctx->key;
1341 #ifdef GCM_FUNCREF_4BIT
1342 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1344 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1345 const u8 *inp,size_t len) = ctx->ghash;
/* Limit on the running message length (2^36 - 32 bytes); the second test
 * looks like a wrap-around check after adding len (addition not visible). */
1350 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1352 ctx->len.u[1] = mlen;
1355 /* First call to decrypt finalizes GHASH(AAD) */
1360 if (is_endian.little)
1361 ctr = GETU32(ctx->Yi.c+12);
/* Finish a partially used keystream block one byte at a time; c is the
 * ciphertext byte (its load is not visible here). */
1369 *(out++) = c^ctx->EKi.c[n];
1374 if (n==0) GCM_MUL (ctx,Xi);
1380 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Hash GHASH_CHUNK bytes of ciphertext, then decrypt them via stream. */
1381 while (len>=GHASH_CHUNK) {
1382 GHASH(ctx,in,GHASH_CHUNK);
1383 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1384 ctr += GHASH_CHUNK/16;
1385 if (is_endian.little)
1386 PUTU32(ctx->Yi.c+12,ctr);
/* Remaining whole blocks (i = len rounded down to 16). */
1394 if ((i = (len&(size_t)-16))) {
/* Without GHASH: XOR each ciphertext block into Xi. */
1402 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1409 (*stream)(in,out,j,key,ctx->Yi.c);
1410 ctr += (unsigned int)j;
1411 if (is_endian.little)
1412 PUTU32(ctx->Yi.c+12,ctr);
/* Tail: one keystream block from ctx->block, consumed byte by byte. */
1420 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1422 if (is_endian.little)
1423 PUTU32(ctx->Yi.c+12,ctr);
1429 out[n] = c^ctx->EKi.c[n];
1438 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
/*
 * CRYPTO_gcm128_finish - complete the authentication value in ctx->Xi and
 * optionally compare it with an expected tag.
 *
 * Visible steps: convert the AAD and message byte counts to bit counts,
 * put them in big-endian order, XOR them into Xi, XOR EK0 into Xi, and -
 * when tag is non-NULL and len fits in Xi - return memcmp(Xi, tag, len)
 * (0 on match).
 * NOTE(review): the rest of the signature, the GCM_MUL calls and the
 * fall-through return value are not visible in this listing.
 */
1441 const union { long one; char little; } is_endian = {1};
/* Byte counts -> bit counts. */
1442 u64 alen = ctx->len.u[0]<<3;
1443 u64 clen = ctx->len.u[1]<<3;
1444 #ifdef GCM_FUNCREF_4BIT
1445 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
/* A partial AAD or message block is still pending. */
1448 if (ctx->mres || ctx->ares)
/* Big-endian encoding of the two lengths on little-endian hosts. */
1451 if (is_endian.little) {
1453 alen = BSWAP8(alen);
1454 clen = BSWAP8(clen);
/* Alternative form of the same conversion: store the lengths into
 * ctx->len and re-read them through GETU32. */
1458 ctx->len.u[0] = alen;
1459 ctx->len.u[1] = clen;
1461 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1462 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Fold the length block into Xi. */
1466 ctx->Xi.u[0] ^= alen;
1467 ctx->Xi.u[1] ^= clen;
/* Mask with EK0 to obtain the final tag value in Xi. */
1470 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1471 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* NOTE(review): memcmp may return as soon as a byte differs, so its
 * running time can depend on the tag contents; a constant-time comparison
 * is usually preferred for tag verification - confirm whether one is
 * available to this file. */
1473 if (tag && len<=sizeof(ctx->Xi))
1474 return memcmp(ctx->Xi.c,tag,len);
/*
 * CRYPTO_gcm128_tag: finalizes the context and copies the resulting tag
 * from ctx->Xi into the caller's buffer `tag`.
 * At most sizeof(ctx->Xi.c) bytes are written; a larger `len` is clamped.
 * (Braces are on lines omitted from this listing.)
 */
1479 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
/* Called with tag == NULL, so the `tag && ...` comparison in
 * CRYPTO_gcm128_finish is not taken; the call is made only for its effect
 * on ctx->Xi and its return value is ignored. */
1481 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* Copy min(len, sizeof(ctx->Xi.c)) bytes. */
1482 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: allocates a GCM128_CONTEXT with OPENSSL_malloc and,
 * if the allocation succeeded, initializes it with `key` and `block` via
 * CRYPTO_gcm128_init. Nothing is initialized when the allocation fails.
 * (Braces and the return statement are on lines omitted from this listing;
 * the function presumably returns `ret` -- TODO confirm.)
 */
1485 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1487 GCM128_CONTEXT *ret;
/* The assignment inside the condition doubles as the NULL check. */
1489 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1490 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release: wipes the whole context structure with
 * OPENSSL_cleanse so key-dependent state does not linger in memory.
 * NOTE(review): the remaining lines of this function are omitted from this
 * listing; any NULL check on `ctx` and the freeing of the allocation are
 * presumably there -- confirm against the full source.
 */
1495 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1498 OPENSSL_cleanse(ctx,sizeof(*ctx));
1503 #if defined(SELFTEST)
1505 #include <openssl/aes.h>
/*
 * Self-test case 1 (operand roles are those TEST_CASE(n) gives them).
 * K1: 16-byte key with no initializer; as a static object it is zero-filled.
 *     TEST_CASE passes sizeof(K1)*8 = 128 as the AES key size in bits.
 * T1: expected 16-byte authentication tag.
 * The declarators between K1 and T1 are on lines omitted from this listing.
 */
1508 static const u8 K1[16],
1513 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/*
 * Self-test case 2.
 * P2: 16-byte plaintext with no initializer (zero-filled static object).
 * C2: expected ciphertext; T2: expected 16-byte tag.
 * Operands of TEST_CASE(2) not declared here are defined on lines omitted
 * from this listing.
 */
1519 static const u8 P2[16],
1520 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1521 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/*
 * Self-test case 3.
 * K3: 16-byte (128-bit) key; P3: 64-byte plaintext; IV3: 12-byte IV;
 * C3: expected 64-byte ciphertext; T3: expected 16-byte tag.
 */
1525 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1526 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1527 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1528 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1529 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1530 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1531 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1532 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1533 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1534 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1535 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
/*
 * Self-test case 4.
 * P4: 60-byte plaintext (not a multiple of the 16-byte block size);
 * A4: 20 bytes of additional authenticated data; C4: expected 60-byte
 * ciphertext; T4: expected 16-byte tag.
 * The key and IV used by TEST_CASE(4) are defined on lines omitted from
 * this listing.
 */
1540 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1541 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1542 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1543 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1544 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1545 0xab,0xad,0xda,0xd2},
1546 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1547 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1548 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1549 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1550 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/*
 * Self-test case 5.
 * IV5: 8-byte IV (shorter than the 12-byte IVs of the other cases shown);
 * C5: expected 60-byte ciphertext; T5: expected 16-byte tag.
 * The other TEST_CASE(5) operands are defined on lines omitted from this
 * listing.
 */
1556 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1557 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1558 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1559 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1560 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1561 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/*
 * Self-test case 6.
 * IV6: 60-byte IV (longer than one 16-byte block); C6: expected 60-byte
 * ciphertext; T6: expected 16-byte tag.
 * The other TEST_CASE(6) operands are defined on lines omitted from this
 * listing.
 */
1567 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1568 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1569 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1570 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1571 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1572 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1573 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1574 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1575 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/*
 * Self-test case 7.
 * K7: 24-byte key with no initializer (zero-filled static object);
 *     TEST_CASE passes sizeof(K7)*8 = 192 as the AES key size in bits.
 * T7: expected 16-byte tag.
 * The declarators between K7 and T7 are on lines omitted from this listing.
 */
1578 static const u8 K7[24],
1583 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/*
 * Self-test case 8.
 * P8: 16-byte plaintext with no initializer (zero-filled static object).
 * C8: expected ciphertext; T8: expected 16-byte tag.
 * The other TEST_CASE(8) operands are defined on lines omitted from this
 * listing.
 */
1589 static const u8 P8[16],
1590 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1591 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/*
 * Self-test case 9.
 * K9: 24-byte (192-bit) key; P9: 64-byte plaintext; IV9: 12-byte IV;
 * C9: expected 64-byte ciphertext; T9: expected 16-byte tag.
 */
1595 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1596 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1597 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1598 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1599 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1600 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1601 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1602 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1603 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1604 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1605 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1606 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/*
 * Self-test case 10.
 * P10: 60-byte plaintext; A10: 20 bytes of additional authenticated data;
 * C10: expected 60-byte ciphertext; T10: expected 16-byte tag.
 * The key and IV used by TEST_CASE(10) are defined on lines omitted from
 * this listing.
 */
1611 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1612 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1613 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1614 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1615 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1616 0xab,0xad,0xda,0xd2},
1617 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1618 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1619 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1620 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1621 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/*
 * Self-test case 11.
 * IV11: 8-byte IV; C11: expected 60-byte ciphertext; T11: expected
 * 16-byte tag.
 * The other TEST_CASE(11) operands are defined on lines omitted from this
 * listing.
 */
1627 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1628 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1629 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1630 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1631 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1632 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/*
 * Self-test case 12.
 * IV12: 60-byte IV; C12: expected 60-byte ciphertext; T12: expected
 * 16-byte tag.
 * The other TEST_CASE(12) operands are defined on lines omitted from this
 * listing.
 */
1638 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1639 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1640 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1641 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1642 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1643 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1644 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1645 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1646 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/*
 * Self-test case 13.
 * K13: 32-byte key with no initializer (zero-filled static object);
 *      TEST_CASE passes sizeof(K13)*8 = 256 as the AES key size in bits.
 * T13: expected 16-byte tag.
 * The declarators between K13 and T13 are on lines omitted from this listing.
 */
1649 static const u8 K13[32],
1654 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/*
 * Self-test case 14.
 * P14: 16-byte plaintext with no initializer (zero-filled static object).
 * C14: expected ciphertext; T14: expected 16-byte tag.
 * The other TEST_CASE(14) operands are defined on lines omitted from this
 * listing.
 */
1659 static const u8 P14[16],
1661 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1662 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/*
 * Self-test case 15.
 * K15: 32-byte (256-bit) key; P15: 64-byte plaintext; IV15: 12-byte IV;
 * C15: expected 64-byte ciphertext; T15: expected 16-byte tag.
 */
1666 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1667 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1668 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1669 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1670 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1671 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1672 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1673 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1674 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1675 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1676 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1677 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/*
 * Self-test case 16.
 * P16: 60-byte plaintext; A16: 20 bytes of additional authenticated data;
 * C16: expected 60-byte ciphertext; T16: expected 16-byte tag.
 * The key and IV used by TEST_CASE(16) are defined on lines omitted from
 * this listing.
 */
1682 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1683 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1684 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1685 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1686 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1687 0xab,0xad,0xda,0xd2},
1688 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1689 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1690 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1691 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1692 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/*
 * Self-test case 17.
 * IV17: 8-byte IV; C17: expected 60-byte ciphertext; T17: expected
 * 16-byte tag.
 * The other TEST_CASE(17) operands are defined on lines omitted from this
 * listing.
 */
1698 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1699 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1700 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1701 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1702 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1703 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/*
 * Self-test case 18.
 * IV18: 60-byte IV; C18: expected 60-byte ciphertext; T18: expected
 * 16-byte tag.
 * The other TEST_CASE(18) operands are defined on lines omitted from this
 * listing.
 */
1709 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1710 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1711 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1712 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1713 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1714 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1715 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1716 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1717 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/*
 * Self-test case 19.
 * A19: 128 bytes of additional authenticated data; T19: expected 16-byte
 * tag. No plaintext or ciphertext table for this case is visible here.
 * The other TEST_CASE(19) operands are defined on lines omitted from this
 * listing.
 */
1724 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1725 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1726 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1727 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1728 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1729 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1730 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1731 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1732 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
/*
 * Self-test case 20.
 * IV20: 64-byte IV whose first four bytes are 0xff; the remaining 60 bytes
 *       have no initializer and are therefore zero.
 * C20: expected 288-byte ciphertext; T20: expected 16-byte tag.
 * The other TEST_CASE(20) operands are defined on lines omitted from this
 * listing.
 */
1737 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1739 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1740 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1741 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1742 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1743 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1744 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1745 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1746 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1747 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1748 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1749 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1750 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1751 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1752 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1753 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1754 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1755 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1756 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1757 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * TEST_CASE(n): runs self-test vector n in both directions, using the
 * operands K##n (key), IV##n, A##n (AAD), P##n (plaintext), C##n (expected
 * ciphertext) and T##n (expected 16-byte tag).
 *
 * Expects `ctx`, `key` and `ret` to be in scope at the point of use.
 *
 * Encrypt pass: expands the AES key (sizeof(K##n)*8 bits), initializes the
 * GCM context, sets the IV, feeds A##n if it is non-null, encrypts P##n
 * into a zeroed local buffer `out` sized like P##n, then fails the case if
 * CRYPTO_gcm128_finish() returns non-zero for T##n or `out` differs from C##n.
 *
 * Decrypt pass: sets the IV again on the same context (no re-init),
 * decrypts C##n into `out`, and checks the tag and P##n the same way.
 *
 * Each failure increments `ret` and prints a message naming the case.
 * The closing line of the do-block is omitted from this listing.
 */
1759 #define TEST_CASE(n) do { \
1760 u8 out[sizeof(P##n)]; \
1761 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1762 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1763 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1764 memset(out,0,sizeof(out)); \
1765 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1766 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1767 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1768 (C##n && memcmp(out,C##n,sizeof(out)))) \
1769 ret++, printf ("encrypt test#%d failed.\n",n); \
1770 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1771 memset(out,0,sizeof(out)); \
1772 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1773 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1774 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1775 (P##n && memcmp(out,P##n,sizeof(out)))) \
1776 ret++, printf ("decrypt test#%d failed.\n",n); \
/*
 * Timing section of the self-test, compiled only when OPENSSL_CPUID_OBJ is
 * defined. It times GCM encryption, plain CTR encryption and GHASH over a
 * 1024-byte buffer using OPENSSL_rdtsc() and prints per-byte figures.
 * The enclosing function header, braces, the declaration of `i` and the
 * closing #endif are on lines omitted from this listing.
 */
1806 #ifdef OPENSSL_CPUID_OBJ
/* OPENSSL_rdtsc is declared here, in the same declaration, as a function
 * returning size_t. NOTE(review): `stop` is not used in any visible line. */
1808 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
/* 1024-byte work buffer; the u64 member is presumably there to force
 * alignment -- TODO confirm. No visible line initializes it before use. */
1809 union { u64 u; u8 c[1024]; } buf;
1812 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1813 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1814 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
/* GCM: one untimed call first, then the timed call. */
1816 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1817 start = OPENSSL_rdtsc();
1818 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1819 gcm_t = OPENSSL_rdtsc() - start;
/* CTR alone, reusing the GCM context's counter block, key-stream block and
 * mres field as its state: one untimed call, then the timed call. */
1821 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1822 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1823 (block128_f)AES_encrypt);
1824 start = OPENSSL_rdtsc();
1825 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1826 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1827 (block128_f)AES_encrypt);
1828 ctr_t = OPENSSL_rdtsc() - start;
/* Prints "GCM - CTR = difference", each divided by the buffer size.
 * NOTE(review): gcm_t and ctr_t are size_t, so gcm_t-ctr_t wraps to a huge
 * value if the CTR measurement happens to exceed the GCM one. */
1830 printf("%.2f-%.2f=%.2f\n",
1831 gcm_t/(double)sizeof(buf),
1832 ctr_t/(double)sizeof(buf),
1833 (gcm_t-ctr_t)/(double)sizeof(buf));
/* Local copy of the context's ghash pointer, presumably needed by the
 * GHASH macro -- TODO confirm against the macro definition. */
1836 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1837 const u8 *inp,size_t len) = ctx.ghash;
/* GHASH alone: one untimed call, then 100 timed calls. `i` is 100 when the
 * loop ends, so the printed figure is the average per byte per call. */
1839 GHASH((&ctx),buf.c,sizeof(buf));
1840 start = OPENSSL_rdtsc();
1841 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1842 gcm_t = OPENSSL_rdtsc() - start;
1843 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);