1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
/*
 * When both BSWAP4 and STRICT_ALIGNMENT are defined, GETU32/PUTU32 are
 * redefined as one u32 load/store through BSWAP4.
 * NOTE(review): p is cast directly to a u32 pointer, so every caller must
 * pass a suitably aligned address. PUTU32 expands to an unparenthesised
 * assignment and should only be used as a complete statement.
 */
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/*
 * PACK(s) moves a 16-bit constant into the top 16 bits of a size_t
 * (left shift by sizeof(size_t)*8-16).
 *
 * REDUCE1BIT(V) shifts the 128-bit value V (fields hi and lo) right by one
 * bit. The mask 0-(V.lo&1) is all ones when the bit about to be shifted out
 * is set, so in that case the constant 0xe1 is XORed into the top byte of
 * V.hi. Two forms of the same computation are shown: one building the mask
 * as a u64, one building it as a u32 and widening it with a 32-bit shift.
 * The first is guarded by sizeof(size_t)==8.
 * NOTE(review): V is expanded several times, so pass a plain lvalue.
 */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * gcm_init_8bit - fill the 256-entry table used by the 8-bit multiply.
 *
 * Htable: output table of 256 u128 entries.
 * H:      hash key as two u64 words.
 *
 * From the statements shown: Htable[128] is seeded from V and the first
 * loop visits the power-of-two slots i = 64, 32, ..., 1. The second loop
 * then fills each remaining slot Htable[i+j] (i a power of two, 0 < j < i)
 * with Htable[i] XOR Htable[j].
 * NOTE(review): the initialisation of V and the body of the first loop are
 * not present in this copy; presumably V starts from H and is stepped with
 * REDUCE1BIT. Confirm against the complete source.
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit - multiply Xi by the hash key using the 256-entry table,
 * storing the product back into Xi.
 *
 * Xi:     16-byte value, read byte by byte starting at offset 15; the walk
 *         stops once the byte pointer equals the start of Xi.
 * Htable: table of 256 entries, indexed by n.
 *
 * Per byte: Htable[n] is XORed into the accumulator Z, then Z is shifted
 * right by 8 bits and the byte that fell off the low end (rem) selects a
 * correction from rem_8bit that is XORed into Z.hi. The table entries are
 * PACKed into the top 16 bits of a size_t, so with an 8-byte size_t they
 * are applied directly and otherwise they are first shifted left by 32.
 *
 * The result is written to Xi in big-endian order: BSWAP8 of each half on
 * little-endian hosts, or four PUTU32 stores.
 * NOTE(review): the statement that computes n and the Z.hi>>8 step are not
 * present in this copy of the file.
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
148 const u8 *xi = (const u8 *)Xi+15;
150 const union { long one; char little; } is_endian = {1};
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
222 if ((u8 *)Xi==xi) break;
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
235 if (is_endian.little) {
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
/*
 * GCM_MUL(ctx,Xi): in-place multiply of a context field by the hash key.
 * The second macro argument is substituted as the member name after ctx->,
 * so it has to be the identifier of a context member that has a .u view.
 */
253 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit - fill the 16-entry table used by the 4-bit multiply.
 *
 * Htable: output table of 16 u128 entries.
 * H:      hash key as two u64 words.
 *
 * With OPENSSL_SMALL_FOOTPRINT the table is produced by loops: Htable[8] is
 * seeded from V, the power-of-two slots are visited for i = 4, 2, 1, and
 * each remaining slot Htable[i+j] becomes Htable[i] XOR Htable[j].
 * Otherwise the same XOR combinations are written out one assignment per
 * entry (slots 3, 5-7 and 9-15 are shown).
 *
 * When GHASH_ASM is defined on ARM a post-pass rewrites the entries with
 * the hi and lo halves exchanged; the statements shown also rotate each
 * 64-bit half by 32 bits.
 * NOTE(review): the power-of-two slot assignments, the REDUCE1BIT steps and
 * the branch structure of the ARM post-pass are not present in this copy,
 * so which byte order each rewrite applies to cannot be confirmed here.
 */
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
270 for (Htable[8]=V, i=4; i>0; i>>=1) {
275 for (i=2; i<16; i<<=1) {
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307 * ARM assembler expects specific dword order in Htable.
311 const union { long one; char little; } is_endian = {1};
313 if (is_endian.little)
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * rem_4bit - correction constants for the 4-bit multiply, indexed by the
 * four bits that drop off the low end of Z.lo when Z is shifted right by 4.
 * Each value is PACKed into the top 16 bits of a size_t, so users XOR it
 * into Z.hi directly when size_t is 8 bytes and shift it left by 32 first
 * otherwise. Entry k holds the same 16-bit constant as entry k<<4 of the
 * rem_8bit tables in this file.
 */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit - multiply Xi by the hash key using the 16-entry table,
 * storing the product back into Xi.
 *
 * Xi:     16-byte value; processing starts with the byte at offset 15 and
 *         later bytes are fetched as Xi[cnt].
 * Htable: table of 16 entries, indexed by a nibble (nlo or nhi).
 *
 * The accumulator Z starts as Htable[nlo] for the first nibble. Each later
 * step shifts Z right by 4 bits, folds the four bits that fell off through
 * rem_4bit into Z.hi (shifted left by 32 when size_t is not 8 bytes), and
 * XORs in the table entry for the next nibble.
 *
 * The result is written to Xi in big-endian order: BSWAP8 of each half on
 * little-endian hosts, or four PUTU32 stores.
 * NOTE(review): the nibble extraction, the Z.hi>>4 steps and the loop
 * header are not present in this copy of the file.
 */
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
341 size_t rem, nlo, nhi;
342 const union { long one; char little; } is_endian = {1};
344 nlo = ((const u8 *)Xi)[15];
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
365 nlo = ((const u8 *)Xi)[cnt];
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
381 if (is_endian.little) {
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
/*
 * gcm_ghash_4bit - streamed form of the 4-bit multiply, compiled only when
 * OPENSSL_SMALL_FOOTPRINT is not defined.
 *
 * Xi:     16-byte running hash value, updated in place.
 * Htable: table of 16 entries.
 * inp:    input data, consumed 16 bytes per iteration.
 * len:    byte count. The outer loop ends only when len-=16 yields zero,
 *         so len must be a non-zero multiple of 16.
 *
 * Two bodies are shown. The first repeats the nibble-at-a-time scheme of
 * gcm_gmult_4bit. The second precomputes two per-call tables from Htable,
 * Hshr4 (each entry shifted right by 4 bits) and Hshl4 (low byte of each
 * entry shifted left by 4), and then handles bytes 15..1 of the block with
 * one 8-bit shift of Z per byte, taking the correction from the local
 * rem_8bit table at index rem^Hshl4[nhi], shifted into the top 16 bits of
 * Z.hi. Byte 0 is finished with a 4-bit shift and rem_8bit[rem<<4].
 *
 * After each block Z is stored to Xi in big-endian order.
 * NOTE(review): the statements that combine Xi with inp, the nibble
 * extraction, the Z.hi shifts and the preprocessor lines selecting between
 * the two bodies are not present in this copy of the file.
 */
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
413 size_t rem, nlo, nhi;
414 const union { long one; char little; } is_endian = {1};
419 nlo = ((const u8 *)Xi)[15];
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
441 nlo = ((const u8 *)Xi)[cnt];
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
461 * the rem_8bit even here, but the priority is to minimize
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
524 rem = (size_t)Z.lo&0xff;
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
534 nlo = ((const u8 *)Xi)[0];
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
542 rem = (size_t)Z.lo&0xf;
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
552 if (is_endian.little) {
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
569 } while (inp+=16, len-=16);
/*
 * Non-static prototypes for the 4-bit routines, followed by the macros the
 * rest of the file uses to reach them:
 *   GCM_MUL(ctx,Xi)   - single in-place multiply; the second argument is
 *                       substituted as the member name after ctx->.
 *   GHASH(ctx,in,len) - hash len bytes of in into ctx->Xi; defined only
 *                       when GHASH_ASM is defined or OPENSSL_SMALL_FOOTPRINT
 *                       is not.
 * NOTE(review): presumably the prototypes cover the case where the bodies
 * come from assembler modules; the guarding preprocessor lines are not
 * present in this copy.
 */
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
577 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is the "stride parameter" meant to mitigate the cache
581 * thrashing effect. In other words, the idea is to hash data while it is
582 * still in L1 cache after the encryption pass... */
583 #define GHASH_CHUNK (3*1024)
586 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit - table-free multiply of Xi by H, one bit at a time,
 * storing the product back into Xi.
 *
 * Xi: 16-byte value, read as 16/sizeof(long) words of type long.
 * H:  hash key in host byte order (used without byte swapping).
 *
 * On little-endian hosts each word is converted with BSWAP8, or assembled
 * from two GETU32 reads, before use. The inner loop runs once per bit of
 * the word, shifting X left each time and deriving M from the top bit.
 * The result is written to Xi in big-endian order.
 * NOTE(review): M is produced by right-shifting a signed long, which
 * relies on the compiler performing an arithmetic shift (implementation-
 * defined for negative values). The statements that use M and step V are
 * not present in this copy of the file.
 */
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
603 X = (long)(BSWAP8(xi[j]));
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
610 const u8 *p = (const u8 *)(xi+j);
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
626 if (is_endian.little) {
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
644 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Per-architecture declarations for alternative GHASH back ends, active
 * when TABLE_BITS is 4 and GHASH_ASM or OPENSSL_CPUID_OBJ is defined.
 *
 * Each recognised architecture defines a GHASH_ASM_* tag plus
 * GCM_FUNCREF_4BIT and declares its init/gmult/ghash entry points:
 *   x86 and x86_64 - clmul and avx routines (on 32-bit x86 the avx names
 *                    are aliased to the clmul ones), plus 4bit_mmx and
 *                    4bit_x86 routines on 32-bit x86;
 *   ARM, AArch64   - neon and v8 routines, with PMULL_CAPABLE and
 *                    NEON_CAPABLE tests on OPENSSL_armcap_P;
 *   SPARC          - vis3 routines;
 *   PowerPC        - p8 routines (requires OPENSSL_CPUID_OBJ).
 *
 * With GCM_FUNCREF_4BIT defined, GCM_MUL and GHASH call through local
 * function pointers named gcm_gmult_p and gcm_ghash_p, which the using
 * function has to declare.
 */
648 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
649 # if !defined(I386_ONLY) && \
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
653 # define GHASH_ASM_X86_OR_64
654 # define GCM_FUNCREF_4BIT
655 extern unsigned int OPENSSL_ia32cap_P[2];
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
661 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define gcm_init_avx gcm_init_clmul
663 # define gcm_gmult_avx gcm_gmult_clmul
664 # define gcm_ghash_avx gcm_ghash_clmul
666 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
671 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
672 # define GHASH_ASM_X86
673 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
679 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
680 # include "arm_arch.h"
682 # define GHASH_ASM_ARM
683 # define GCM_FUNCREF_4BIT
684 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
685 # if defined(__arm__) || defined(__arm)
686 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
688 void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
689 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
690 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
691 void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
692 void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
693 void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
695 # elif defined(__sparc__) || defined(__sparc)
696 # include "sparc_arch.h"
697 # define GHASH_ASM_SPARC
698 # define GCM_FUNCREF_4BIT
699 extern unsigned int OPENSSL_sparcv9cap_P[];
700 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
701 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
702 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
703 #elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
704 # define GHASH_ASM_PPC
705 # define GCM_FUNCREF_4BIT
706 extern unsigned int OPENSSL_ppccap_P[];
707 void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
708 void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
709 void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
713 #ifdef GCM_FUNCREF_4BIT
715 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
718 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * CRYPTO_gcm128_init - prepare a GCM context for a given block cipher key.
 *
 * ctx:   context to initialise; it is zeroed first.
 * key:   opaque cipher key, handed to block unchanged.
 * block: single-block encryption callback.
 *
 * After the memset the function encrypts ctx->H.c in place to obtain the
 * hash key, converts it to host byte order on little-endian hosts (BSWAP8,
 * or a GETU32-based reassembly), and then builds the multiplication table
 * and selects the gmult/ghash implementation:
 *   - 8-bit build: gcm_init_8bit;
 *   - x86/x86_64:  avx or clmul when the FXSR and PCLMULQDQ capability bits
 *                  are set (avx additionally needs the AVX+MOVBE bits);
 *                  otherwise gcm_init_4bit with the mmx, x86 or generic
 *                  4-bit routines;
 *   - ARM:         v8 or neon routines, otherwise the generic 4-bit ones;
 *   - SPARC:       vis3 when SPARCV9_VIS3 is set, otherwise generic;
 *   - PowerPC:     p8 when bit 2 of OPENSSL_ppccap_P[0] is set, otherwise
 *                  generic;
 *   - any other:   gcm_init_4bit only.
 * NOTE(review): the conditions guarding the ARM branches, the stores of
 * block and key into ctx, and the else/endif lines are not present in this
 * copy of the file.
 */
722 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
724 const union { long one; char little; } is_endian = {1};
726 memset(ctx,0,sizeof(*ctx));
730 (*block)(ctx->H.c,ctx->H.c,key);
732 if (is_endian.little) {
733 /* H is stored in host byte order */
735 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
736 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
740 hi = (u64)GETU32(p) <<32|GETU32(p+4);
741 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
748 gcm_init_8bit(ctx->Htable,ctx->H.u);
750 # if defined(GHASH_ASM_X86_OR_64)
751 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
752 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
753 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
754 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
755 gcm_init_avx(ctx->Htable,ctx->H.u);
756 ctx->gmult = gcm_gmult_avx;
757 ctx->ghash = gcm_ghash_avx;
759 gcm_init_clmul(ctx->Htable,ctx->H.u);
760 ctx->gmult = gcm_gmult_clmul;
761 ctx->ghash = gcm_ghash_clmul;
766 gcm_init_4bit(ctx->Htable,ctx->H.u);
767 # if defined(GHASH_ASM_X86) /* x86 only */
768 # if defined(OPENSSL_IA32_SSE2)
769 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
771 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
773 ctx->gmult = gcm_gmult_4bit_mmx;
774 ctx->ghash = gcm_ghash_4bit_mmx;
776 ctx->gmult = gcm_gmult_4bit_x86;
777 ctx->ghash = gcm_ghash_4bit_x86;
780 ctx->gmult = gcm_gmult_4bit;
781 ctx->ghash = gcm_ghash_4bit;
783 # elif defined(GHASH_ASM_ARM)
784 # ifdef PMULL_CAPABLE
786 gcm_init_v8(ctx->Htable,ctx->H.u);
787 ctx->gmult = gcm_gmult_v8;
788 ctx->ghash = gcm_ghash_v8;
793 gcm_init_neon(ctx->Htable,ctx->H.u);
794 ctx->gmult = gcm_gmult_neon;
795 ctx->ghash = gcm_ghash_neon;
799 gcm_init_4bit(ctx->Htable,ctx->H.u);
800 ctx->gmult = gcm_gmult_4bit;
801 ctx->ghash = gcm_ghash_4bit;
803 # elif defined(GHASH_ASM_SPARC)
804 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
805 gcm_init_vis3(ctx->Htable,ctx->H.u);
806 ctx->gmult = gcm_gmult_vis3;
807 ctx->ghash = gcm_ghash_vis3;
809 gcm_init_4bit(ctx->Htable,ctx->H.u);
810 ctx->gmult = gcm_gmult_4bit;
811 ctx->ghash = gcm_ghash_4bit;
813 # elif defined(GHASH_ASM_PPC)
814 if (OPENSSL_ppccap_P[0] & (1<<2)) {
815 gcm_init_p8(ctx->Htable,ctx->H.u);
816 ctx->gmult = gcm_gmult_p8;
817 ctx->ghash = gcm_ghash_p8;
819 gcm_init_4bit(ctx->Htable,ctx->H.u);
820 ctx->gmult = gcm_gmult_4bit;
821 ctx->ghash = gcm_ghash_4bit;
824 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv - load an IV and reset the per-message state.
 *
 * ctx: initialised context.
 * iv:  IV bytes.
 * len: IV length in bytes.
 *
 * The AAD and message length counters are cleared. Two IV paths are shown:
 * one copies 12 IV bytes straight into ctx->Yi, the other XORs the IV into
 * ctx->Yi 16 bytes at a time (plus a shorter tail) and then XORs the value
 * len0 into bytes 8..15 of ctx->Yi in big-endian order. The 32-bit counter
 * is then read from the last four bytes of Yi, Yi is encrypted into
 * ctx->EK0, and the counter is written back into the last four bytes of Yi.
 * NOTE(review): the test choosing between the two IV paths, the multiplies
 * between blocks, the computation of len0 and any counter increment are
 * not present in this copy of the file.
 */
829 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
831 const union { long one; char little; } is_endian = {1};
833 #ifdef GCM_FUNCREF_4BIT
834 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
841 ctx->len.u[0] = 0; /* AAD length */
842 ctx->len.u[1] = 0; /* message length */
847 memcpy(ctx->Yi.c,iv,12);
856 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
862 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
866 if (is_endian.little) {
868 ctx->Yi.u[1] ^= BSWAP8(len0);
870 ctx->Yi.c[8] ^= (u8)(len0>>56);
871 ctx->Yi.c[9] ^= (u8)(len0>>48);
872 ctx->Yi.c[10] ^= (u8)(len0>>40);
873 ctx->Yi.c[11] ^= (u8)(len0>>32);
874 ctx->Yi.c[12] ^= (u8)(len0>>24);
875 ctx->Yi.c[13] ^= (u8)(len0>>16);
876 ctx->Yi.c[14] ^= (u8)(len0>>8);
877 ctx->Yi.c[15] ^= (u8)(len0);
881 ctx->Yi.u[1] ^= len0;
885 if (is_endian.little)
887 ctr = BSWAP4(ctx->Yi.d[3]);
889 ctr = GETU32(ctx->Yi.c+12);
895 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
897 if (is_endian.little)
899 ctx->Yi.d[3] = BSWAP4(ctr);
901 PUTU32(ctx->Yi.c+12,ctr);
/*
 * CRYPTO_gcm128_aad - absorb additional authenticated data into the hash.
 *
 * ctx: context with an IV already set.
 * aad: data to authenticate.
 * len: number of bytes in aad.
 *
 * Returns -2 when message data has already been processed (the message
 * length counter is non-zero). The accumulated AAD length is checked
 * against 2^61 bytes, and against wrap-around when size_t is 8 bytes wide,
 * before being stored back into the context.
 *
 * Data is XORed into ctx->Xi byte by byte to complete a pending partial
 * block, then in whole 16-byte blocks, and a final partial block is XORed
 * in with its length kept in n.
 * NOTE(review): the other return statements, the update of alen and the
 * handling of n between calls are not present in this copy of the file.
 */
907 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
911 u64 alen = ctx->len.u[0];
912 #ifdef GCM_FUNCREF_4BIT
913 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
915 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
916 const u8 *inp,size_t len) = ctx->ghash;
920 if (ctx->len.u[1]) return -2;
923 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
925 ctx->len.u[0] = alen;
930 ctx->Xi.c[n] ^= *(aad++);
934 if (n==0) GCM_MUL(ctx,Xi);
942 if ((i = (len&(size_t)-16))) {
949 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
956 n = (unsigned int)len;
957 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt - encrypt data in counter mode and fold the
 * resulting ciphertext into the authentication hash.
 *
 * ctx: context with IV (and optionally AAD) already processed.
 * in:  plaintext.
 * out: ciphertext destination.
 *
 * The accumulated message length is checked against 2^36-32 bytes, and
 * against wrap-around when size_t is 8 bytes wide, before being stored.
 * The 32-bit counter is read from the last four bytes of ctx->Yi.
 *
 * Unless OPENSSL_SMALL_FOOTPRINT is defined, a word-wise path is used:
 *   - leftover key stream in ctx->EKi finishes a pending partial block
 *     byte by byte, with a multiply when the block completes;
 *   - under STRICT_ALIGNMENT the path is conditional on in and out both
 *     being size_t-aligned;
 *   - with GHASH and GHASH_CHUNK available, data is handled in chunks of
 *     GHASH_CHUNK bytes: each 16-byte block encrypts Yi into EKi, writes
 *     the counter back into Yi in big-endian order and XORs size_t words;
 *     the chunk of ciphertext just written is then hashed with one GHASH
 *     call. Remaining whole blocks are processed the same way;
 *   - without GHASH each block is XORed word by word;
 *   - a trailing partial block is XORed byte by byte.
 * The final loop is the byte-at-a-time path.
 * NOTE(review): the len parameter line, return statements, counter
 * increments and most closing braces and else lines are not present in
 * this copy of the file.
 */
964 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
965 const unsigned char *in, unsigned char *out,
968 const union { long one; char little; } is_endian = {1};
971 u64 mlen = ctx->len.u[1];
972 block128_f block = ctx->block;
973 void *key = ctx->key;
974 #ifdef GCM_FUNCREF_4BIT
975 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
977 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
978 const u8 *inp,size_t len) = ctx->ghash;
983 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
986 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
988 ctx->len.u[1] = mlen;
991 /* First call to encrypt finalizes GHASH(AAD) */
996 if (is_endian.little)
998 ctr = BSWAP4(ctx->Yi.d[3]);
1000 ctr = GETU32(ctx->Yi.c+12);
1006 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1007 if (16%sizeof(size_t) == 0) do { /* always true actually */
1010 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1014 if (n==0) GCM_MUL(ctx,Xi);
1020 #if defined(STRICT_ALIGNMENT)
1021 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1024 #if defined(GHASH) && defined(GHASH_CHUNK)
1025 while (len>=GHASH_CHUNK) {
1026 size_t j=GHASH_CHUNK;
1029 size_t *out_t=(size_t *)out;
1030 const size_t *in_t=(const size_t *)in;
1032 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1034 if (is_endian.little)
1036 ctx->Yi.d[3] = BSWAP4(ctr);
1038 PUTU32(ctx->Yi.c+12,ctr);
1042 for (i=0; i<16/sizeof(size_t); ++i)
1043 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1048 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
1051 if ((i = (len&(size_t)-16))) {
1055 size_t *out_t=(size_t *)out;
1056 const size_t *in_t=(const size_t *)in;
1058 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1060 if (is_endian.little)
1062 ctx->Yi.d[3] = BSWAP4(ctr);
1064 PUTU32(ctx->Yi.c+12,ctr);
1068 for (i=0; i<16/sizeof(size_t); ++i)
1069 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1078 size_t *out_t=(size_t *)out;
1079 const size_t *in_t=(const size_t *)in;
1081 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1083 if (is_endian.little)
1085 ctx->Yi.d[3] = BSWAP4(ctr);
1087 PUTU32(ctx->Yi.c+12,ctr);
1091 for (i=0; i<16/sizeof(size_t); ++i)
1093 out_t[i] = in_t[i]^ctx->EKi.t[i];
1101 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1103 if (is_endian.little)
1105 ctx->Yi.d[3] = BSWAP4(ctr);
1107 PUTU32(ctx->Yi.c+12,ctr);
1112 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1121 for (i=0;i<len;++i) {
1123 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1125 if (is_endian.little)
1127 ctx->Yi.d[3] = BSWAP4(ctr);
1129 PUTU32(ctx->Yi.c+12,ctr);
1134 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt - fold ciphertext into the authentication hash and
 * decrypt it in counter mode.
 *
 * ctx: context with IV (and optionally AAD) already processed.
 * in:  ciphertext.
 * out: plaintext destination.
 *
 * Length checking and counter handling mirror CRYPTO_gcm128_encrypt: the
 * accumulated message length is limited to 2^36-32 bytes and the 32-bit
 * counter lives in the last four bytes of ctx->Yi.
 *
 * Unless OPENSSL_SMALL_FOOTPRINT is defined, a word-wise path is used:
 *   - leftover key stream in ctx->EKi finishes a pending partial block;
 *   - under STRICT_ALIGNMENT the path is conditional on in and out both
 *     being size_t-aligned;
 *   - with GHASH and GHASH_CHUNK available, each chunk of GHASH_CHUNK input
 *     bytes is hashed first and then decrypted block by block with size_t
 *     XORs; remaining whole blocks follow the same scheme;
 *   - without GHASH each block is handled word by word;
 *   - a trailing partial block is handled byte by byte.
 * The final loop is the byte-at-a-time path.
 * NOTE(review): the len parameter line, return statements, counter
 * increments, the declarations of c and most closing braces and else lines
 * are not present in this copy of the file.
 */
1144 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1145 const unsigned char *in, unsigned char *out,
1148 const union { long one; char little; } is_endian = {1};
1149 unsigned int n, ctr;
1151 u64 mlen = ctx->len.u[1];
1152 block128_f block = ctx->block;
1153 void *key = ctx->key;
1154 #ifdef GCM_FUNCREF_4BIT
1155 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1157 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1158 const u8 *inp,size_t len) = ctx->ghash;
1163 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1165 ctx->len.u[1] = mlen;
1168 /* First call to decrypt finalizes GHASH(AAD) */
1173 if (is_endian.little)
1175 ctr = BSWAP4(ctx->Yi.d[3]);
1177 ctr = GETU32(ctx->Yi.c+12);
1183 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1184 if (16%sizeof(size_t) == 0) do { /* always true actually */
1188 *(out++) = c^ctx->EKi.c[n];
1193 if (n==0) GCM_MUL (ctx,Xi);
1199 #if defined(STRICT_ALIGNMENT)
1200 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1203 #if defined(GHASH) && defined(GHASH_CHUNK)
1204 while (len>=GHASH_CHUNK) {
1205 size_t j=GHASH_CHUNK;
1207 GHASH(ctx,in,GHASH_CHUNK);
1209 size_t *out_t=(size_t *)out;
1210 const size_t *in_t=(const size_t *)in;
1212 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1214 if (is_endian.little)
1216 ctx->Yi.d[3] = BSWAP4(ctr);
1218 PUTU32(ctx->Yi.c+12,ctr);
1222 for (i=0; i<16/sizeof(size_t); ++i)
1223 out_t[i] = in_t[i]^ctx->EKi.t[i];
1230 if ((i = (len&(size_t)-16))) {
1233 size_t *out_t=(size_t *)out;
1234 const size_t *in_t=(const size_t *)in;
1236 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1238 if (is_endian.little)
1240 ctx->Yi.d[3] = BSWAP4(ctr);
1242 PUTU32(ctx->Yi.c+12,ctr);
1246 for (i=0; i<16/sizeof(size_t); ++i)
1247 out_t[i] = in_t[i]^ctx->EKi.t[i];
1255 size_t *out_t=(size_t *)out;
1256 const size_t *in_t=(const size_t *)in;
1258 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1260 if (is_endian.little)
1262 ctx->Yi.d[3] = BSWAP4(ctr);
1264 PUTU32(ctx->Yi.c+12,ctr);
1268 for (i=0; i<16/sizeof(size_t); ++i) {
/*
 * NOTE(review): this loop counts size_t words, and out_t/in_t are declared
 * just above, yet the store below goes through out (unsigned char *) while
 * XORing with the word ctx->EKi.t[i]. Every sibling loop stores through
 * out_t[i]. As written this looks like a truncating byte store at the
 * wrong offset; the intended target is presumably out_t[i] (with c loaded
 * from in_t[i]). Confirm and fix in the complete source.
 */
1270 out[i] = c^ctx->EKi.t[i];
1280 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1282 if (is_endian.little)
1284 ctx->Yi.d[3] = BSWAP4(ctr);
1286 PUTU32(ctx->Yi.c+12,ctr);
1293 out[n] = c^ctx->EKi.c[n];
1302 for (i=0;i<len;++i) {
1305 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1307 if (is_endian.little)
1309 ctx->Yi.d[3] = BSWAP4(ctr);
1311 PUTU32(ctx->Yi.c+12,ctr);
1317 out[i] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32 - encrypt using a caller-supplied multi-block
 * counter-mode routine and fold the ciphertext into the hash.
 *
 * ctx:    context with IV (and optionally AAD) already processed.
 * in:     plaintext.
 * out:    ciphertext destination.
 * len:    number of bytes.
 * stream: routine called as stream(in, out, blocks, key, ctx->Yi.c) to
 *         process a run of whole 16-byte blocks.
 *
 * The accumulated message length is limited to 2^36-32 bytes. A pending
 * partial block is first completed byte by byte from ctx->EKi. When GHASH
 * is available and OPENSSL_SMALL_FOOTPRINT is not defined, data is handled
 * in chunks of GHASH_CHUNK bytes: stream encrypts the chunk, the counter
 * is advanced by GHASH_CHUNK/16 and written back into Yi in big-endian
 * order, and the ciphertext is hashed. Remaining whole blocks get a single
 * stream call with the counter advanced by j; a build without GHASH XORs
 * each output block into ctx->Xi instead. A trailing partial block uses
 * ctx->block to produce key stream and is XORed byte by byte.
 * NOTE(review): return statements, pointer and length updates, and the
 * computation of j are not present in this copy of the file.
 */
1328 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1329 const unsigned char *in, unsigned char *out,
1330 size_t len, ctr128_f stream)
1332 const union { long one; char little; } is_endian = {1};
1333 unsigned int n, ctr;
1335 u64 mlen = ctx->len.u[1];
1336 void *key = ctx->key;
1337 #ifdef GCM_FUNCREF_4BIT
1338 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1340 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1341 const u8 *inp,size_t len) = ctx->ghash;
1346 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1348 ctx->len.u[1] = mlen;
1351 /* First call to encrypt finalizes GHASH(AAD) */
1356 if (is_endian.little)
1358 ctr = BSWAP4(ctx->Yi.d[3]);
1360 ctr = GETU32(ctx->Yi.c+12);
1368 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1372 if (n==0) GCM_MUL(ctx,Xi);
1378 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1379 while (len>=GHASH_CHUNK) {
1380 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1381 ctr += GHASH_CHUNK/16;
1382 if (is_endian.little)
1384 ctx->Yi.d[3] = BSWAP4(ctr);
1386 PUTU32(ctx->Yi.c+12,ctr);
1390 GHASH(ctx,out,GHASH_CHUNK);
1396 if ((i = (len&(size_t)-16))) {
1399 (*stream)(in,out,j,key,ctx->Yi.c);
1400 ctr += (unsigned int)j;
1401 if (is_endian.little)
1403 ctx->Yi.d[3] = BSWAP4(ctr);
1405 PUTU32(ctx->Yi.c+12,ctr);
1416 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1423 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1425 if (is_endian.little)
1427 ctx->Yi.d[3] = BSWAP4(ctr);
1429 PUTU32(ctx->Yi.c+12,ctr);
1434 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32 - GCM decryption counterpart of the ctr32
 * encrypt routine: whole 16-byte blocks go through a caller-supplied
 * counter-mode routine, and the input is folded into the hash.
 *
 *   ctx    - GCM state. The visible code uses ctx->Yi (counter block),
 *            ctx->EKi (key-stream block), ctx->Xi (hash accumulator),
 *            ctx->len.u[1] (running message length), ctx->key and
 *            ctx->block.
 *   in/out - source and destination buffers.
 *   len    - number of bytes to process.
 *   stream - ctr128_f routine, called as (in, out, blocks, key, ctx->Yi.c).
 *
 * NOTE(review): the return statements and several branch/#else lines are
 * not part of this excerpt, so return values are not documented here.
 */
1443 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1444 const unsigned char *in, unsigned char *out,
1445 size_t len,ctr128_f stream)
/* Run-time endianness probe: .little is non-zero on little-endian hosts. */
1447 const union { long one; char little; } is_endian = {1};
1448 unsigned int n, ctr;
1450 u64 mlen = ctx->len.u[1];
1451 void *key = ctx->key;
/* With GCM_FUNCREF_4BIT, keep local copies of the context's gmult/ghash pointers. */
1452 #ifdef GCM_FUNCREF_4BIT
1453 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1455 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1456 const u8 *inp,size_t len) = ctx->ghash;
/*
 * Length check against 2^36-32 bytes; the mlen<len comparison is only
 * evaluated where size_t is 8 bytes wide. The statement this condition
 * guards is not visible in this excerpt.
 */
1461 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
/* Write the running message length back into the context. */
1463 ctx->len.u[1] = mlen;
1466 /* First call to decrypt finalizes GHASH(AAD) */
/* Fetch the 32-bit counter kept in the last four bytes of Yi (offset 12). */
1471 if (is_endian.little)
1473 ctr = BSWAP4(ctx->Yi.d[3]);
1475 ctr = GETU32(ctx->Yi.c+12);
/* Byte path: c is the input byte (its load is not visible here); output = c XOR key stream. */
1484 *(out++) = c^ctx->EKi.c[n];
/* Run GCM_MUL on Xi whenever n is 0. */
1489 if (n==0) GCM_MUL (ctx,Xi);
/* Bulk path, compiled only when GHASH exists and this is not a small-footprint build. */
1495 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1496 while (len>=GHASH_CHUNK) {
/* Hash the input chunk first, then let the stream routine process it. */
1497 GHASH(ctx,in,GHASH_CHUNK);
1498 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1499 ctr += GHASH_CHUNK/16;
/* Store the updated counter back into bytes 12..15 of Yi. */
1500 if (is_endian.little)
1502 ctx->Yi.d[3] = BSWAP4(ctr);
1504 PUTU32(ctx->Yi.c+12,ctr);
/* i = len rounded down to a multiple of 16, i.e. the remaining whole blocks. */
1513 if ((i = (len&(size_t)-16))) {
/* Fold one 16-byte input block into Xi, byte by byte. */
1521 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
/* j is the block count given to stream; its computation is not visible here. */
1528 (*stream)(in,out,j,key,ctx->Yi.c);
1529 ctr += (unsigned int)j;
1530 if (is_endian.little)
1532 ctx->Yi.d[3] = BSWAP4(ctr);
1534 PUTU32(ctx->Yi.c+12,ctr);
/* Generate a key-stream block: run the block cipher on Yi, result in EKi. */
1543 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1545 if (is_endian.little)
1547 ctx->Yi.d[3] = BSWAP4(ctr);
1549 PUTU32(ctx->Yi.c+12,ctr);
/* Byte n: output = input byte c XOR key stream. */
1556 out[n] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish - complete the tag computation held in ctx->Xi and,
 * when a reference tag is supplied, compare against it.
 *
 * Visible steps: the two byte counters in ctx->len are converted to bit
 * counts, XORed into Xi, and Xi is then XORed with ctx->EK0. If tag is
 * non-NULL and len <= sizeof(ctx->Xi), the result of
 * memcmp(ctx->Xi.c, tag, len) is returned, so 0 means the tags match.
 *
 * NOTE(review): the remainder of the parameter list, the statement guarded
 * by the mres/ares test, and the return for the no-tag case are not part of
 * this excerpt.
 *
 * NOTE(review): memcmp is not guaranteed to run in constant time. Comparing
 * an authentication tag with it can leak how many leading bytes matched.
 * Consider a constant-time comparison (e.g. CRYPTO_memcmp, if this OpenSSL
 * version provides it).
 */
1565 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1568 const union { long one; char little; } is_endian = {1};
/* Byte counts to bit counts; alen is presumably the AAD length, clen the message length. */
1569 u64 alen = ctx->len.u[0]<<3;
1570 u64 clen = ctx->len.u[1]<<3;
1571 #ifdef GCM_FUNCREF_4BIT
1572 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
/* Pending partial message or AAD data; the action taken is not visible here. */
1575 if (ctx->mres || ctx->ares)
/* Little-endian hosts: byte-swap the two lengths before they are mixed into Xi. */
1578 if (is_endian.little) {
1580 alen = BSWAP8(alen);
1581 clen = BSWAP8(clen);
/* Alternative form visible in this excerpt: stage the lengths in ctx->len and reload them via GETU32 (p is declared on a line not shown). */
1585 ctx->len.u[0] = alen;
1586 ctx->len.u[1] = clen;
1588 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1589 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Mix the length block into the hash accumulator. */
1593 ctx->Xi.u[0] ^= alen;
1594 ctx->Xi.u[1] ^= clen;
/* XOR with EK0 to form the tag in Xi. */
1597 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1598 ctx->Xi.u[1] ^= ctx->EK0.u[1];
/* Compare only if a tag was passed and len does not exceed the size of Xi. */
1600 if (tag && len<=sizeof(ctx->Xi))
1601 return memcmp(ctx->Xi.c,tag,len);
/*
 * CRYPTO_gcm128_tag - finalize the computation and copy the tag out.
 *
 *   ctx - GCM state; finalized via CRYPTO_gcm128_finish with no reference tag.
 *   tag - destination buffer.
 *   len - bytes requested; at most sizeof(ctx->Xi.c) bytes are copied.
 */
1606 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1608 CRYPTO_gcm128_finish(ctx, NULL, 0);
/* Clamp the copy length to the size of Xi. */
1609 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new - allocate a GCM128_CONTEXT with OPENSSL_malloc and,
 * if the allocation succeeded, initialize it for the given key and block
 * cipher through CRYPTO_gcm128_init.
 *
 * NOTE(review): the return statement is not part of this excerpt;
 * presumably ret is returned (NULL on allocation failure) - confirm.
 */
1612 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1614 GCM128_CONTEXT *ret;
/* Initialize only when the allocation returned non-NULL. */
1616 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1617 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release - wipe a GCM context.
 *
 * The visible code overwrites the whole structure with OPENSSL_cleanse.
 * NOTE(review): any NULL check or deallocation is not visible in this
 * excerpt - confirm against the full file.
 */
1622 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1625 OPENSSL_cleanse(ctx,sizeof(*ctx));
1630 #if defined(SELFTEST)
1632 #include <openssl/aes.h>
/* Test case 1: K1 is a 16-byte key with no initializer (all zero); T1 is the expected tag. */
1635 static const u8 K1[16],
1640 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/* Test case 2: P2 is 16 zero bytes of plaintext; C2 is the expected ciphertext, T2 the expected tag. */
1646 static const u8 P2[16],
1647 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1648 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/* Test case 3: K3 key (16 bytes), P3 plaintext, IV3 IV (12 bytes), C3 expected ciphertext, T3 expected tag. */
1652 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1653 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1654 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1655 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1656 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1657 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1658 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1659 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1660 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1661 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1662 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
/* Test case 4: P4 plaintext, A4 additional authenticated data, C4 expected ciphertext, T4 expected tag. */
1667 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1668 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1669 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1670 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1671 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1672 0xab,0xad,0xda,0xd2},
1673 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1674 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1675 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1676 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1677 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/* Test case 5: IV5 is an 8-byte IV; C5 is the expected ciphertext, T5 the expected tag. */
1683 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1684 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1685 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1686 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1687 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1688 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Test case 6: IV6 is a long (multi-block) IV; C6 is the expected ciphertext, T6 the expected tag. */
1694 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1695 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1696 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1697 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1698 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1699 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1700 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1701 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1702 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* Test case 7: K7 is a 24-byte key with no initializer (all zero); T7 is the expected tag. */
1705 static const u8 K7[24],
1710 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/* Test case 8: P8 is 16 zero bytes of plaintext; C8 is the expected ciphertext, T8 the expected tag. */
1716 static const u8 P8[16],
1717 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1718 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* Test case 9: K9 key (24 bytes), P9 plaintext, IV9 IV (12 bytes), C9 expected ciphertext, T9 expected tag. */
1722 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1723 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1724 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1725 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1726 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1727 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1728 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1729 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1730 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1731 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1732 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1733 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* Test case 10: P10 plaintext, A10 additional authenticated data, C10 expected ciphertext, T10 expected tag. */
1738 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1739 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1740 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1741 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1742 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1743 0xab,0xad,0xda,0xd2},
1744 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1745 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1746 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1747 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1748 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* Test case 11: IV11 is an 8-byte IV; C11 is the expected ciphertext, T11 the expected tag. */
1754 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1755 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1756 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1757 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1758 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1759 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* Test case 12: IV12 is a long (multi-block) IV; C12 is the expected ciphertext, T12 the expected tag. */
1765 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1766 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1767 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1768 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1769 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1770 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1771 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1772 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1773 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* Test case 13: K13 is a 32-byte key with no initializer (all zero); T13 is the expected tag. */
1776 static const u8 K13[32],
1781 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* Test case 14: P14 is 16 zero bytes of plaintext; C14 is the expected ciphertext, T14 the expected tag. */
1786 static const u8 P14[16],
1788 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1789 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* Test case 15: K15 key (32 bytes), P15 plaintext, IV15 IV (12 bytes), C15 expected ciphertext, T15 expected tag. */
1793 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1794 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1795 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1796 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1797 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1798 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1799 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1800 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1801 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1802 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1803 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1804 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* Test case 16: P16 plaintext, A16 additional authenticated data, C16 expected ciphertext, T16 expected tag. */
1809 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1810 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1811 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1812 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1813 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1814 0xab,0xad,0xda,0xd2},
1815 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1816 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1817 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1818 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1819 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* Test case 17: IV17 is an 8-byte IV; C17 is the expected ciphertext, T17 the expected tag. */
1825 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1826 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1827 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1828 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1829 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1830 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* Test case 18: IV18 is a long (multi-block) IV; C18 is the expected ciphertext, T18 the expected tag. */
1836 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1837 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1838 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1839 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1840 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1841 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1842 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1843 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1844 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Test case 19: A19 is the additional authenticated data (eight 16-byte rows); T19 is the expected tag. */
1851 static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1852 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1853 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1854 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1855 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1856 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1857 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1858 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1859 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
/*
 * Test case 20: IV20 is a 64-byte IV whose first four bytes are 0xff and
 * whose remaining bytes are zero-initialized; C20 is the expected
 * ciphertext and T20 the expected tag.
 */
1864 static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
1866 C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
1867 0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
1868 0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
1869 0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
1870 0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
1871 0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
1872 0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
1873 0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
1874 0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
1875 0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
1876 0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
1877 0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
1878 0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
1879 0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
1880 0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
1881 0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
1882 0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
1883 0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
1884 T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * TEST_CASE(n) - known-answer check for vector set n.
 *
 * Expands to code that uses K##n (key; sizeof*8 gives the AES key bits),
 * IV##n, A##n (AAD), P##n (plaintext), C##n (expected ciphertext) and
 * T##n (expected tag), plus `key`, `ctx` and `ret` from the enclosing scope.
 *
 * Encrypt pass: set the AES key, init GCM, set the IV, feed A##n and P##n
 * when they are non-NULL, then require CRYPTO_gcm128_finish to accept T##n
 * (16 bytes) and the output to equal C##n. Decrypt pass: reset the IV and
 * repeat with C##n as input, comparing the output with P##n. Each failing
 * pass increments ret and prints a message.
 */
1886 #define TEST_CASE(n) do { \
1887 u8 out[sizeof(P##n)]; \
1888 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1889 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1890 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1891 memset(out,0,sizeof(out)); \
1892 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1893 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1894 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1895 (C##n && memcmp(out,C##n,sizeof(out)))) \
1896 ret++, printf ("encrypt test#%d failed.\n",n); \
1897 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1898 memset(out,0,sizeof(out)); \
1899 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1900 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1901 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1902 (P##n && memcmp(out,P##n,sizeof(out)))) \
1903 ret++, printf ("decrypt test#%d failed.\n",n); \
1933 #ifdef OPENSSL_CPUID_OBJ
1935 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1936 union { u64 u; u8 c[1024]; } buf;
1939 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1940 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1941 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1943 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1944 start = OPENSSL_rdtsc();
1945 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1946 gcm_t = OPENSSL_rdtsc() - start;
1948 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1949 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1950 (block128_f)AES_encrypt);
1951 start = OPENSSL_rdtsc();
1952 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1953 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1954 (block128_f)AES_encrypt);
1955 ctr_t = OPENSSL_rdtsc() - start;
1957 printf("%.2f-%.2f=%.2f\n",
1958 gcm_t/(double)sizeof(buf),
1959 ctr_t/(double)sizeof(buf),
1960 (gcm_t-ctr_t)/(double)sizeof(buf));
1963 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1964 const u8 *inp,size_t len) = ctx.ghash;
1966 GHASH((&ctx),buf.c,sizeof(buf));
1967 start = OPENSSL_rdtsc();
1968 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1969 gcm_t = OPENSSL_rdtsc() - start;
1970 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);