1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
/* GETU32(p): load a u32 through p and byte-swap it with BSWAP4. */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
/* PUTU32(p,v): byte-swap v with BSWAP4 and store it through p as a u32. */
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* PACK(s): place s in the 16 most significant bits of a size_t. */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V.hi:V.lo right by one bit.
 * T is the constant 0xe1 (positioned in the top byte) ANDed with an
 * all-ones/all-zeros mask built from the bit about to leave V.lo, so it
 * is XORed into V.hi only when that bit was set.  The second variant
 * builds T in 32 bits and widens it with a 32-bit left shift.
 */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to an attacker, it's possible for them to attempt to
95 * deduce the secret parameter H and, if successful, tamper with messages
96 * [which is trivial to do in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
117 * Value of 1 is not appropriate for performance reasons.
/*
 * Builds the 256-entry table Htable used by gcm_gmult_8bit.
 * Htable: output table; H: two 64-bit words of the hash key.
 */
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Seed entry 128 with V, then walk i = 64, 32, ..., 1. */
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* For every power of two i in 2..128, fill entries i+1 .. 2*i-1. */
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
/* Htable[i+j] = Htable[i] XOR Htable[j] */
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * Updates Xi in place using the 256-entry table Htable.  An accumulator Z
 * is XORed with one table entry per step and then shifted right by 8 bits,
 * with the byte that falls off folded back into Z.hi through rem_8bit.
 * The final Z is written back over Xi.
 */
145 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
/* xi starts at the last (16th) byte of Xi. */
148 const u8 *xi = (const u8 *)Xi+15;
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
150 const union { long one; char little; } is_endian = {1};
/* 256 constants, each PACK()ed into the top 16 bits of a size_t. */
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Fold table entry n into Z (n is presumably the byte at xi - confirm). */
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
/* Stop once xi has reached the first byte of Xi. */
222 if ((u8 *)Xi==xi) break;
/* Save the low byte of Z, then shift the 128-bit Z right by 8 bits. */
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/*
 * XOR the constant for the dropped byte into the top of Z.hi; when size_t
 * is not 8 bytes the entry needs a further 32-bit shift to reach the top.
 */
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Little-endian host: store Z byte-swapped, 64 or 32 bits at a time. */
235 if (is_endian.little) {
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for this table size: gcm_gmult_8bit on ctx->Xi with ctx->Htable. */
253 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * Builds the 16-entry table Htable used by the 4-bit multiply/hash
 * routines.  With OPENSSL_SMALL_FOOTPRINT the table is filled by loops;
 * otherwise the same XOR combinations are written out entry by entry.
 */
257 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 #if defined(OPENSSL_SMALL_FOOTPRINT)
269 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Seed entry 8 with V, then walk i = 4, 2, 1. */
270 for (Htable[8]=V, i=4; i>0; i>>=1) {
/* For i = 2, 4, 8: Htable[i+j] = Htable[i] XOR Htable[j], 1 <= j < i. */
275 for (i=2; i<16; i<<=1) {
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
/*
 * Unrolled form: each entry is V XOR an earlier entry.  V is presumably
 * reloaded between the groups 3, 5..7 and 9..15 - confirm.
 */
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
305 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
307 * ARM assembler expects specific dword order in Htable.
311 const union { long one; char little; } is_endian = {1};
313 if (is_endian.little)
/* Swap hi and lo, and rotate each 64-bit word by 32 bits. */
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * Sixteen constants, each PACK()ed into the top 16 bits of a size_t.
 * The 4-bit routines index this table with the nibble shifted out of
 * Z.lo and XOR the entry into Z.hi.
 */
331 static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * Updates Xi in place using the 16-entry table Htable.  Z is shifted
 * right 4 bits at a time; the nibble that falls off selects a rem_4bit
 * constant that is XORed into Z.hi, and a table entry chosen by nhi or
 * nlo is XORed into Z.  The final Z is written back over Xi.
 */
337 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
341 size_t rem, nlo, nhi;
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
342 const union { long one; char little; } is_endian = {1};
/* Start from the last byte of Xi. */
344 nlo = ((const u8 *)Xi)[15];
/* Z starts as the table entry selected by nlo. */
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
/* Save the low nibble of Z, then shift Z right by 4 bits. */
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
/* With a size_t that is not 8 bytes the constant needs 32 more bits of shift. */
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
/* Fetch byte cnt of Xi and repeat the shift/reduce step. */
365 nlo = ((const u8 *)Xi)[cnt];
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
/* Little-endian host: store Z byte-swapped, 64 or 32 bits at a time. */
381 if (is_endian.little) {
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
400 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
/*
 * Streamed counterpart of gcm_gmult_4bit.  Each pass of the closing
 * do/while advances inp by 16 and reduces len by 16, writing the updated
 * value back to Xi.  The loop only ends when len reaches exactly 0, so len
 * must be a non-zero multiple of 16 (len is an unsigned size_t).
 */
408 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
413 size_t rem, nlo, nhi;
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
414 const union { long one; char little; } is_endian = {1};
/* First variant: the same nibble-at-a-time steps as gcm_gmult_4bit. */
419 nlo = ((const u8 *)Xi)[15];
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
441 nlo = ((const u8 *)Xi)[cnt];
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
461 * the rem_8bit even here, but the priority is to minimize
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
/* Unpacked 16-bit constants; users shift them left by 48 into Z.hi. */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
/*
 * Per entry: Hshr4 is the entry shifted right by 4 bits; Hshl4 keeps the
 * low byte of Z.lo<<4, i.e. the entry's low nibble moved to the high
 * nibble of a byte.
 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
/* Bytes 15 down to 1: a whole byte is consumed per pass. */
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
/* Save the low byte of Z, then shift Z right by 8 bits. */
524 rem = (size_t)Z.lo&0xff;
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
/* The dropped byte is combined with Hshl4[nhi] to index rem_8bit. */
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
/* Byte 0 is handled outside the loop with a single 4-bit shift. */
534 nlo = ((const u8 *)Xi)[0];
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
542 rem = (size_t)Z.lo&0xf;
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
/* Little-endian host: store Z byte-swapped, 64 or 32 bits at a time. */
552 if (is_endian.little) {
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Next 16-byte block; the loop stops when len becomes 0. */
569 } while (inp+=16, len-=16);
/* Non-static prototypes for the 4-bit multiply and streamed-hash routines. */
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* GCM_MUL for the 4-bit table: gcm_gmult_4bit on ctx->Xi with ctx->Htable. */
577 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
578 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH hashes len bytes at in into ctx->Xi via gcm_ghash_4bit. */
579 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
580 /* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
581 * thrashing effect. In other words, the idea is to hash data while it's
582 * still in L1 cache after the encryption pass... */
583 #define GHASH_CHUNK (3*1024)
586 #else /* TABLE_BITS */
/*
 * Table-free variant: updates Xi in place using H directly, examining
 * Xi one bit at a time, and writes the result back over Xi.
 */
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
/* Xi is read as an array of long words. */
593 const long *xi = (const long *)Xi;
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
594 const union { long one; char little; } is_endian = {1};
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* One pass per long-sized word of the 16-byte Xi. */
599 for (j=0; j<16/sizeof(long); ++j) {
/* On little-endian hosts each word is byte-swapped before use. */
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
603 X = (long)(BSWAP8(xi[j]));
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
610 const u8 *p = (const u8 *)(xi+j);
/* Walk the bits of X from the most significant end. */
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
/*
 * M replicates the top bit of X across 64 bits.
 * NOTE(review): X is assigned from (long) casts above, so this relies on
 * >> of a negative value being an arithmetic shift (implementation-
 * defined) and on X<<=1 of a signed value - confirm for the target
 * compilers.
 */
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Little-endian host: store Z byte-swapped, 64 or 32 bits at a time. */
626 if (is_endian.little) {
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the table-free build: gcm_gmult_1bit on ctx->Xi with ctx->H. */
644 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * Declarations for assembler-backed GHASH routines, selected by target
 * architecture.  Every branch below defines GCM_FUNCREF_4BIT.
 */
648 #if TABLE_BITS==4 && defined(GHASH_ASM)
649 # if !defined(I386_ONLY) && \
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
653 # define GHASH_ASM_X86_OR_64
654 # define GCM_FUNCREF_4BIT
/* Capability words tested in CRYPTO_gcm128_init. */
655 extern unsigned int OPENSSL_ia32cap_P[2];
657 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/* 32-bit x86 only: MMX and plain x86 variants of the 4-bit routines. */
661 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662 # define GHASH_ASM_X86
663 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
664 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
667 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669 # elif defined(__arm__) || defined(__arm)
670 # include "arm_arch.h"
672 # define GHASH_ASM_ARM
673 # define GCM_FUNCREF_4BIT
674 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
675 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
677 # elif defined(__sparc__) || defined(__sparc)
678 # include "sparc_arch.h"
679 # define GHASH_ASM_SPARC
680 # define GCM_FUNCREF_4BIT
681 extern unsigned int OPENSSL_sparcv9cap_P[];
682 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
683 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
684 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
/*
 * With GCM_FUNCREF_4BIT, GCM_MUL and GHASH call through the local function
 * pointers gcm_gmult_p / gcm_ghash_p, which each user loads from ctx.
 */
688 #ifdef GCM_FUNCREF_4BIT
690 # define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
693 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
/*
 * Initialises a GCM context: clears it, derives H with the block function,
 * converts H to host byte order, builds Htable and selects the
 * multiply/hash implementation for the running CPU.
 * ctx: context to set up; key: opaque key passed to block;
 * block: 128-bit block function.
 */
697 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
699 const union { long one; char little; } is_endian = {1};
701 memset(ctx,0,sizeof(*ctx));
/*
 * ctx->H.c is both input and output of the block call; ctx was zeroed
 * above, so this presumably encrypts an all-zero block - confirm.
 */
705 (*block)(ctx->H.c,ctx->H.c,key);
707 if (is_endian.little) {
708 /* H is stored in host byte order */
710 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
711 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* Alternative: assemble hi/lo from four byte-swapped 32-bit loads. */
715 hi = (u64)GETU32(p) <<32|GETU32(p+4);
716 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
723 gcm_init_8bit(ctx->Htable,ctx->H.u);
725 # if defined(GHASH_ASM_X86_OR_64)
726 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
/* Both capability bits set: use the clmul routines. */
727 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
728 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
729 gcm_init_clmul(ctx->Htable,ctx->H.u);
730 ctx->gmult = gcm_gmult_clmul;
731 ctx->ghash = gcm_ghash_clmul;
/* Otherwise build the 4-bit table and pick a 4-bit implementation. */
735 gcm_init_4bit(ctx->Htable,ctx->H.u);
736 # if defined(GHASH_ASM_X86) /* x86 only */
737 # if defined(OPENSSL_IA32_SSE2)
738 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
740 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
742 ctx->gmult = gcm_gmult_4bit_mmx;
743 ctx->ghash = gcm_ghash_4bit_mmx;
745 ctx->gmult = gcm_gmult_4bit_x86;
746 ctx->ghash = gcm_ghash_4bit_x86;
749 ctx->gmult = gcm_gmult_4bit;
750 ctx->ghash = gcm_ghash_4bit;
/* ARM: NEON routines when OPENSSL_armcap_P has ARMV7_NEON set. */
752 # elif defined(GHASH_ASM_ARM)
753 if (OPENSSL_armcap_P & ARMV7_NEON) {
754 ctx->gmult = gcm_gmult_neon;
755 ctx->ghash = gcm_ghash_neon;
757 gcm_init_4bit(ctx->Htable,ctx->H.u);
758 ctx->gmult = gcm_gmult_4bit;
759 ctx->ghash = gcm_ghash_4bit;
/* SPARC: VIS3 routines when the capability word has SPARCV9_VIS3 set. */
761 # elif defined(GHASH_ASM_SPARC)
762 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
763 gcm_init_vis3(ctx->Htable,ctx->H.u);
764 ctx->gmult = gcm_gmult_vis3;
765 ctx->ghash = gcm_ghash_vis3;
767 gcm_init_4bit(ctx->Htable,ctx->H.u);
768 ctx->gmult = gcm_gmult_4bit;
769 ctx->ghash = gcm_ghash_4bit;
772 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * Starts a new message: resets the length counters, derives the initial
 * counter block Yi from the IV and computes EK0 from it.
 * iv/len: initialisation vector and its length in bytes.
 */
777 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
779 const union { long one; char little; } is_endian = {1};
781 #ifdef GCM_FUNCREF_4BIT
/* Local pointer consumed by the GCM_MUL macro. */
782 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
789 ctx->len.u[0] = 0; /* AAD length */
790 ctx->len.u[1] = 0; /* message length */
/* 12 IV bytes are copied straight into Yi (presumably when len==12). */
795 memcpy(ctx->Yi.c,iv,12);
/* Other lengths: XOR the IV into Yi, 16 bytes at a time, then the tail. */
804 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
810 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/*
 * XOR len0 into the last 8 bytes of Yi, most significant byte first
 * (len0 is presumably the IV length in bits - confirm).
 */
814 if (is_endian.little) {
816 ctx->Yi.u[1] ^= BSWAP8(len0);
818 ctx->Yi.c[8] ^= (u8)(len0>>56);
819 ctx->Yi.c[9] ^= (u8)(len0>>48);
820 ctx->Yi.c[10] ^= (u8)(len0>>40);
821 ctx->Yi.c[11] ^= (u8)(len0>>32);
822 ctx->Yi.c[12] ^= (u8)(len0>>24);
823 ctx->Yi.c[13] ^= (u8)(len0>>16);
824 ctx->Yi.c[14] ^= (u8)(len0>>8);
825 ctx->Yi.c[15] ^= (u8)(len0);
829 ctx->Yi.u[1] ^= len0;
/* The 32-bit counter lives in the last four bytes of Yi. */
833 if (is_endian.little)
834 ctr = GETU32(ctx->Yi.c+12);
/* EK0 is the block function applied to the initial counter block. */
839 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Write the counter back into Yi. */
841 if (is_endian.little)
842 PUTU32(ctx->Yi.c+12,ctr);
/*
 * Feeds len bytes of additional authenticated data into the hash state
 * ctx->Xi.  Returns -2 if message data has already been processed.
 */
847 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
851 u64 alen = ctx->len.u[0];
852 #ifdef GCM_FUNCREF_4BIT
/* Local pointers consumed by the GCM_MUL / GHASH macros. */
853 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
855 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
856 const u8 *inp,size_t len) = ctx->ghash;
/* A non-zero message length means AAD can no longer be added. */
860 if (ctx->len.u[1]) return -2;
/*
 * Limit check: alen above 2^61, or (64-bit size_t) alen smaller than len,
 * which indicates wrap-around - presumably alen was advanced by len just
 * before this test.
 */
863 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
865 ctx->len.u[0] = alen;
/* Complete a partially filled block byte by byte. */
870 ctx->Xi.c[n] ^= *(aad++);
/* Block boundary reached: multiply. */
874 if (n==0) GCM_MUL(ctx,Xi);
/* len rounded down to a multiple of 16: the whole blocks. */
882 if ((i = (len&(size_t)-16))) {
889 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: XOR it in and remember how many bytes. */
896 n = (unsigned int)len;
897 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * Encrypts data from in to out in counter mode with ctx->block and folds
 * the produced ciphertext into the hash state ctx->Xi.
 */
904 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
905 const unsigned char *in, unsigned char *out,
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
908 const union { long one; char little; } is_endian = {1};
911 u64 mlen = ctx->len.u[1];
912 block128_f block = ctx->block;
913 void *key = ctx->key;
914 #ifdef GCM_FUNCREF_4BIT
/* Local pointers consumed by the GCM_MUL / GHASH macros. */
915 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
917 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
918 const u8 *inp,size_t len) = ctx->ghash;
923 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
/*
 * Limit check: mlen above 2^36-32, or (64-bit size_t) mlen smaller than
 * len, which indicates wrap-around.
 */
926 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
928 ctx->len.u[1] = mlen;
931 /* First call to encrypt finalizes GHASH(AAD) */
/* The 32-bit counter lives in the last four bytes of Yi. */
936 if (is_endian.little)
937 ctr = GETU32(ctx->Yi.c+12);
942 #if !defined(OPENSSL_SMALL_FOOTPRINT)
943 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Leftover keystream: out byte = in ^ EKi, and the out byte is XORed into Xi. */
946 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
950 if (n==0) GCM_MUL(ctx,Xi);
956 #if defined(STRICT_ALIGNMENT)
/* Tests whether in or out is not size_t-aligned. */
957 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
960 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Encrypt GHASH_CHUNK bytes, then hash the ciphertext just written. */
961 while (len>=GHASH_CHUNK) {
962 size_t j=GHASH_CHUNK;
965 size_t *out_t=(size_t *)out, *ivec_t=(size_t *)ivec;
966 const size_t *in_t=(const size_t *)in;
967 (*block)(ctx->Yi.c,ctx->EKi.c,key);
969 if (is_endian.little)
970 PUTU32(ctx->Yi.c+12,ctr);
973 for (i=0; i<16/sizeof(size_t); ++i)
974 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
979 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
982 if ((i = (len&(size_t)-16))) {
986 (*block)(ctx->Yi.c,ctx->EKi.c,key);
988 if (is_endian.little)
989 PUTU32(ctx->Yi.c+12,ctr);
992 for (i=0; i<16; i+=sizeof(size_t))
994 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Variant that XORs each ciphertext word into Xi as it is produced. */
1003 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1005 if (is_endian.little)
1006 PUTU32(ctx->Yi.c+12,ctr);
1009 for (i=0; i<16; i+=sizeof(size_t))
1010 *(size_t *)(ctx->Xi.c+i) ^=
1011 *(size_t *)(out+i) =
1012 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Trailing partial block. */
1020 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1022 if (is_endian.little)
1023 PUTU32(ctx->Yi.c+12,ctr);
1027 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Byte-at-a-time path. */
1036 for (i=0;i<len;++i) {
1038 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1040 if (is_endian.little)
1041 PUTU32(ctx->Yi.c+12,ctr);
1045 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * Decrypts data from in to out in counter mode with ctx->block.  The
 * input (ciphertext) is what gets folded into the hash state ctx->Xi.
 */
1055 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1056 const unsigned char *in, unsigned char *out,
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
1059 const union { long one; char little; } is_endian = {1};
1060 unsigned int n, ctr;
1062 u64 mlen = ctx->len.u[1];
1063 block128_f block = ctx->block;
1064 void *key = ctx->key;
1065 #ifdef GCM_FUNCREF_4BIT
/* Local pointers consumed by the GCM_MUL / GHASH macros. */
1066 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1068 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1069 const u8 *inp,size_t len) = ctx->ghash;
/*
 * Limit check: mlen above 2^36-32, or (64-bit size_t) mlen smaller than
 * len, which indicates wrap-around.
 */
1074 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1076 ctx->len.u[1] = mlen;
1079 /* First call to decrypt finalizes GHASH(AAD) */
/* The 32-bit counter lives in the last four bytes of Yi. */
1084 if (is_endian.little)
1085 ctr = GETU32(ctx->Yi.c+12);
1090 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1091 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Leftover keystream: out byte = input byte c ^ EKi. */
1095 *(out++) = c^ctx->EKi.c[n];
1100 if (n==0) GCM_MUL (ctx,Xi);
1106 #if defined(STRICT_ALIGNMENT)
/* Tests whether in or out is not size_t-aligned. */
1107 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1110 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Hash GHASH_CHUNK bytes of input first, then decrypt them. */
1111 while (len>=GHASH_CHUNK) {
1112 size_t j=GHASH_CHUNK;
1114 GHASH(ctx,in,GHASH_CHUNK);
1116 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1118 if (is_endian.little)
1119 PUTU32(ctx->Yi.c+12,ctr);
1122 for (i=0; i<16; i+=sizeof(size_t))
1123 *(size_t *)(out+i) =
1124 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1131 if ((i = (len&(size_t)-16))) {
1134 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1136 if (is_endian.little)
1137 PUTU32(ctx->Yi.c+12,ctr);
1140 for (i=0; i<16; i+=sizeof(size_t))
1141 *(size_t *)(out+i) =
1142 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Variant that XORs each input word into Xi while decrypting it. */
1150 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1152 if (is_endian.little)
1153 PUTU32(ctx->Yi.c+12,ctr);
1156 for (i=0; i<16; i+=sizeof(size_t)) {
/* c is read before out is written, then folded into Xi. */
1157 size_t c = *(size_t *)(in+i);
1158 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1159 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Trailing partial block. */
1168 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1170 if (is_endian.little)
1171 PUTU32(ctx->Yi.c+12,ctr);
1177 out[n] = c^ctx->EKi.c[n];
/* Byte-at-a-time path. */
1186 for (i=0;i<len;++i) {
1189 (*block)(ctx->Yi.c,ctx->EKi.c,key);
1191 if (is_endian.little)
1192 PUTU32(ctx->Yi.c+12,ctr);
1197 out[i] = c^ctx->EKi.c[n];
/*
 * Encrypt variant that hands whole blocks to the caller-supplied stream
 * function and folds the produced ciphertext into ctx->Xi.  ctx->block is
 * used for the trailing partial block.
 */
1208 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1209 const unsigned char *in, unsigned char *out,
1210 size_t len, ctr128_f stream)
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
1212 const union { long one; char little; } is_endian = {1};
1213 unsigned int n, ctr;
1215 u64 mlen = ctx->len.u[1];
1216 void *key = ctx->key;
1217 #ifdef GCM_FUNCREF_4BIT
/* Local pointers consumed by the GCM_MUL / GHASH macros. */
1218 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1220 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1221 const u8 *inp,size_t len) = ctx->ghash;
/*
 * Limit check: mlen above 2^36-32, or (64-bit size_t) mlen smaller than
 * len, which indicates wrap-around.
 */
1226 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1228 ctx->len.u[1] = mlen;
1231 /* First call to encrypt finalizes GHASH(AAD) */
/* The 32-bit counter lives in the last four bytes of Yi. */
1236 if (is_endian.little)
1237 ctr = GETU32(ctx->Yi.c+12);
/* Leftover keystream: out byte = in ^ EKi, and the out byte is XORed into Xi. */
1244 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1248 if (n==0) GCM_MUL(ctx,Xi);
1254 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Per chunk: stream encrypts GHASH_CHUNK/16 blocks, the counter advances
 * by the same amount, and the fresh ciphertext is hashed.
 */
1255 while (len>=GHASH_CHUNK) {
1256 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1257 ctr += GHASH_CHUNK/16;
1258 if (is_endian.little)
1259 PUTU32(ctx->Yi.c+12,ctr);
1262 GHASH(ctx,out,GHASH_CHUNK);
/* Remaining whole blocks: j blocks go through stream in one call. */
1268 if ((i = (len&(size_t)-16))) {
1271 (*stream)(in,out,j,key,ctx->Yi.c);
1272 ctr += (unsigned int)j;
1273 if (is_endian.little)
1274 PUTU32(ctx->Yi.c+12,ctr);
/* Without a streamed GHASH, each output block is XORed into Xi. */
1284 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
/* Trailing partial block uses the single-block function. */
1291 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1293 if (is_endian.little)
1294 PUTU32(ctx->Yi.c+12,ctr);
1298 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * Decrypt variant that hands whole blocks to the caller-supplied stream
 * function.  The input (ciphertext) is folded into ctx->Xi; ctx->block is
 * used for the trailing partial block.
 */
1307 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1308 const unsigned char *in, unsigned char *out,
1309 size_t len,ctr128_f stream)
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
1311 const union { long one; char little; } is_endian = {1};
1312 unsigned int n, ctr;
1314 u64 mlen = ctx->len.u[1];
1315 void *key = ctx->key;
1316 #ifdef GCM_FUNCREF_4BIT
/* Local pointers consumed by the GCM_MUL / GHASH macros. */
1317 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1319 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1320 const u8 *inp,size_t len) = ctx->ghash;
/*
 * Limit check: mlen above 2^36-32, or (64-bit size_t) mlen smaller than
 * len, which indicates wrap-around.
 */
1325 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1327 ctx->len.u[1] = mlen;
1330 /* First call to decrypt finalizes GHASH(AAD) */
/* The 32-bit counter lives in the last four bytes of Yi. */
1335 if (is_endian.little)
1336 ctr = GETU32(ctx->Yi.c+12);
/* Leftover keystream: out byte = input byte c ^ EKi. */
1344 *(out++) = c^ctx->EKi.c[n];
1349 if (n==0) GCM_MUL (ctx,Xi);
1355 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
/* Per chunk: hash the input first, then let stream decrypt it. */
1356 while (len>=GHASH_CHUNK) {
1357 GHASH(ctx,in,GHASH_CHUNK);
1358 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1359 ctr += GHASH_CHUNK/16;
1360 if (is_endian.little)
1361 PUTU32(ctx->Yi.c+12,ctr);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
1369 if ((i = (len&(size_t)-16))) {
/* Without a streamed GHASH, each input block is XORed into Xi. */
1377 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1384 (*stream)(in,out,j,key,ctx->Yi.c);
1385 ctr += (unsigned int)j;
1386 if (is_endian.little)
1387 PUTU32(ctx->Yi.c+12,ctr);
/* Trailing partial block uses the single-block function. */
1395 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1397 if (is_endian.little)
1398 PUTU32(ctx->Yi.c+12,ctr);
1404 out[n] = c^ctx->EKi.c[n];
/*
 * Completes the authentication tag in ctx->Xi: the AAD and message
 * lengths (in bits) are XORed into Xi, and EK0 is XORed in last.
 * When tag is non-NULL and len does not exceed sizeof(ctx->Xi), the first
 * len bytes of the computed tag are compared with tag in constant time.
 * Returns 0 when they match and a non-zero value when they differ.
 */
1413 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
/* is_endian.little is non-zero when the first byte of a long holding 1 is 1. */
1416 const union { long one; char little; } is_endian = {1};
/* Byte counts converted to bit counts. */
1417 u64 alen = ctx->len.u[0]<<3;
1418 u64 clen = ctx->len.u[1]<<3;
1419 #ifdef GCM_FUNCREF_4BIT
/* Local pointer consumed by the GCM_MUL macro. */
1420 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
/* A partial block is pending when either residue counter is non-zero. */
1423 if (ctx->mres || ctx->ares)
/* Little-endian host: byte-swap the two lengths before XORing them into Xi. */
1426 if (is_endian.little) {
1428 alen = BSWAP8(alen);
1429 clen = BSWAP8(clen);
/* Alternative: stage the lengths in ctx->len and reload them with GETU32. */
1433 ctx->len.u[0] = alen;
1434 ctx->len.u[1] = clen;
1436 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1437 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1441 ctx->Xi.u[0] ^= alen;
1442 ctx->Xi.u[1] ^= clen;
/* XOR in EK0; Xi now holds the tag. */
1445 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1446 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1448 if (tag && len<=sizeof(ctx->Xi))
{
	/*
	 * Constant-time comparison: every byte is examined regardless of
	 * where the first mismatch is, so timing does not reveal how many
	 * leading tag bytes were correct (memcmp may stop early).
	 */
	const volatile unsigned char *calc = ctx->Xi.c;
	const volatile unsigned char *want = tag;
	unsigned char diff = 0;
	size_t k;

	for (k=0; k<len; ++k)
		diff |= calc[k]^want[k];
	return diff;
}
/*
 * Finalises the tag and copies it to the caller's buffer.  At most
 * sizeof(ctx->Xi.c) bytes are written, even if len is larger.
 */
1454 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
/* A NULL tag means finish only computes the tag into ctx->Xi. */
1456 CRYPTO_gcm128_finish(ctx, NULL, 0);
1457 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: allocate a GCM128_CONTEXT with OPENSSL_malloc() and,
 * if the allocation succeeded, initialise it via CRYPTO_gcm128_init().
 *
 * key, block - forwarded unchanged to CRYPTO_gcm128_init().
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably ret is returned (NULL when the allocation fails) - confirm.
 */
1460 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1462 GCM128_CONTEXT *ret;
/* Initialise only when the allocation returned a non-NULL pointer. */
1464 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1465 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release: wipe the whole context structure with
 * OPENSSL_cleanse() so its contents do not linger in memory.
 *
 * NOTE(review): the remaining lines of this function (any NULL check and
 * the release of the allocation itself) are not visible here - confirm.
 */
1470 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1473 OPENSSL_cleanse(ctx,sizeof(*ctx));
1478 #if defined(SELFTEST)
1480 #include <openssl/aes.h>
/* Test case 1 vectors: K1 is a 16-byte key left zero-initialised, T1 is the
 * expected tag. The declarators between them are not shown in this listing. */
1483 static const u8 K1[16],
1488 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/* Test case 2 vectors: P2 is a 16-byte zero-initialised plaintext, C2 the
 * expected ciphertext, T2 the expected tag. Any other inputs for this case
 * are not defined in these lines. */
1494 static const u8 P2[16],
1495 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1496 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/* Test case 3 vectors: 16-byte key K3, 64-byte plaintext P3, 12-byte IV3,
 * expected ciphertext C3 and expected tag T3. */
1500 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1501 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1502 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1503 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1504 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1505 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1506 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1507 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1508 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1509 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1510 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
/* Test case 4 vectors: 60-byte plaintext P4, 20-byte additional
 * authenticated data A4, expected ciphertext C4 and expected tag T4.
 * Any other inputs for this case are not defined in these lines. */
1515 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1516 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1517 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1518 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1519 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1520 0xab,0xad,0xda,0xd2},
1521 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1522 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1523 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1524 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1525 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/* Test case 5 vectors: 8-byte IV5, expected ciphertext C5 and expected
 * tag T5. Any other inputs for this case are not defined in these lines. */
1531 static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1532 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1533 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1534 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1535 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1536 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Test case 6 vectors: 60-byte IV6, expected ciphertext C6 and expected
 * tag T6. Any other inputs for this case are not defined in these lines. */
1542 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1543 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1544 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1545 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1546 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1547 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1548 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1549 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1550 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* Test case 7 vectors: K7 is a 24-byte key left zero-initialised, T7 is the
 * expected tag. The declarators between them are not shown in this listing. */
1553 static const u8 K7[24],
1558 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/* Test case 8 vectors: P8 is a 16-byte zero-initialised plaintext, C8 the
 * expected ciphertext, T8 the expected tag. Any other inputs for this case
 * are not defined in these lines. */
1564 static const u8 P8[16],
1565 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1566 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* Test case 9 vectors: 24-byte key K9, 64-byte plaintext P9, 12-byte IV9,
 * expected ciphertext C9 and expected tag T9. */
1570 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1571 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1572 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1573 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1574 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1575 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1576 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1577 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1578 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1579 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1580 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1581 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* Test case 10 vectors: 60-byte plaintext P10, 20-byte additional
 * authenticated data A10, expected ciphertext C10 and expected tag T10.
 * Any other inputs for this case are not defined in these lines. */
1586 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1587 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1588 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1589 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1590 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1591 0xab,0xad,0xda,0xd2},
1592 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1593 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1594 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1595 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1596 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* Test case 11 vectors: 8-byte IV11, expected ciphertext C11 and expected
 * tag T11. Any other inputs for this case are not defined in these lines. */
1602 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1603 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1604 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1605 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1606 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1607 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* Test case 12 vectors: 60-byte IV12, expected ciphertext C12 and expected
 * tag T12. Any other inputs for this case are not defined in these lines. */
1613 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1614 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1615 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1616 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1617 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1618 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1619 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1620 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1621 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* Test case 13 vectors: K13 is a 32-byte key left zero-initialised, T13 is
 * the expected tag. The declarators between them are not shown in this
 * listing. */
1624 static const u8 K13[32],
1629 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* Test case 14 vectors: P14 is a 16-byte zero-initialised plaintext, C14
 * the expected ciphertext, T14 the expected tag. Any other inputs for this
 * case are not defined in these lines. */
1634 static const u8 P14[16],
1636 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1637 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* Test case 15 vectors: 32-byte key K15, 64-byte plaintext P15, 12-byte
 * IV15, expected ciphertext C15 and expected tag T15. */
1641 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1642 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1643 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1644 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1645 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1646 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1647 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1648 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1649 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1650 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1651 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1652 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* Test case 16 vectors: 60-byte plaintext P16, 20-byte additional
 * authenticated data A16, expected ciphertext C16 and expected tag T16.
 * Any other inputs for this case are not defined in these lines. */
1657 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1658 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1659 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1660 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1661 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1662 0xab,0xad,0xda,0xd2},
1663 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1664 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1665 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1666 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1667 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* Test case 17 vectors: 8-byte IV17, expected ciphertext C17 and expected
 * tag T17. Any other inputs for this case are not defined in these lines. */
1673 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1674 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1675 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1676 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1677 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1678 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* Test case 18 vectors: 60-byte IV18, expected ciphertext C18 and expected
 * tag T18. Any other inputs for this case are not defined in these lines. */
1684 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1685 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1686 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1687 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1688 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1689 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1690 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1691 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1692 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/*
 * TEST_CASE(n): run known-answer test n in both directions.
 *
 * Encrypt pass: key AES with K<n> (key size taken from sizeof(K<n>)*8),
 * initialise the GCM context, set IV<n>, feed A<n> as additional
 * authenticated data, encrypt P<n> into a local buffer, then check the tag
 * against T<n> (16 bytes) and the output against C<n>.
 * Decrypt pass: set IV<n> again, feed A<n>, decrypt C<n> and check the tag
 * against T<n> and the output against P<n>.
 *
 * A<n>, P<n> and C<n> are each tested for non-zero before use, so a case
 * may omit them. Every failed pass increments `ret` and prints a message.
 * The expansion site must provide `key`, `ctx` and `ret`.
 *
 * NOTE(review): the line closing the do { ... } block is not visible in
 * this listing.
 */
1694 #define TEST_CASE(n) do { \
1695 u8 out[sizeof(P##n)]; \
1696 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1697 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1698 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1699 memset(out,0,sizeof(out)); \
1700 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1701 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1702 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1703 (C##n && memcmp(out,C##n,sizeof(out)))) \
1704 ret++, printf ("encrypt test#%d failed.\n",n); \
1705 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1706 memset(out,0,sizeof(out)); \
1707 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1708 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1709 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1710 (P##n && memcmp(out,P##n,sizeof(out)))) \
1711 ret++, printf ("decrypt test#%d failed.\n",n); \
1739 #ifdef OPENSSL_CPUID_OBJ
1741 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1742 union { u64 u; u8 c[1024]; } buf;
1745 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1746 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1747 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1749 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1750 start = OPENSSL_rdtsc();
1751 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1752 gcm_t = OPENSSL_rdtsc() - start;
1754 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1755 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1756 (block128_f)AES_encrypt);
1757 start = OPENSSL_rdtsc();
1758 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1759 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1760 (block128_f)AES_encrypt);
1761 ctr_t = OPENSSL_rdtsc() - start;
1763 printf("%.2f-%.2f=%.2f\n",
1764 gcm_t/(double)sizeof(buf),
1765 ctr_t/(double)sizeof(buf),
1766 (gcm_t-ctr_t)/(double)sizeof(buf));
1769 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1770 const u8 *inp,size_t len) = ctx.ghash;
1772 GHASH((&ctx),buf.c,sizeof(buf));
1773 start = OPENSSL_rdtsc();
1774 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1775 gcm_t = OPENSSL_rdtsc() - start;
1776 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);