1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
/*
 * NOTE(review): this file is an elided numbered listing of OpenSSL's
 * crypto/modes/gcm128.c -- the embedded line numbers are non-contiguous,
 * so interior lines (#else/#endif pairs, declarations, closing braces)
 * are missing throughout.  Comments annotate only what is visible.
 */
/* Platform-specific 64-bit typedefs and the U64() integer-literal macro:
 * MSVC uses __int64/UI64, LP64 targets use long, everything else falls
 * back to long long (the #else for that branch is elided here). */
60 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
62 typedef unsigned __int64 u64;
63 #define U64(C) C##UI64
64 #elif defined(__arch64__)
66 typedef unsigned long u64;
69 typedef long long i64;
70 typedef unsigned long long u64;
74 typedef unsigned int u32;
75 typedef unsigned char u8;
76 typedef struct { u64 hi,lo; } u128;
/* STRICT_ALIGNMENT is on by default and switched off only on
 * architectures known to tolerate unaligned word accesses. */
78 #define STRICT_ALIGNMENT
79 #if defined(__i386) || defined(__i386__) || \
80 defined(__x86_64) || defined(__x86_64__) || \
81 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
82 defined(__s390__) || defined(__s390x__)
83 # undef STRICT_ALIGNMENT
/* Byte-swap helpers: GCC statement-expression inline asm (bswapq/bswapl)
 * on x86[_64], MSVC _byteswap_* intrinsics otherwise.  GETU32/PUTU32
 * load/store a big-endian 32-bit word -- via BSWAP when available, or
 * byte-at-a-time in the portable fallback.  NOTE(review): several
 * #else/#endif lines and asm constraint lines are elided here. */
86 #if defined(__GNUC__) && __GNUC__>=2
87 # if defined(__x86_64) || defined(__x86_64__)
88 # define BSWAP8(x) ({ u64 ret=(x); \
89 asm volatile ("bswapq %0" \
91 # define BSWAP4(x) ({ u32 ret=(x); \
92 asm volatile ("bswapl %0" \
94 # elif defined(__i386) || defined(__i386__)
95 # define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
96 asm volatile ("bswapl %0; bswapl %1" \
97 : "+r"(hi),"+r"(lo)); \
99 # define BSWAP4(x) ({ u32 ret=(x); \
100 asm volatile ("bswapl %0" \
101 : "+r"(ret)); ret; })
103 #elif defined(_MSC_VER)
105 # pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
106 # define BSWAP8(x) _byteswap_uint64((u64)(x))
107 # define BSWAP4(x) _byteswap_ulong((u32)(x))
108 # elif defined(_M_IX86)
113 #define GETU32(p) BSWAP4(*(const u32 *)(p))
114 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
116 #define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
117 #define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
/* PACK() places a 16-bit reduction constant in the TOP 16 bits of a
 * size_t, so the same rem_{4,8}bit tables serve both 32- and 64-bit
 * size_t builds. */
120 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
125 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
126 * never be set to 8. 8 is effectively reserved for testing purposes.
127 * Under ideal conditions "8-bit" version should be twice as fast as
128 * "4-bit" one. But world is far from ideal. For gcc-generated x86 code,
129 * "8-bit" was observed to run only ~50% faster. On x86_64 observed
130 * improvement was ~75%, much closer to optimal, but the fact of
131 * deviation means that references to pre-computed tables end up on
132 * critical path and as tables are pretty big, 4KB per key+1KB shared,
133 * execution time is sensitive to cache timing. It's not actually
134 * proven, but 4-bit procedure is believed to provide adequate
135 * all-round performance...
/* Build the 256-entry table for the "8-bit" GHASH variant:
 * Htable[i] = i*H in GF(2^128) (bit-reflected GCM convention).
 * NOTE(review): local declarations (V, i, j) and the code that seeds
 * Htable[0]/Htable[128] before this loop are elided from this listing. */
141 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* First pass: halve V repeatedly to fill the power-of-two entries
 * Htable[64], Htable[32], ... Htable[1]. */
151 for (Htable[128]=V, i=64; i>0; i>>=1) {
152 if (sizeof(size_t)==8) {
/* Branchless conditional reduction: if V's low bit is set, the
 * right-shift must fold in 0xe1...00 (x^128+x^7+x^2+x+1, reflected). */
153 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
154 V.lo = (V.hi<<63)|(V.lo>>1);
/* Same reduction split into 32-bit halves for 32-bit size_t builds. */
158 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
159 V.lo = (V.hi<<63)|(V.lo>>1);
160 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Second pass: fill remaining entries by XOR of smaller ones,
 * Htable[i+j] = Htable[i] ^ Htable[j] (GF(2) addition is XOR). */
165 for (i=2; i<256; i<<=1) {
166 u128 *Hi = Htable+i, H0 = *Hi;
167 for (j=1; j<i; ++j) {
168 Hi[j].hi = H0.hi^Htable[j].hi;
169 Hi[j].lo = H0.lo^Htable[j].lo;
/* Xi = Xi * H using the 8-bit table: process Xi one byte at a time from
 * the last byte (index 15) backwards, shifting Z right 8 bits per step
 * and folding the shifted-out byte back in via rem_8bit.
 * NOTE(review): the declarations of Z, n and the surrounding loop
 * header/braces are elided from this listing. */
174 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
177 const u8 *xi = (const u8 *)Xi+15;
179 const union { long one; char little; } is_endian = {1};
/* Reduction table: rem_8bit[b] is the 16-bit constant to XOR into the
 * top of Z when byte b is shifted out; PACK() pre-positions it in the
 * high bits of a size_t. */
180 static const size_t rem_8bit[256] = {
181 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
182 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
183 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
184 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
185 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
186 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
187 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
188 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
189 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
190 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
191 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
192 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
193 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
194 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
195 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
196 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
197 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
198 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
199 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
200 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
201 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
202 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
203 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
204 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
205 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
206 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
207 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
208 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
209 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
210 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
211 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
212 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
213 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
214 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
215 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
216 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
217 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
218 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
219 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
220 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
221 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
222 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
223 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
224 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
225 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
226 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
227 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
228 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
229 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
230 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
231 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
232 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
233 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
234 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
235 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
236 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
237 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
238 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
239 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
240 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
241 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
242 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
243 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
244 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Accumulate the table entry selected by the current byte n of Xi. */
247 Z.hi ^= Htable[n].hi;
248 Z.lo ^= Htable[n].lo;
/* Done once the walking pointer reaches the first byte of Xi. */
250 if ((u8 *)Xi==xi) break;
/* Shift Z right by one byte; 'rem' is the byte shifted out of Z.lo. */
254 rem = (size_t)Z.lo&0xff;
255 Z.lo = (Z.hi<<56)|(Z.lo>>8);
/* Fold the reduction constant into the top of Z.hi.  On 32-bit size_t
 * the PACK()ed value sits 32 bits lower, hence the extra <<32. */
257 if (sizeof(size_t)==8)
258 Z.hi ^= rem_8bit[rem];
260 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Store Z back into Xi in big-endian byte order. */
263 if (is_endian.little) {
265 Xi[0] = BSWAP8(Z.hi);
266 Xi[1] = BSWAP8(Z.lo);
270 v = (u32)(Z.hi>>32); PUTU32(p,v);
271 v = (u32)(Z.hi); PUTU32(p+4,v);
272 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
273 v = (u32)(Z.lo); PUTU32(p+12,v);
/* With TABLE_BITS==8, GCM_MUL dispatches to the 8-bit table routine. */
281 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/* Build the 16-entry table for the "4-bit" GHASH variant:
 * Htable[i] = i*H in GF(2^128).  NOTE(review): local declarations and
 * the seeding of Htable[0]/Htable[8] before the loop are elided. */
285 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
/* Halve V to fill the power-of-two entries 4, 2, 1, with the usual
 * branchless conditional reduction by the GCM polynomial. */
295 for (Htable[8]=V, i=4; i>0; i>>=1) {
296 if (sizeof(size_t)==8) {
297 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
298 V.lo = (V.hi<<63)|(V.lo>>1);
/* 32-bit size_t variant of the same reduction step. */
302 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
303 V.lo = (V.hi<<63)|(V.lo>>1);
304 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Small-footprint build fills the rest with a generic double loop;
 * otherwise the combinations are spelled out (unrolled) below. */
309 #if defined(OPENSSL_SMALL_FOOTPRINT)
310 for (i=2; i<16; i<<=1) {
313 for (V=*Hi, j=1; j<i; ++j) {
314 Hi[j].hi = V.hi^Htable[j].hi;
315 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled fill: each entry is the XOR of two previously computed
 * entries (GF(2) addition).  NOTE(review): the V=Htable[2]/[4]/[8]
 * assignments between these groups are elided from this listing. */
319 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
321 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
322 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
323 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
325 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
326 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
327 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
328 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
329 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
330 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
331 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
/* Reduction constants for the 4-bit algorithm: rem_4bit[n] is XORed
 * into the top of Z when nibble n is shifted out; PACK() positions the
 * 16-bit value in the high bits of a size_t. */
336 static const size_t rem_4bit[16] = {
337 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
338 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
339 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
340 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/* Xi = Xi * H via the 4-bit table: process Xi a nibble at a time from
 * the last byte backwards, shifting Z right 4 bits per nibble and
 * folding the shifted-out nibble back in through rem_4bit.
 * NOTE(review): the declarations of Z, cnt, the nhi/nlo nibble-split
 * statements and the loop header/braces are elided from this listing. */
342 static void gcm_gmult_4bit(u64 Xi[2], u128 Htable[16])
346 size_t rem, nlo, nhi;
347 const union { long one; char little; } is_endian = {1};
/* Start from the low nibble of the last byte of Xi. */
349 nlo = ((const u8 *)Xi)[15];
353 Z.hi = Htable[nlo].hi;
354 Z.lo = Htable[nlo].lo;
/* Shift Z right one nibble; 'rem' is the nibble shifted out. */
357 rem = (size_t)Z.lo&0xf;
358 Z.lo = (Z.hi<<60)|(Z.lo>>4);
360 if (sizeof(size_t)==8)
361 Z.hi ^= rem_4bit[rem];
363 Z.hi ^= (u64)rem_4bit[rem]<<32;
/* Accumulate the entry for the high nibble of the current byte. */
365 Z.hi ^= Htable[nhi].hi;
366 Z.lo ^= Htable[nhi].lo;
/* Move to the next byte of Xi (walking backwards via cnt). */
370 nlo = ((const u8 *)Xi)[cnt];
374 rem = (size_t)Z.lo&0xf;
375 Z.lo = (Z.hi<<60)|(Z.lo>>4);
377 if (sizeof(size_t)==8)
378 Z.hi ^= rem_4bit[rem];
380 Z.hi ^= (u64)rem_4bit[rem]<<32;
382 Z.hi ^= Htable[nlo].hi;
383 Z.lo ^= Htable[nlo].lo;
/* Store the result back into Xi in big-endian byte order. */
386 if (is_endian.little) {
388 Xi[0] = BSWAP8(Z.hi);
389 Xi[1] = BSWAP8(Z.lo);
393 v = (u32)(Z.hi>>32); PUTU32(p,v);
394 v = (u32)(Z.hi); PUTU32(p+4,v);
395 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
396 v = (u32)(Z.lo); PUTU32(p+12,v);
405 #if !defined(OPENSSL_SMALL_FOOTPRINT)
407 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
408 * details... Compiler-generated code doesn't seem to give any
409 * performance improvement, at least not on x86[_64]. It's here
410 * mostly as reference and a placeholder for possible future
411 * non-trivial optimization[s]...
/* Streamed GHASH: absorbs 'len' bytes of 'inp' (16 at a time) into the
 * accumulator Xi, using the same nibble-at-a-time walk as
 * gcm_gmult_4bit.  NOTE(review): declarations of Z/cnt, the
 * XOR-with-input statements, and the loop braces are elided here. */
413 static void gcm_ghash_4bit(const u8 *inp,size_t len,u64 Xi[2], u128 Htable[16])
417 size_t rem, nlo, nhi;
418 const union { long one; char little; } is_endian = {1};
422 nlo = ((const u8 *)Xi)[15];
427 Z.hi = Htable[nlo].hi;
428 Z.lo = Htable[nlo].lo;
/* Per-nibble shift-and-reduce, identical to gcm_gmult_4bit. */
431 rem = (size_t)Z.lo&0xf;
432 Z.lo = (Z.hi<<60)|(Z.lo>>4);
434 if (sizeof(size_t)==8)
435 Z.hi ^= rem_4bit[rem];
437 Z.hi ^= (u64)rem_4bit[rem]<<32;
439 Z.hi ^= Htable[nhi].hi;
440 Z.lo ^= Htable[nhi].lo;
444 nlo = ((const u8 *)Xi)[cnt];
449 rem = (size_t)Z.lo&0xf;
450 Z.lo = (Z.hi<<60)|(Z.lo>>4);
452 if (sizeof(size_t)==8)
453 Z.hi ^= rem_4bit[rem];
455 Z.hi ^= (u64)rem_4bit[rem]<<32;
457 Z.hi ^= Htable[nlo].hi;
458 Z.lo ^= Htable[nlo].lo;
/* Write the updated accumulator back to Xi in big-endian order. */
461 if (is_endian.little) {
463 Xi[0] = BSWAP8(Z.hi);
464 Xi[1] = BSWAP8(Z.lo);
468 v = (u32)(Z.hi>>32); PUTU32(p,v);
469 v = (u32)(Z.hi); PUTU32(p+4,v);
470 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
471 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Advance to the next 16-byte block of input. */
478 } while (inp+=16, len-=16);
/* Prototypes for the assembly implementations (GHASH_ASM builds);
 * the C versions above share these signatures. */
482 void gcm_gmult_4bit(u64 Xi[2],u128 Htable[16]);
483 void gcm_ghash_4bit(const u8 *inp,size_t len,u64 Xi[2],u128 Htable[16]);
486 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
487 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
488 #define GHASH(in,len,ctx) gcm_ghash_4bit(in,len,(ctx)->Xi.u,(ctx)->Htable)
489 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
490 * trashing effect. In other words idea is to hash data while it's
491 * still in L1 cache after encryption pass... */
492 #define GHASH_CHUNK 1024
495 #else /* TABLE_BITS */
/* Table-less bit-by-bit GF(2^128) multiplication (TABLE_BITS==0):
 * for each bit of Xi (MSB first within each machine word), conditionally
 * accumulate V into Z, then double V with the usual reduction.
 * NOTE(review): declarations of Z/V/X and the Z-accumulation statements
 * are elided from this listing. */
497 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
502 const long *xi = (const long *)Xi;
503 const union { long one; char little; } is_endian = {1};
505 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Walk Xi one machine word (long) at a time, 16 bytes total. */
508 for (j=0; j<16/sizeof(long); ++j) {
509 if (is_endian.little) {
510 if (sizeof(long)==8) {
/* Load the word in big-endian bit order for MSB-first scanning. */
512 X = (long)(BSWAP8(xi[j]));
514 const u8 *p = (const u8 *)(xi+j);
515 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
519 const u8 *p = (const u8 *)(xi+j);
/* M is an all-ones/all-zeros mask from the current top bit of X,
 * used to accumulate V into Z without branching. */
526 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
527 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Double V: shift right with conditional polynomial reduction. */
531 if (sizeof(size_t)==8) {
532 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
533 V.lo = (V.hi<<63)|(V.lo>>1);
537 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
538 V.lo = (V.hi<<63)|(V.lo>>1);
539 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Store the product back to Xi in big-endian byte order. */
545 if (is_endian.little) {
547 Xi[0] = BSWAP8(Z.hi);
548 Xi[1] = BSWAP8(Z.lo);
552 v = (u32)(Z.hi>>32); PUTU32(p,v);
553 v = (u32)(Z.hi); PUTU32(p+4,v);
554 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
555 v = (u32)(Z.lo); PUTU32(p+12,v);
/* TABLE_BITS==0 dispatch: multiply directly by H (no table). */
563 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* Fragment of the GCM128_CONTEXT structure definition.  Yi = counter
 * block, EKi = encrypted counter (keystream block), EK0 = E(K,Y0) used
 * to mask the tag; Xi, H, len and the Htable/key/block members are on
 * lines elided from this listing.  'res' tracks leftover bytes within
 * a partial block, 'ctr' caches the 32-bit counter. */
568 /* Following 6 names follow names in GCM specification */
569 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
572 /* Pre-computed table used by gcm_gmult_* */
578 unsigned int res, ctr;
/* Initialize a GCM context: zero it, compute the hash subkey
 * H = E(K, 0^128) with the supplied block cipher, convert H to host
 * byte order, and precompute the multiplication table matching the
 * configured TABLE_BITS.  NOTE(review): the lines storing 'key' and
 * 'block' into ctx, and the #if/#else around table selection, are
 * elided from this listing. */
583 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
585 const union { long one; char little; } is_endian = {1};
587 memset(ctx,0,sizeof(*ctx));
/* H = E(K, 0): ctx->H.c is all zeros after the memset above. */
591 (*block)(ctx->H.c,ctx->H.c,key);
593 if (is_endian.little) {
594 /* H is stored in host byte order */
596 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
597 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* Fallback byte-order conversion when BSWAP8 is unavailable. */
601 hi = (u64)GETU32(p) <<32|GETU32(p+4);
602 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Precompute the GHASH table for the selected TABLE_BITS variant. */
609 gcm_init_8bit(ctx->Htable,ctx->H.u);
611 gcm_init_4bit(ctx->Htable,ctx->H.u);
/* Set the IV and derive Y0 and EK0.  A 96-bit IV is used directly as
 * Yi with a counter of 1 (per the GCM spec); any other length is
 * GHASHed into Yi together with the IV bit length (len0).
 * NOTE(review): the loop/branch structure, the GCM_MUL calls between
 * these statements, and several declarations are elided. */
615 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
617 const union { long one; char little; } is_endian = {1};
/* Fast path: 96-bit IV is copied verbatim into Yi. */
628 memcpy(ctx->Yi.c,iv,12);
/* Non-96-bit IV: absorb full 16-byte chunks of the IV into Yi ... */
637 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ... then the final partial chunk. */
643 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* Mix in the IV length in bits (len0) as the final GHASH block. */
647 if (is_endian.little) {
649 ctx->Yi.u[1] ^= BSWAP8(len0);
651 ctx->Yi.c[8] ^= (u8)(len0>>56);
652 ctx->Yi.c[9] ^= (u8)(len0>>48);
653 ctx->Yi.c[10] ^= (u8)(len0>>40);
654 ctx->Yi.c[11] ^= (u8)(len0>>32);
655 ctx->Yi.c[12] ^= (u8)(len0>>24);
656 ctx->Yi.c[13] ^= (u8)(len0>>16);
657 ctx->Yi.c[14] ^= (u8)(len0>>8);
658 ctx->Yi.c[15] ^= (u8)(len0);
662 ctx->Yi.u[1] ^= len0;
/* Cache the 32-bit counter from the last 4 bytes of Yi. */
666 if (is_endian.little)
667 ctx->ctr = GETU32(ctx->Yi.c+12);
669 ctx->ctr = ctx->Yi.d[3];
/* EK0 = E(K, Y0); used later by CRYPTO_gcm128_finish to mask the tag. */
672 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Write the (incremented) counter back into Yi.  NOTE(review): the
 * ctx->ctr increment itself is on an elided line. */
674 if (is_endian.little)
675 PUTU32(ctx->Yi.c+12,ctx->ctr);
677 ctx->Yi.d[3] = ctx->ctr;
/* Absorb additional authenticated data (AAD) into the GHASH
 * accumulator Xi.  Full 16-byte chunks are XORed and multiplied; a
 * trailing partial chunk is XORed in (the closing GCM_MUL happens
 * later).  NOTE(review): the GCM_MUL calls and loop braces between
 * these statements are elided from this listing. */
680 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
/* len.u[0] tracks the total AAD byte count for the final length block. */
684 ctx->len.u[0] += len;
/* i = len rounded down to a multiple of 16. */
687 if ((i = (len&(size_t)-16))) {
694 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block (< 16 bytes). */
701 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/* CTR-encrypt 'len' bytes from 'in' to 'out' and fold the ciphertext
 * into the GHASH accumulator Xi.  The body has several tiers: a
 * GHASH_CHUNK-sized bulk path (encrypt then hash while data is hot in
 * L1), a whole-block word-at-a-time path, a byte tail, and a
 * byte-at-a-time fallback for small-footprint/unaligned builds.
 * NOTE(review): counter increments, loop headers, 'n' bookkeeping and
 * many braces sit on lines elided from this listing. */
706 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
707 const unsigned char *in, unsigned char *out,
710 const union { long one; char little; } is_endian = {1};
/* len.u[1] tracks total ciphertext bytes for the final length block. */
714 ctx->len.u[1] += len;
718 #if !defined(OPENSSL_SMALL_FOOTPRINT)
719 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partial block left over from a previous call (n != 0). */
722 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
726 if (n==0) GCM_MUL(ctx,Xi);
/* Unaligned buffers fall through to the byte-wise path below. */
732 #if defined(STRICT_ALIGNMENT)
733 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Bulk path: encrypt GHASH_CHUNK bytes, then GHASH them in one call. */
736 #if defined(GHASH) && defined(GHASH_CHUNK)
737 while (len>=GHASH_CHUNK) {
738 size_t j=GHASH_CHUNK;
741 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
743 if (is_endian.little)
744 PUTU32(ctx->Yi.c+12,ctr);
/* XOR keystream into input a machine word at a time. */
747 for (i=0; i<16; i+=sizeof(size_t))
749 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
754 GHASH(out-GHASH_CHUNK,GHASH_CHUNK,ctx);
/* Remaining whole blocks (i = len rounded down to multiple of 16). */
757 if ((i = (len&(size_t)-16))) {
761 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
763 if (is_endian.little)
764 PUTU32(ctx->Yi.c+12,ctr);
767 for (i=0; i<16; i+=sizeof(size_t))
769 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No-GHASH() variant: hash each block inline via GCM_MUL. */
778 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
780 if (is_endian.little)
781 PUTU32(ctx->Yi.c+12,ctr);
784 for (i=0; i<16; i+=sizeof(size_t))
785 *(size_t *)(ctx->Xi.c+i) ^=
787 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Final partial block: encrypt and fold byte by byte; 'n' records
 * how many tail bytes were consumed for the next call. */
795 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
797 if (is_endian.little)
798 PUTU32(ctx->Yi.c+12,ctr);
802 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Small-footprint fallback: pure byte-at-a-time CTR + GHASH. */
812 for (i=0;i<len;++i) {
814 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
816 if (is_endian.little)
817 PUTU32(ctx->Yi.c+12,ctr);
821 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/* Mirror of CRYPTO_gcm128_encrypt for decryption: the CIPHERTEXT (the
 * input here) is folded into Xi before it is XORed with the keystream,
 * hence the temporaries 'c' holding input bytes/words.  Same tiered
 * structure: GHASH_CHUNK bulk path, whole-block path, byte tail, and a
 * byte-wise fallback.  NOTE(review): counter increments, loop headers
 * and braces sit on elided lines. */
831 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
832 const unsigned char *in, unsigned char *out,
835 const union { long one; char little; } is_endian = {1};
839 ctx->len.u[1] += len;
843 #if !defined(OPENSSL_SMALL_FOOTPRINT)
844 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a leftover partial block from the previous call. */
848 *(out++) = c^ctx->EKi.c[n];
853 if (n==0) GCM_MUL (ctx,Xi);
859 #if defined(STRICT_ALIGNMENT)
860 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Bulk path: hash the ciphertext chunk FIRST, then decrypt it. */
863 #if defined(GHASH) && defined(GHASH_CHUNK)
864 while (len>=GHASH_CHUNK) {
865 size_t j=GHASH_CHUNK;
867 GHASH(in,GHASH_CHUNK,ctx);
869 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
871 if (is_endian.little)
872 PUTU32(ctx->Yi.c+12,ctr);
875 for (i=0; i<16; i+=sizeof(size_t))
877 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole blocks. */
884 if ((i = (len&(size_t)-16))) {
887 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
889 if (is_endian.little)
890 PUTU32(ctx->Yi.c+12,ctr);
893 for (i=0; i<16; i+=sizeof(size_t))
895 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No-GHASH() variant: capture the ciphertext word in 'c' before
 * overwriting, XOR it into Xi, write the plaintext. */
903 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
905 if (is_endian.little)
906 PUTU32(ctx->Yi.c+12,ctr);
909 for (i=0; i<16; i+=sizeof(size_t)) {
910 size_t c = *(size_t *)(in+i);
911 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
912 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Final partial block, byte by byte. */
921 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
923 if (is_endian.little)
924 PUTU32(ctx->Yi.c+12,ctr);
930 out[n] = c^ctx->EKi.c[n];
/* Small-footprint fallback: byte-at-a-time CTR + GHASH. */
940 for (i=0;i<len;++i) {
943 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
945 if (is_endian.little)
946 PUTU32(ctx->Yi.c+12,ctr);
951 out[i] ^= ctx->EKi.c[n];
/* Finalize the tag: hash in the length block (AAD bits || ciphertext
 * bits), then mask Xi with EK0 = E(K, Y0).  The tag is then available
 * in ctx->Xi.c.  NOTE(review): the trailing GCM_MUL calls and the
 * branch structure around the endian conversion are elided. */
962 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
964 const union { long one; char little; } is_endian = {1};
/* Convert byte counts to BIT counts as required by the GCM spec. */
965 u64 alen = ctx->len.u[0]<<3;
966 u64 clen = ctx->len.u[1]<<3;
/* On little-endian hosts the length block must be stored big-endian;
 * NOTE(review): the BSWAP8 of alen/clen happens on elided lines. */
971 if (is_endian.little) {
978 ctx->len.u[0] = alen;
979 ctx->len.u[1] = clen;
/* BSWAP8-less fallback assembles the swapped values via GETU32. */
981 alen = (u64)GETU32(p) <<32|GETU32(p+4);
982 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Xi ^= len(A) || len(C), followed (on an elided line) by GCM_MUL. */
986 ctx->Xi.u[0] ^= alen;
987 ctx->Xi.u[1] ^= clen;
/* Mask with EK0 to produce the final authentication tag. */
990 ctx->Xi.u[0] ^= ctx->EK0.u[0];
991 ctx->Xi.u[1] ^= ctx->EK0.u[1];
994 #if defined(SELFTEST)
996 #include <openssl/aes.h>
/* SELFTEST vectors.  These correspond to the standard AES-GCM test
 * cases from the GCM specification (McGrew & Viega / NIST SP 800-38D
 * appendix): tests 1-6 use AES-128, 7-12 AES-192, 13-18 AES-256.
 * K=key, P=plaintext, A=AAD, IV=nonce, C=ciphertext, T=tag.
 * NOTE(review): many declarators (IV*, A*, P* lines) are elided from
 * this listing, so the visible declarations are fragmentary. */
/* Test Case 1: empty plaintext, zero key/IV. */
999 static const u8 K1[16],
1004 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
/* Test Case 2: single zero block. */
1010 static const u8 P2[16],
1011 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1012 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
/* Test Case 3: 64-byte plaintext, no AAD. */
1016 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1017 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1018 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1019 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1020 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1021 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1022 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1023 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1024 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1025 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1026 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,};
/* Test Case 4: 60-byte plaintext with 20-byte AAD. */
1031 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1032 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1033 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1034 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1035 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1036 0xab,0xad,0xda,0xd2},
1037 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1038 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1039 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1040 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1041 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
/* Test Case 5: same data, 8-byte (non-96-bit) IV. */
1046 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1047 0xab,0xad,0xda,0xd2},
1048 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1049 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1050 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1051 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1052 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1053 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Test Case 6: same data, 60-byte IV. */
1059 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1060 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1061 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1062 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1063 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1064 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1065 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1066 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1067 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* Test Case 7: AES-192, empty plaintext. */
1070 static const u8 K7[24],
1075 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
/* Test Case 8: AES-192, single zero block. */
1081 static const u8 P8[16],
1082 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1083 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* Test Case 9: AES-192, 64-byte plaintext. */
1087 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1088 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1089 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1090 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1091 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1092 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1093 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1094 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1095 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1096 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1097 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1098 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* Test Case 10: AES-192, 60-byte plaintext with AAD. */
1103 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1104 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1105 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1106 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1107 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1108 0xab,0xad,0xda,0xd2},
1109 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1110 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1111 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1112 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1113 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* Test Case 11: AES-192, 8-byte IV. */
1119 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1120 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1121 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1122 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1123 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1124 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* Test Case 12: AES-192, 60-byte IV. */
1130 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1131 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1132 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1133 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1134 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1135 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1136 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1137 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1138 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* Test Case 13: AES-256, empty plaintext. */
1141 static const u8 K13[32],
1146 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* Test Case 14: AES-256, single zero block. */
1151 static const u8 P14[16],
1153 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1154 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* Test Case 15: AES-256, 64-byte plaintext. */
1158 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1159 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1160 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1161 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1162 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1163 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1164 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1165 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1166 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1167 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1168 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1169 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* Test Case 16: AES-256, 60-byte plaintext with AAD. */
1174 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1175 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1176 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1177 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1178 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1179 0xab,0xad,0xda,0xd2},
1180 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1181 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1182 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1183 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1184 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* Test Case 17: AES-256, 8-byte IV. */
1190 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1191 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1192 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1193 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1194 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1195 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* Test Case 18: AES-256, 60-byte IV. */
1201 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1202 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1203 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1204 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1205 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1206 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1207 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1208 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1209 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Run encrypt+decrypt for vector set 'n' (K##n, IV##n, optional A##n /
 * P##n / C##n) and compare the computed tag in ctx.Xi.c against T##n
 * and the output against the expected C##n / P##n.  Increments 'ret'
 * (from enclosing scope) on mismatch.  NOTE(review): relies on 'ctx',
 * 'key' and 'ret' declared in the (elided) enclosing function. */
1211 #define TEST_CASE(n) do { \
1212 u8 out[sizeof(P##n)]; \
1213 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1214 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1215 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1216 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1217 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1218 CRYPTO_gcm128_finish(&ctx); \
1219 if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
1220 ret++, printf ("encrypt test#%d failed.\n",n);\
1221 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1222 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1223 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1224 CRYPTO_gcm128_finish(&ctx); \
1225 if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
1226 ret++, printf ("decrypt test#%d failed.\n",n);\
/* Benchmark fragment (part of the SELFTEST main(), whose opening is
 * elided): measures cycles/byte via OPENSSL_rdtsc for GCM encryption,
 * plain CTR encryption, and raw GHASH over a 1KB buffer.  Each
 * measured call is preceded by an identical warm-up call. */
1254 #ifdef OPENSSL_CPUID_OBJ
1256 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1257 union { u64 u; u8 c[1024]; } buf;
1259 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1260 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1261 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
/* Warm-up, then timed GCM pass. */
1263 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1264 start = OPENSSL_rdtsc();
1265 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1266 gcm_t = OPENSSL_rdtsc() - start;
/* Warm-up, then timed raw CTR pass (no GHASH) for comparison. */
1268 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1269 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1270 (block128_f)AES_encrypt);
1271 start = OPENSSL_rdtsc();
1272 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1273 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1274 (block128_f)AES_encrypt);
1275 ctr_t = OPENSSL_rdtsc() - start;
/* Report GCM, CTR, and the implied GHASH overhead in cycles/byte. */
1277 printf("%.2f-%.2f=%.2f\n",
1278 gcm_t/(double)sizeof(buf),
1279 ctr_t/(double)sizeof(buf),
1280 (gcm_t-ctr_t)/(double)sizeof(buf));
/* Time GHASH alone (only meaningful when GHASH() is defined). */
1282 GHASH(buf.c,sizeof(buf),&ctx);
1283 start = OPENSSL_rdtsc();
1284 GHASH(buf.c,sizeof(buf),&ctx);
1285 gcm_t = OPENSSL_rdtsc() - start;
1286 printf("%.2f\n",gcm_t/(double)sizeof(buf));