1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
/*
 * Basic fixed-width integer types, chosen per platform/compiler.
 * NOTE(review): this listing is elided (embedded original line numbers
 * are non-contiguous); several #else/#endif counterparts are missing
 * from this view.
 */
60 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
/* MSVC: 64-bit type is __int64; constants take the UI64 suffix. */
62 typedef unsigned __int64 u64;
63 #define U64(C) C##UI64
64 #elif defined(__arch64__)
/* LP64 Unix: plain long is 64 bits. */
66 typedef unsigned long u64;
/* Fallback: long long is at least 64 bits (C99). */
69 typedef long long i64;
70 typedef unsigned long long u64;
74 typedef unsigned int u32;
75 typedef unsigned char u8;
/* 128-bit value as two 64-bit halves; hi is the most significant. */
76 typedef struct { u64 hi,lo; } u128;
/* Assume unaligned loads/stores fault unless the CPU is known safe. */
78 #define STRICT_ALIGNMENT
79 #if defined(__i386) || defined(__i386__) || \
80 defined(__x86_64) || defined(__x86_64__) || \
81 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
82 defined(__s390__) || defined(__s390x__)
/* These architectures tolerate unaligned access, so relax the rule. */
83 # undef STRICT_ALIGNMENT
/* Compiler-specific byte-swap primitives; GHASH operates on
 * big-endian 128-bit words, so little-endian hosts must swap. */
86 #if defined(__GNUC__) && __GNUC__>=2
87 # if defined(__x86_64) || defined(__x86_64__)
88 # define BSWAP8(x) ({ u64 ret=(x); \
89 asm volatile ("bswapq %0" \
91 # define BSWAP4(x) ({ u32 ret=(x); \
92 asm volatile ("bswapl %0" \
94 # elif defined(__i386) || defined(__i386__)
95 # define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
96 asm volatile ("bswapl %0; bswapl %1" \
97 : "+r"(hi),"+r"(lo)); \
99 # define BSWAP4(x) ({ u32 ret=(x); \
100 asm volatile ("bswapl %0" \
101 : "+r"(ret)); ret; })
/* MSVC: compiler intrinsics instead of GCC inline asm. */
103 #elif defined(_MSC_VER)
105 # pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
106 # define BSWAP8(x) _byteswap_uint64((u64)(x))
107 # define BSWAP4(x) _byteswap_ulong((u32)(x))
108 # elif defined(_M_IX86)
/* Fast path: reinterpret the byte pointer as u32 and swap in a
 * register.  NOTE(review): the cast assumes adequate alignment --
 * only enabled where BSWAP4 exists (x86-family). */
113 #define GETU32(p) BSWAP4(*(const u32 *)(p))
114 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/* Portable big-endian 32-bit load/store, byte by byte. */
116 #define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
117 #define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
/* Place a 16-bit reduction residue in the top 16 bits of a size_t. */
120 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
125 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
126 * never be set to 8. 8 is effectively reserved for testing purposes.
127 * Under ideal conditions "8-bit" version should be twice as fast as
128 * "4-bit" one. But world is far from ideal. For gcc-generated x86 code,
129 * "8-bit" was observed to run only ~50% faster. On x86_64 observed
130 * improvement was ~75%, much closer to optimal, but the fact of
131 * deviation means that references to pre-computed tables end up on
132 * critical path and as tables are pretty big, 4KB per key+1KB shared,
133 * execution time is sensitive to cache timing. It's not actually
134 * proven, but 4-bit procedure is believed to provide adequate
135 * all-round performance...
/*
 * Build the 256-entry multiplication table for "8-bit" GHASH:
 * Htable[i] holds i*H in GF(2^128).  NOTE(review): elided listing --
 * declarations of V, i, j and some halving steps are not visible.
 */
141 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* Fill power-of-two entries downwards by repeatedly halving V. */
151 for (Htable[128]=V, i=64; i>0; i>>=1) {
152 if (sizeof(size_t)==8) {
/* Branchless "divide by x" in GF(2^128): when the low bit is set,
 * fold it back in with the GCM polynomial 0xe1 (mask is all-ones
 * or zero depending on V.lo&1). */
153 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
154 V.lo = (V.hi<<63)|(V.lo>>1);
/* 32-bit-size_t variant of the same reduction. */
158 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
159 V.lo = (V.hi<<63)|(V.lo>>1);
160 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Remaining entries are XOR combinations of power-of-two entries,
 * since multiplication by (a^b) distributes over XOR. */
165 for (i=2; i<256; i<<=1) {
166 u128 *Hi = Htable+i, H0 = *Hi;
167 for (j=1; j<i; ++j) {
168 Hi[j].hi = H0.hi^Htable[j].hi;
169 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * Xi <- Xi * H in GF(2^128), one byte at a time, using the 256-entry
 * table.  Result is written back to Xi in big-endian byte order.
 * NOTE(review): elided listing -- declarations of Z, n and the outer
 * per-byte loop construct are not fully visible here.
 */
174 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
177 const u8 *xi = (const u8 *)Xi+15;
179 const union { long one; char little; } is_endian = {1};
/* Reduction residues for the byte shifted out at each step; PACK
 * pre-positions each 16-bit value in the top bits of a size_t. */
180 static const size_t rem_8bit[256] = {
181 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
182 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
183 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
184 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
185 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
186 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
187 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
188 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
189 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
190 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
191 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
192 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
193 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
194 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
195 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
196 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
197 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
198 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
199 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
200 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
201 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
202 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
203 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
204 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
205 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
206 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
207 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
208 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
209 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
210 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
211 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
212 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
213 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
214 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
215 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
216 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
217 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
218 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
219 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
220 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
221 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
222 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
223 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
224 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
225 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
226 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
227 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
228 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
229 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
230 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
231 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
232 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
233 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
234 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
235 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
236 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
237 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
238 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
239 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
240 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
241 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
242 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
243 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
244 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Accumulate the table entry selected by the current byte n. */
247 Z.hi ^= Htable[n].hi;
248 Z.lo ^= Htable[n].lo;
/* Most significant byte processed -- multiplication complete. */
250 if ((u8 *)Xi==xi) break;
/* Shift Z right by 8 bits and fold the shifted-out byte back in
 * via the precomputed residue table. */
254 rem = (size_t)Z.lo&0xff;
255 Z.lo = (Z.hi<<56)|(Z.lo>>8);
257 if (sizeof(size_t)==8)
258 Z.hi ^= rem_8bit[rem];
/* With a 32-bit size_t the residue lives in the upper u64 half. */
260 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Serialize Z back into Xi in big-endian byte order. */
263 if (is_endian.little) {
265 Xi[0] = BSWAP8(Z.hi);
266 Xi[1] = BSWAP8(Z.lo);
270 v = (u32)(Z.hi>>32); PUTU32(p,v);
271 v = (u32)(Z.hi); PUTU32(p+4,v);
272 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
273 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Hook used by the bulk encrypt/decrypt code when TABLE_BITS==8. */
281 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * Build the 16-entry table for "4-bit" GHASH: Htable[i] = i*H in
 * GF(2^128).  NOTE(review): elided listing -- declarations of V, i, j
 * are not visible in this view.
 */
285 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
/* Fill power-of-two entries by repeated halving of V. */
295 for (Htable[8]=V, i=4; i>0; i>>=1) {
296 if (sizeof(size_t)==8) {
/* Branchless reduction by the GCM polynomial 0xe1... (mask is
 * all-ones or zero depending on the bit shifted out). */
297 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
298 V.lo = (V.hi<<63)|(V.lo>>1);
/* 32-bit-size_t variant of the same reduction. */
302 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
303 V.lo = (V.hi<<63)|(V.lo>>1);
304 V.hi = (V.hi>>1 )^((u64)T<<32);
309 #if defined(OPENSSL_SMALL_FOOTPRINT)
/* Compact loop: composite entries are XORs of smaller entries. */
310 for (i=2; i<16; i<<=1) {
313 for (V=*Hi, j=1; j<i; ++j) {
314 Hi[j].hi = V.hi^Htable[j].hi;
315 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled form of the same XOR combinations. */
319 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
321 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
322 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
323 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
325 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
326 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
327 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
328 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
329 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
330 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
331 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
/* Residues for the nibble shifted out per step, positioned by PACK. */
336 static const size_t rem_4bit[16] = {
337 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
338 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
339 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
340 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * Xi <- Xi * H using the 4-bit table, processing one nibble at a
 * time from the least significant byte upwards.  NOTE(review):
 * elided -- the per-byte loop and the nhi extraction are only
 * partially visible here.
 */
342 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
346 size_t rem, nlo, nhi;
347 const union { long one; char little; } is_endian = {1};
/* Start from the least significant byte of Xi. */
349 nlo = ((const u8 *)Xi)[15];
353 Z.hi = Htable[nlo].hi;
354 Z.lo = Htable[nlo].lo;
/* Shift Z right 4 bits; fold the shifted-out nibble via rem_4bit. */
357 rem = (size_t)Z.lo&0xf;
358 Z.lo = (Z.hi<<60)|(Z.lo>>4);
360 if (sizeof(size_t)==8)
361 Z.hi ^= rem_4bit[rem];
/* 32-bit size_t: residue sits in the upper half of the u64. */
363 Z.hi ^= (u64)rem_4bit[rem]<<32;
/* Accumulate the high-nibble table entry. */
365 Z.hi ^= Htable[nhi].hi;
366 Z.lo ^= Htable[nhi].lo;
370 nlo = ((const u8 *)Xi)[cnt];
/* Same shift-and-reduce for the low nibble. */
374 rem = (size_t)Z.lo&0xf;
375 Z.lo = (Z.hi<<60)|(Z.lo>>4);
377 if (sizeof(size_t)==8)
378 Z.hi ^= rem_4bit[rem];
380 Z.hi ^= (u64)rem_4bit[rem]<<32;
382 Z.hi ^= Htable[nlo].hi;
383 Z.lo ^= Htable[nlo].lo;
/* Serialize Z back into Xi in big-endian byte order. */
386 if (is_endian.little) {
388 Xi[0] = BSWAP8(Z.hi);
389 Xi[1] = BSWAP8(Z.lo);
393 v = (u32)(Z.hi>>32); PUTU32(p,v);
394 v = (u32)(Z.hi); PUTU32(p+4,v);
395 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
396 v = (u32)(Z.lo); PUTU32(p+12,v);
405 #if !defined(OPENSSL_SMALL_FOOTPRINT)
407 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
408 * details... Compiler-generated code doesn't seem to give any
409 * performance improvement, at least not on x86[_64]. It's here
410 * mostly as reference and a placeholder for possible future
411 * non-trivial optimization[s]...
/*
 * GHASH over len bytes of inp: Xi <- (Xi ^ block) * H for each
 * 16-byte block.  Same inner nibble loop as gcm_gmult_4bit, but
 * amortized over many blocks.  NOTE(review): elided -- the block
 * loop header and the XOR of inp into the nibble stream are only
 * partially visible.
 */
413 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
414 const u8 *inp,size_t len)
418 size_t rem, nlo, nhi;
419 const union { long one; char little; } is_endian = {1};
423 nlo = ((const u8 *)Xi)[15];
428 Z.hi = Htable[nlo].hi;
429 Z.lo = Htable[nlo].lo;
/* Shift-and-reduce, then fold in the high-nibble table entry. */
432 rem = (size_t)Z.lo&0xf;
433 Z.lo = (Z.hi<<60)|(Z.lo>>4);
435 if (sizeof(size_t)==8)
436 Z.hi ^= rem_4bit[rem];
438 Z.hi ^= (u64)rem_4bit[rem]<<32;
440 Z.hi ^= Htable[nhi].hi;
441 Z.lo ^= Htable[nhi].lo;
445 nlo = ((const u8 *)Xi)[cnt];
/* Same shift-and-reduce for the low nibble. */
450 rem = (size_t)Z.lo&0xf;
451 Z.lo = (Z.hi<<60)|(Z.lo>>4);
453 if (sizeof(size_t)==8)
454 Z.hi ^= rem_4bit[rem];
456 Z.hi ^= (u64)rem_4bit[rem]<<32;
458 Z.hi ^= Htable[nlo].hi;
459 Z.lo ^= Htable[nlo].lo;
/* Serialize Z back into Xi in big-endian byte order. */
462 if (is_endian.little) {
464 Xi[0] = BSWAP8(Z.hi);
465 Xi[1] = BSWAP8(Z.lo);
469 v = (u32)(Z.hi>>32); PUTU32(p,v);
470 v = (u32)(Z.hi); PUTU32(p+4,v);
471 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
472 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Advance to the next 16-byte block. */
479 } while (inp+=16, len-=16);
/* Assembler implementations declared when GHASH_ASM is in effect. */
483 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
484 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
487 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
488 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
489 #define GHASH(in,len,ctx) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
490 /* GHASH_CHUNK is a "stride parameter" meant to mitigate cache
491 * thrashing. The idea is to hash data while it is still in the
492 * L1 cache after the encryption pass... */
493 #define GHASH_CHUNK 1024
496 #else /* TABLE_BITS */
/*
 * Bit-at-a-time GHASH multiplication: no tables, smallest footprint,
 * slowest.  Xi <- Xi * H; result written back in big-endian order.
 * NOTE(review): elided -- declarations of Z, V, X, i, j and parts of
 * the inner loop are not visible in this view.
 */
498 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
503 const long *xi = (const long *)Xi;
504 const union { long one; char little; } is_endian = {1};
506 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Walk Xi one machine word at a time, converting to host order. */
509 for (j=0; j<16/sizeof(long); ++j) {
510 if (is_endian.little) {
511 if (sizeof(long)==8) {
513 X = (long)(BSWAP8(xi[j]));
515 const u8 *p = (const u8 *)(xi+j);
516 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
520 const u8 *p = (const u8 *)(xi+j);
/* For each bit of X, MSB first, conditionally XOR V into Z... */
527 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
/* Mask is all-ones when X's top bit is set, else zero.
 * NOTE(review): relies on arithmetic right shift of the signed
 * long -- implementation-defined but universal in practice. */
528 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* ...then halve V with the usual 0xe1 polynomial reduction. */
532 if (sizeof(size_t)==8) {
533 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
534 V.lo = (V.hi<<63)|(V.lo>>1);
538 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
539 V.lo = (V.hi<<63)|(V.lo>>1);
540 V.hi = (V.hi>>1 )^((u64)T<<32);
/* Serialize Z back into Xi in big-endian byte order. */
546 if (is_endian.little) {
548 Xi[0] = BSWAP8(Z.hi);
549 Xi[1] = BSWAP8(Z.lo);
553 v = (u32)(Z.hi>>32); PUTU32(p,v);
554 v = (u32)(Z.hi); PUTU32(p+4,v);
555 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
556 v = (u32)(Z.lo); PUTU32(p+12,v);
564 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* NOTE(review): interior of the GCM128_CONTEXT struct -- the struct
 * header and several members (Xi, H, len, Htable, block, key) are
 * elided from this view. */
569 /* Following 6 names follow names in GCM specification */
570 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
573 /* Pre-computed table used by gcm_gmult_* */
/* res: presumably bytes of the current keystream block consumed so
 * far; ctr: host-order copy of the low 32-bit counter word of Yi --
 * TODO confirm against the full struct definition. */
579 unsigned int res, ctr;
/*
 * Initialize a GCM context: zero the state, derive the hash key
 * H = E(K, 0^128), convert it to host byte order, and precompute the
 * multiplication table for the configured TABLE_BITS.
 */
584 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
586 const union { long one; char little; } is_endian = {1};
588 memset(ctx,0,sizeof(*ctx));
/* Encrypt the (zeroed) H block in place to obtain the hash subkey. */
592 (*block)(ctx->H.c,ctx->H.c,key);
594 if (is_endian.little) {
595 /* H is stored in host byte order */
597 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
598 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* No BSWAP8 available: assemble the halves with 32-bit loads. */
602 hi = (u64)GETU32(p) <<32|GETU32(p+4);
603 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* Table precomputation, selected by TABLE_BITS at compile time. */
610 gcm_init_8bit(ctx->Htable,ctx->H.u);
612 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * Load the IV/nonce and derive the initial counter block Yi and the
 * tag mask EK0 = E(K, Y0).  A 96-bit IV is used directly; any other
 * length is GHASHed together with its bit length per the GCM spec.
 * NOTE(review): elided -- the branch structure, GCM_MUL calls and
 * the counter increment between EK0 and the PUTU32 are not visible.
 */
616 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
618 const union { long one; char little; } is_endian = {1};
/* Fast path: 96-bit IV is copied verbatim (counter starts at 1). */
629 memcpy(ctx->Yi.c,iv,12);
/* Longer IV: absorb full 16-byte blocks into Yi via GHASH. */
638 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ...then the final partial block. */
644 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* Fold in the IV bit length (len0) as the closing GHASH block. */
648 if (is_endian.little) {
650 ctx->Yi.u[1] ^= BSWAP8(len0);
652 ctx->Yi.c[8] ^= (u8)(len0>>56);
653 ctx->Yi.c[9] ^= (u8)(len0>>48);
654 ctx->Yi.c[10] ^= (u8)(len0>>40);
655 ctx->Yi.c[11] ^= (u8)(len0>>32);
656 ctx->Yi.c[12] ^= (u8)(len0>>24);
657 ctx->Yi.c[13] ^= (u8)(len0>>16);
658 ctx->Yi.c[14] ^= (u8)(len0>>8);
659 ctx->Yi.c[15] ^= (u8)(len0);
/* Big-endian host: XOR the length in directly. */
663 ctx->Yi.u[1] ^= len0;
/* Cache the 32-bit counter word in host order for fast increment. */
667 if (is_endian.little)
668 ctx->ctr = GETU32(ctx->Yi.c+12);
670 ctx->ctr = ctx->Yi.d[3];
/* EK0 = E(K, Y0) -- XORed into the tag in CRYPTO_gcm128_finish. */
673 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* Store the (incremented) counter back into Yi. */
675 if (is_endian.little)
676 PUTU32(ctx->Yi.c+12,ctx->ctr);
678 ctx->Yi.d[3] = ctx->ctr;
/*
 * Absorb additional authenticated data (AAD) into the GHASH state.
 * Must be called before any encrypt/decrypt.  NOTE(review): elided --
 * the GCM_MUL calls between blocks are not visible here.
 */
681 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
/* len.u[0] accumulates the AAD byte count for the final length block. */
685 ctx->len.u[0] += len;
/* Whole 16-byte blocks first (len rounded down to a multiple of 16). */
688 if ((i = (len&(size_t)-16))) {
695 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block, zero-padded implicitly. */
702 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CTR-mode encrypt len bytes of in to out while folding the
 * ciphertext into the GHASH state (encrypt-then-hash).
 * NOTE(review): elided -- loop headers, counter increments and the
 * STRICT_ALIGNMENT fallback path are only partially visible.
 */
707 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
708 const unsigned char *in, unsigned char *out,
711 const union { long one; char little; } is_endian = {1};
/* len.u[1] accumulates ciphertext bytes for the final length block. */
715 ctx->len.u[1] += len;
719 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* Finish any partially-consumed keystream block left in EKi. */
720 if (16%sizeof(size_t) == 0) do { /* always true actually */
723 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
/* A full block of ciphertext accumulated -- multiply into GHASH. */
727 if (n==0) GCM_MUL(ctx,Xi);
733 #if defined(STRICT_ALIGNMENT)
/* Unaligned buffers: fall through to the byte-wise path below. */
734 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
737 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Encrypt GHASH_CHUNK bytes, then hash them while still in L1. */
738 while (len>=GHASH_CHUNK) {
739 size_t j=GHASH_CHUNK;
742 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
744 if (is_endian.little)
745 PUTU32(ctx->Yi.c+12,ctr);
/* XOR keystream into plaintext, one size_t word at a time. */
748 for (i=0; i<16; i+=sizeof(size_t))
750 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
755 GHASH(out-GHASH_CHUNK,GHASH_CHUNK,ctx);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
758 if ((i = (len&(size_t)-16))) {
762 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
764 if (is_endian.little)
765 PUTU32(ctx->Yi.c+12,ctr);
768 for (i=0; i<16; i+=sizeof(size_t))
770 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No bulk GHASH available: hash each block as it is produced. */
779 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
781 if (is_endian.little)
782 PUTU32(ctx->Yi.c+12,ctr);
785 for (i=0; i<16; i+=sizeof(size_t))
786 *(size_t *)(ctx->Xi.c+i) ^=
788 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Final partial block: byte-wise, leaving n as the resume offset. */
796 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
798 if (is_endian.little)
799 PUTU32(ctx->Yi.c+12,ctr);
803 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* Small-footprint / unaligned path: strictly byte-wise CTR. */
813 for (i=0;i<len;++i) {
815 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
817 if (is_endian.little)
818 PUTU32(ctx->Yi.c+12,ctr);
822 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CTR-mode decrypt len bytes of in to out.  Mirror image of
 * CRYPTO_gcm128_encrypt, except the *ciphertext* (input) is folded
 * into GHASH before it is overwritten -- note the temporaries used
 * to support in-place operation (in == out).
 * NOTE(review): elided -- loop headers, counter increments and parts
 * of the byte-wise paths are not visible.
 */
832 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
833 const unsigned char *in, unsigned char *out,
836 const union { long one; char little; } is_endian = {1};
/* len.u[1] accumulates ciphertext bytes for the final length block. */
840 ctx->len.u[1] += len;
844 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* Finish any partially-consumed keystream block left in EKi. */
845 if (16%sizeof(size_t) == 0) do { /* always true actually */
849 *(out++) = c^ctx->EKi.c[n];
854 if (n==0) GCM_MUL (ctx,Xi);
860 #if defined(STRICT_ALIGNMENT)
/* Unaligned buffers: fall through to the byte-wise path below. */
861 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
864 #if defined(GHASH) && defined(GHASH_CHUNK)
/* Hash the ciphertext chunk first, then decrypt it. */
865 while (len>=GHASH_CHUNK) {
866 size_t j=GHASH_CHUNK;
868 GHASH(in,GHASH_CHUNK,ctx);
870 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
872 if (is_endian.little)
873 PUTU32(ctx->Yi.c+12,ctr);
/* XOR keystream into ciphertext, one size_t word at a time. */
876 for (i=0; i<16; i+=sizeof(size_t))
878 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole blocks (len rounded down to a multiple of 16). */
885 if ((i = (len&(size_t)-16))) {
888 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
890 if (is_endian.little)
891 PUTU32(ctx->Yi.c+12,ctr);
894 for (i=0; i<16; i+=sizeof(size_t))
896 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* No bulk GHASH: hash each ciphertext word before overwriting it. */
904 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
906 if (is_endian.little)
907 PUTU32(ctx->Yi.c+12,ctr);
910 for (i=0; i<16; i+=sizeof(size_t)) {
/* c saves the ciphertext word so in==out works in place. */
911 size_t c = *(size_t *)(in+i);
912 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
913 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Final partial block: byte-wise, leaving n as the resume offset. */
922 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
924 if (is_endian.little)
925 PUTU32(ctx->Yi.c+12,ctr);
931 out[n] = c^ctx->EKi.c[n];
/* Small-footprint / unaligned path: strictly byte-wise CTR. */
941 for (i=0;i<len;++i) {
944 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
946 if (is_endian.little)
947 PUTU32(ctx->Yi.c+12,ctr);
952 out[i] ^= ctx->EKi.c[n];
/*
 * Close the GHASH computation: fold in the AAD/ciphertext bit
 * lengths, do the final multiplication, and XOR with EK0 so that
 * ctx->Xi holds the authentication tag.  NOTE(review): elided --
 * the trailing-partial-block GCM_MUL and the closing GCM_MUL are
 * not visible in this view.
 */
963 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
965 const union { long one; char little; } is_endian = {1};
/* Convert accumulated byte counts to bit counts (<<3). */
966 u64 alen = ctx->len.u[0]<<3;
967 u64 clen = ctx->len.u[1]<<3;
972 if (is_endian.little) {
979 ctx->len.u[0] = alen;
980 ctx->len.u[1] = clen;
/* No BSWAP8: rebuild big-endian lengths with 32-bit loads. */
982 alen = (u64)GETU32(p) <<32|GETU32(p+4);
983 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* len(A) || len(C) is the final GHASH block. */
987 ctx->Xi.u[0] ^= alen;
988 ctx->Xi.u[1] ^= clen;
/* Tag = GHASH result XOR E(K, Y0). */
991 ctx->Xi.u[0] ^= ctx->EK0.u[0];
992 ctx->Xi.u[1] ^= ctx->EK0.u[1];
995 #if defined(SELFTEST)
997 #include <openssl/aes.h>
/* Test vectors below follow the AES-GCM test cases from the GCM
 * specification: K/P/A/IV/C/T per case; AES-128 (1-6), AES-192
 * (7-12), AES-256 (13-18).  NOTE(review): elided -- some K/P/A/IV
 * declarations are missing from this view. */
1000 static const u8 K1[16],
1005 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1011 static const u8 P2[16],
1012 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1013 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1017 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1018 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1019 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1020 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1021 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1022 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1023 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1024 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1025 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1026 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1027 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,};
1032 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1033 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1034 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1035 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1036 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1037 0xab,0xad,0xda,0xd2},
1038 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1039 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1040 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1041 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1042 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1047 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1048 0xab,0xad,0xda,0xd2},
1049 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1050 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1051 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1052 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1053 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1054 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
/* Case 6 exercises a long (non-96-bit) IV. */
1060 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1061 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1062 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1063 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1064 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1065 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1066 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1067 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1068 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* AES-192 cases. */
1071 static const u8 K7[24],
1076 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1082 static const u8 P8[16],
1083 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1084 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1088 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1089 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1090 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1091 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1092 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1093 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1094 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1095 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1096 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1097 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1098 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1099 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1104 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1105 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1106 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1107 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1108 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1109 0xab,0xad,0xda,0xd2},
1110 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1111 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1112 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1113 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1114 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1120 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1121 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1122 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1123 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1124 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1125 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1131 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1132 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1133 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1134 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1135 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1136 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1137 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1138 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1139 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* AES-256 cases. */
1142 static const u8 K13[32],
1147 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1152 static const u8 P14[16],
1154 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1155 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1159 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1160 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1161 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1162 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1163 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1164 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1165 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1166 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1167 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1168 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1169 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1170 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1175 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1176 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1177 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1178 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1179 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1180 0xab,0xad,0xda,0xd2},
1181 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1182 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1183 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1184 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1185 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1191 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1192 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1193 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1194 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1195 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1196 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1202 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1203 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1204 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1205 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1206 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1207 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1208 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1209 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1210 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* Run test case n through an encrypt pass and a decrypt pass and
 * compare both the output and the tag against the expected vectors. */
1212 #define TEST_CASE(n) do { \
1213 u8 out[sizeof(P##n)]; \
1214 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1215 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1216 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1217 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1218 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1219 CRYPTO_gcm128_finish(&ctx); \
1220 if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
1221 ret++, printf ("encrypt test#%d failed.\n",n);\
1222 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1223 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1224 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1225 CRYPTO_gcm128_finish(&ctx); \
1226 if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
1227 ret++, printf ("decrypt test#%d failed.\n",n);\
/* Optional micro-benchmark: cycle counts for GCM vs plain CTR and
 * for GHASH alone, via OPENSSL_rdtsc.  First call of each pair warms
 * the caches; the second is timed. */
1255 #ifdef OPENSSL_CPUID_OBJ
1257 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1258 union { u64 u; u8 c[1024]; } buf;
1260 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1261 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1262 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1264 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1265 start = OPENSSL_rdtsc();
1266 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1267 gcm_t = OPENSSL_rdtsc() - start;
1269 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1270 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1271 (block128_f)AES_encrypt);
1272 start = OPENSSL_rdtsc();
1273 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1274 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1275 (block128_f)AES_encrypt);
1276 ctr_t = OPENSSL_rdtsc() - start;
/* Cycles/byte: GCM total, CTR alone, and their difference (GHASH). */
1278 printf("%.2f-%.2f=%.2f\n",
1279 gcm_t/(double)sizeof(buf),
1280 ctr_t/(double)sizeof(buf),
1281 (gcm_t-ctr_t)/(double)sizeof(buf));
1283 GHASH(buf.c,sizeof(buf),&ctx);
1284 start = OPENSSL_rdtsc();
1285 GHASH(buf.c,sizeof(buf),&ctx);
1286 gcm_t = OPENSSL_rdtsc() - start;
1287 printf("%.2f\n",gcm_t/(double)sizeof(buf));