1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
/*
 * Platform/compiler configuration: fixed-width integer typedefs
 * (u64/u32/u8, a 128-bit value as a hi/lo pair), byte-swap macros
 * (BSWAP8/BSWAP4) and big-endian 32-bit load/store helpers
 * (GETU32/PUTU32).
 * NOTE(review): this listing elides lines; several #if branches and
 * their matching #else/#endif are not visible here.
 */
60 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
62 typedef unsigned __int64 u64;
63 #define U64(C) C##UI64
64 #elif defined(__arch64__)
66 typedef unsigned long u64;
69 typedef long long i64;
70 typedef unsigned long long u64;
74 typedef unsigned int u32;
75 typedef unsigned char u8;
/* 128-bit quantity stored as two 64-bit halves; hi is the most
 * significant half. */
76 typedef struct { u64 hi,lo; } u128;
/* STRICT_ALIGNMENT is assumed by default and #undef-ed below on
 * architectures that tolerate unaligned word access. */
78 #define STRICT_ALIGNMENT
79 #if defined(__i386) || defined(__i386__) || \
80 defined(__x86_64) || defined(__x86_64__) || \
81 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
82 defined(__s390__) || defined(__s390x__)
83 # undef STRICT_ALIGNMENT
/* Byte-swap primitives: gcc statement-expression + inline asm on
 * x86/x86_64, compiler intrinsics on MSVC. */
86 #if defined(__GNUC__) && __GNUC__>=2
87 # if defined(__x86_64) || defined(__x86_64__)
88 # define BSWAP8(x) ({ u64 ret=(x); \
89 asm volatile ("bswapq %0" \
91 # define BSWAP4(x) ({ u32 ret=(x); \
92 asm volatile ("bswapl %0" \
94 # elif defined(__i386) || defined(__i386__)
95 # define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
96 asm volatile ("bswapl %0; bswapl %1" \
97 : "+r"(hi),"+r"(lo)); \
99 # define BSWAP4(x) ({ u32 ret=(x); \
100 asm volatile ("bswapl %0" \
101 : "+r"(ret)); ret; })
103 #elif defined(_MSC_VER)
105 # pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
106 # define BSWAP8(x) _byteswap_uint64((u64)(x))
107 # define BSWAP4(x) _byteswap_ulong((u32)(x))
108 # elif defined(_M_IX86)
/* GETU32/PUTU32: read/write a big-endian 32-bit word.  Fast BSWAP-based
 * form when a byte-swap primitive exists (note: it type-puns through a
 * u32 pointer), portable byte-by-byte form otherwise. */
113 #define GETU32(p) BSWAP4(*(const u32 *)(p))
114 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
116 #define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
117 #define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
/* Place a 16-bit reduction constant into the top 16 bits of a size_t,
 * so the rem_4bit/rem_8bit tables work for 32- and 64-bit size_t. */
120 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/* Rationale for shipping the 4-bit GHASH variant by default: */
124 * Under ideal conditions 8-bit version should be twice as fast as
125 * 4-bit one. But world is far from ideal. For gcc-generated x86 code,
126 * 8-bit was observed to run "only" ~50% faster. On x86_64 observed
127 * improvement was ~75%, much closer to optimal, but the fact of
128 * deviation means that references to pre-computed tables end up on
129 * critical path and as tables are pretty big, 4KB per key+1KB shared,
130 * execution time is sensitive to cache trashing. It's not actually
131 * proven, but 4-bit procedure is believed to provide adequate
132 * all-round performance...
/*
 * gcm_init_8bit: pre-compute the 256-entry table for the 8-bit GHASH
 * multiplier; Htable[i] holds i*H in GF(2^128) (GCM bit-reversed
 * convention).  Phase 1 halves H to fill the power-of-two slots,
 * applying the GCM reduction polynomial when a bit shifts out; phase 2
 * fills the rest by XOR-combining power-of-two entries.
 * NOTE(review): lines are elided in this listing (e.g. the V/Htable[0]
 * initialization and the 64-bit branch's V.hi update are not visible).
 */
134 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
144 for (Htable[128]=V, i=64; i>0; i>>=1) {
145 if (sizeof(size_t)==8) {
/* branchless conditional reduction: 0-(V.lo&1) is an all-ones mask
 * exactly when the bit shifted out below is set */
146 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
147 V.lo = (V.hi<<63)|(V.lo>>1);
/* 32-bit-size_t variant of the same reduction */
151 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
152 V.lo = (V.hi<<63)|(V.lo>>1);
153 V.hi = (V.hi>>1) ^((u64)T<<32);
/* phase 2: Htable[i+j] = Htable[i] ^ Htable[j] for all j < i */
158 for (i=2; i<256; i<<=1) {
159 u128 *Hi = Htable+i, H0 = *Hi;
160 for (j=1; j<i; ++j) {
161 Hi[j].hi = H0.hi^Htable[j].hi;
162 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit: Xi = Xi * H in GF(2^128), processing one byte of Xi
 * per iteration via the 256-entry Htable.  Xi is read byte-by-byte from
 * the end (xi starts at byte 15) and the result is written back to Xi
 * in big-endian byte order.
 * NOTE(review): the loop header and several declarations (Z, n, rem, v,
 * p) are elided in this listing.
 */
167 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
170 const u8 *xi = (const u8 *)Xi+15;
/* runtime endianness probe: little.one overlays the low byte on
 * little-endian targets */
172 const union { long one; char little; } is_endian = {1};
/* Reduction table: rem_8bit[b] is the pre-reduced contribution of the
 * byte b shifted out of the low end, pre-shifted into the top 16 bits
 * of a size_t by PACK(). */
173 static const size_t rem_8bit[256] = {
174 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
175 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
176 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
177 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
178 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
179 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
180 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
181 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
182 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
183 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
184 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
185 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
186 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
187 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
188 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
189 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
190 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
191 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
192 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
193 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
194 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
195 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
196 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
197 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
198 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
199 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
200 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
201 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
202 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
203 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
204 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
205 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
206 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
207 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
208 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
209 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
210 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
211 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
212 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
213 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
214 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
215 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
216 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
217 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
218 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
219 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
220 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
221 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
222 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
223 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
224 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
225 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
226 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
227 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
228 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
229 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
230 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
231 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
232 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
233 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
234 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
235 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
236 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
237 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* accumulate the table entry selected by the current byte of Xi */
240 Z.hi ^= Htable[n].hi;
241 Z.lo ^= Htable[n].lo;
/* processed all 16 bytes once xi walks back to the start of Xi */
243 if ((u8 *)Xi==xi) break;
/* shift Z right by 8 bits and fold the shifted-out byte back in via
 * rem_8bit (table is in the top bits of size_t; on 32-bit size_t it
 * must be repositioned into the high half of Z.hi) */
247 rem = (size_t)Z.lo&0xff;
248 Z.lo = (Z.hi<<56)|(Z.lo>>8);
250 if (sizeof(size_t)==8)
251 Z.hi ^= rem_8bit[rem];
253 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* store the result back to Xi in big-endian order */
256 if (is_endian.little) {
258 Xi[0] = BSWAP8(Z.hi);
259 Xi[1] = BSWAP8(Z.lo);
263 v = (u32)(Z.hi>>32); PUTU32(p,v);
264 v = (u32)(Z.hi); PUTU32(p+4,v);
265 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
266 v = (u32)(Z.lo); PUTU32(p+12,v);
/* Compile-time selector for the portable GHASH path: 1 = table-driven
 * 4-bit multiplication, 0 = bit-at-a-time multiplication below. */
276 #define _4BIT 1 /* change to 0 to switch to 1-bit multiplication */
/*
 * gcm_init_4bit: pre-compute the 16-entry table for the 4-bit GHASH
 * multiplier; Htable[i] holds i*H in GF(2^128).  Structure mirrors
 * gcm_init_8bit: halve H into the power-of-two slots with conditional
 * reduction, then combine.  The non-SMALL_FOOTPRINT path unrolls the
 * combination loop completely.
 * NOTE(review): lines are elided here (V initialization, the 64-bit
 * branch's V.hi update, several Htable[2^k] assignments and V=... loads
 * between the unrolled groups).
 */
279 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
289 for (Htable[8]=V, i=4; i>0; i>>=1) {
290 if (sizeof(size_t)==8) {
/* branchless conditional reduction by the GCM polynomial */
291 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
292 V.lo = (V.hi<<63)|(V.lo>>1);
296 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
297 V.lo = (V.hi<<63)|(V.lo>>1);
298 V.hi = (V.hi>>1 )^((u64)T<<32);
/* compact combination loop for the small-footprint build */
303 #if defined(OPENSSL_SMALL_FOOTPRINT)
304 for (i=2; i<16; i<<=1) {
307 for (V=*Hi, j=1; j<i; ++j) {
308 Hi[j].hi = V.hi^Htable[j].hi;
309 Hi[j].lo = V.lo^Htable[j].lo;
/* fully unrolled combination: Htable[a+b] = Htable[a] ^ Htable[b] */
313 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
315 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
316 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
317 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
319 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
320 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
321 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
322 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
323 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
324 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
325 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
/* Reduction table for the 4-bit multiplier: rem_4bit[n] is the
 * pre-reduced contribution of the nibble n shifted out of the low end,
 * positioned in the top 16 bits of a size_t by PACK(). */
330 static const size_t rem_4bit[16] = {
331 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
332 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
333 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
334 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit: Xi = Xi * H in GF(2^128), consuming Xi one byte at a
 * time from the end, two table lookups (low nibble nlo, high nibble
 * nhi) per byte.  After each lookup Z is shifted right 4 bits and the
 * shifted-out nibble is folded back in via rem_4bit.  Result is stored
 * to Xi big-endian.
 * NOTE(review): elided lines include the declarations of Z/cnt/v/p, the
 * nhi extraction, the loop header, and the loop-exit test.
 */
336 static void gcm_gmult_4bit(u64 Xi[2], u128 Htable[16])
340 size_t rem, nlo, nhi;
341 const union { long one; char little; } is_endian = {1};
/* start from the last byte of Xi */
343 nlo = ((const u8 *)Xi)[15];
347 Z.hi = Htable[nlo].hi;
348 Z.lo = Htable[nlo].lo;
/* shift Z right 4 bits; reduce via rem_4bit (repositioned on 32-bit
 * size_t), then fold in the high-nibble table entry */
351 rem = (size_t)Z.lo&0xf;
352 Z.lo = (Z.hi<<60)|(Z.lo>>4);
354 if (sizeof(size_t)==8)
355 Z.hi ^= rem_4bit[rem];
357 Z.hi ^= (u64)rem_4bit[rem]<<32;
359 Z.hi ^= Htable[nhi].hi;
360 Z.lo ^= Htable[nhi].lo;
/* next byte of Xi (cnt walks toward index 0) */
364 nlo = ((const u8 *)Xi)[cnt];
368 rem = (size_t)Z.lo&0xf;
369 Z.lo = (Z.hi<<60)|(Z.lo>>4);
371 if (sizeof(size_t)==8)
372 Z.hi ^= rem_4bit[rem];
374 Z.hi ^= (u64)rem_4bit[rem]<<32;
376 Z.hi ^= Htable[nlo].hi;
377 Z.lo ^= Htable[nlo].lo;
/* store result back to Xi in big-endian order */
380 if (is_endian.little) {
382 Xi[0] = BSWAP8(Z.hi);
383 Xi[1] = BSWAP8(Z.lo);
387 v = (u32)(Z.hi>>32); PUTU32(p,v);
388 v = (u32)(Z.hi); PUTU32(p+4,v);
389 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
390 v = (u32)(Z.lo); PUTU32(p+12,v);
399 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * gcm_ghash_4bit: streamed GHASH — XOR each 16-byte input block into Xi
 * and multiply by H, using the same 4-bit table algorithm as
 * gcm_gmult_4bit, looping over `len` bytes of `inp` (len is assumed to
 * be a multiple of 16, per the do/while step at the bottom).
 */
401 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
402 * details... It doesn't give any performance improvement, at least
403 * not on x86[_64]. It's here mostly as a placeholder for possible
404 * future non-trivial optimization[s]...
406 static void gcm_ghash_4bit(const u8 *inp,size_t len,u64 Xi[2], u128 Htable[16])
410 size_t rem, nlo, nhi;
411 const union { long one; char little; } is_endian = {1};
/* NOTE(review): the XOR of the input block into nlo/nhi is elided in
 * this listing; only the table-lookup skeleton is visible below. */
415 nlo = ((const u8 *)Xi)[15];
420 Z.hi = Htable[nlo].hi;
421 Z.lo = Htable[nlo].lo;
/* shift-right-4 + rem_4bit reduction, as in gcm_gmult_4bit */
424 rem = (size_t)Z.lo&0xf;
425 Z.lo = (Z.hi<<60)|(Z.lo>>4);
427 if (sizeof(size_t)==8)
428 Z.hi ^= rem_4bit[rem];
430 Z.hi ^= (u64)rem_4bit[rem]<<32;
432 Z.hi ^= Htable[nhi].hi;
433 Z.lo ^= Htable[nhi].lo;
437 nlo = ((const u8 *)Xi)[cnt];
442 rem = (size_t)Z.lo&0xf;
443 Z.lo = (Z.hi<<60)|(Z.lo>>4);
445 if (sizeof(size_t)==8)
446 Z.hi ^= rem_4bit[rem];
448 Z.hi ^= (u64)rem_4bit[rem]<<32;
450 Z.hi ^= Htable[nlo].hi;
451 Z.lo ^= Htable[nlo].lo;
/* write the updated Xi back in big-endian order */
454 if (is_endian.little) {
456 Xi[0] = BSWAP8(Z.hi);
457 Xi[1] = BSWAP8(Z.lo);
461 v = (u32)(Z.hi>>32); PUTU32(p,v);
462 v = (u32)(Z.hi); PUTU32(p+4,v);
463 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
464 v = (u32)(Z.lo); PUTU32(p+12,v);
/* advance to next 16-byte block; terminates when len reaches 0 */
471 } while (inp+=16, len-=16);
/* External (assembler) implementations with the same contract as the C
 * versions above; GCM_MUL/GHASH are the dispatch points used by the
 * CRYPTO_gcm128_* routines.  GHASH_CHUNK is the bulk-processing stride
 * (bytes) used by encrypt/decrypt. */
475 void gcm_gmult_4bit(u64 Xi[2],u128 Htable[16]);
476 void gcm_ghash_4bit(const u8 *inp,size_t len,u64 Xi[2],u128 Htable[16]);
479 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
480 #define GHASH(in,len,ctx) gcm_ghash_4bit(in,len,ctx->Xi.u,ctx->Htable)
481 #define GHASH_CHUNK 1024
/*
 * gcm_gmult_1bit: reference bit-at-a-time GF(2^128) multiplication,
 * Xi = Xi * H.  Xi is loaded one `long` at a time (byte-swapped to host
 * order on little-endian targets), then each bit of X selects whether V
 * (a running multiple of H) is accumulated; V is halved with
 * conditional reduction each step.  No tables — used when _4BIT is 0.
 * NOTE(review): the declarations of V/Z/X/M, the accumulate step
 * (Z ^= V & M), and the big-endian load at line ~508 are elided here.
 */
485 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
490 const long *xi = (const long *)Xi;
491 const union { long one; char little; } is_endian = {1};
493 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* walk Xi in machine words (two on LP64, four on 32-bit long) */
496 for (j=0; j<16/sizeof(long); ++j) {
497 if (is_endian.little) {
498 if (sizeof(long)==8) {
500 X = (long)(BSWAP8(xi[j]));
502 const u8 *p = (const u8 *)(xi+j);
503 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
507 const u8 *p = (const u8 *)(xi+j);
/* M = all-ones when the top bit of X is set (arithmetic shift),
 * selecting whether V contributes this round */
514 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
515 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* halve V with branchless reduction by the GCM polynomial */
519 if (sizeof(size_t)==8) {
520 u64 T = U64(0xe100000000000000) & (0-(V.lo&1));
521 V.lo = (V.hi<<63)|(V.lo>>1);
525 u32 T = 0xe1000000U & (0-(u32)(V.lo&1));
526 V.lo = (V.hi<<63)|(V.lo>>1);
527 V.hi = (V.hi>>1 )^((u64)T<<32);
/* store the product back to Xi in big-endian order */
533 if (is_endian.little) {
535 Xi[0] = BSWAP8(Z.hi);
536 Xi[1] = BSWAP8(Z.lo);
540 v = (u32)(Z.hi>>32); PUTU32(p,v);
541 v = (u32)(Z.hi); PUTU32(p+4,v);
542 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
543 v = (u32)(Z.lo); PUTU32(p+12,v);
/* dispatch point for the 1-bit build */
551 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/* Interior of the GCM128_CONTEXT structure (declaration head and some
 * members are elided in this listing).  Each 16-byte quantity is a
 * union so it can be addressed as two u64, four u32, or 16 bytes:
 * Yi = counter block, EKi = encrypted counter (keystream block),
 * EK0 = E(K, Y0) used to mask the tag.  res/ctr hold the partial-block
 * byte count and the 32-bit counter, respectively. */
555 /* Following 6 names follow names in GCM specification */
556 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
559 /* Pre-computed table used by gcm_gmult_4bit */
561 unsigned int res, ctr;
/*
 * CRYPTO_gcm128_init: zero the context, bind the block cipher and key,
 * compute the hash subkey H = E(K, 0^128), convert it to host byte
 * order, and pre-compute the 4-bit multiplication table.
 * `key` is stored by reference — its lifetime must cover the context's.
 * NOTE(review): the ctx->block/ctx->key assignments and the p/hi/lo
 * declarations are elided in this listing.
 */
566 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
568 const union { long one; char little; } is_endian = {1};
570 memset(ctx,0,sizeof(*ctx));
/* H = E(K, 0^128); ctx->H.c is all-zero after the memset above */
574 (*block)(ctx->H.c,ctx->H.c,key)
576 if (is_endian.little) {
577 /* H is stored in host byte order */
579 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
580 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
/* no-BSWAP8 fallback: assemble the halves from big-endian bytes */
584 hi = (u64)GETU32(p) <<32|GETU32(p+4);
585 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
591 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: derive the initial counter block Y0 from the IV.
 * A 96-bit IV is used directly (with counter = 1, per the GCM spec's
 * fast path — the counter setup itself is elided in this listing); any
 * other length is GHASHed: full 16-byte pieces then the tail are XORed
 * into Yi with a GCM_MUL between them, finishing with the IV bit length
 * folded in.  Finally EK0 = E(K, Y0) is computed for the tag, and the
 * low 32-bit counter word is cached in ctx->ctr.
 */
594 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
596 const union { long one; char little; } is_endian = {1};
/* 96-bit IV fast path: Yi[0..11] = IV */
607 memcpy(ctx->Yi.c,iv,12);
/* GHASH path: full 16-byte chunks of the IV */
616 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
/* ...then the remaining tail bytes */
622 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* fold in len0 (IV length in bits) — BSWAP8 when available, otherwise
 * byte-by-byte into the last 8 bytes of Yi */
626 if (is_endian.little) {
628 ctx->Yi.u[1] ^= BSWAP8(len0);
630 ctx->Yi.c[8] ^= (u8)(len0>>56);
631 ctx->Yi.c[9] ^= (u8)(len0>>48);
632 ctx->Yi.c[10] ^= (u8)(len0>>40);
633 ctx->Yi.c[11] ^= (u8)(len0>>32);
634 ctx->Yi.c[12] ^= (u8)(len0>>24);
635 ctx->Yi.c[13] ^= (u8)(len0>>16);
636 ctx->Yi.c[14] ^= (u8)(len0>>8);
637 ctx->Yi.c[15] ^= (u8)(len0);
/* big-endian host: XOR len0 in directly */
641 ctx->Yi.u[1] ^= len0;
/* cache the 32-bit counter in host order */
645 if (is_endian.little)
646 ctx->ctr = GETU32(ctx->Yi.c+12);
648 ctx->ctr = ctx->Yi.d[3];
/* EK0 = E(K, Y0): masks the final tag in CRYPTO_gcm128_finish */
651 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
/* write back the (incremented) counter — see elided lines */
653 if (is_endian.little)
654 PUTU32(ctx->Yi.c+12,ctx->ctr);
656 ctx->Yi.d[3] = ctx->ctr;
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into the
 * GHASH state Xi.  len.u[0] accumulates the total AAD byte count for
 * the final length block.  Bulk 16-byte pieces are processed first
 * (len & -16), then the tail bytes are XORed in; the GCM_MUL calls
 * between pieces are elided in this listing.  Must be called before
 * any encrypt/decrypt data.
 */
659 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
663 ctx->len.u[0] += len;
/* i = number of whole 16-byte blocks' worth of AAD */
666 if ((i = (len&(size_t)-16))) {
673 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* partial trailing block */
681 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt `in` to `out` and hash the
 * ciphertext into Xi.  Paths, fastest first: (1) finish a pending
 * partial keystream block; (2) GHASH_CHUNK-sized bulk loop with a
 * single GHASH call per chunk; (3) remaining whole 16-byte blocks;
 * (4) trailing partial block; plus a byte-at-a-time small-footprint /
 * unaligned fallback.  The word-wide paths require in/out alignment
 * (checked under STRICT_ALIGNMENT) and type-pun through size_t.
 * NOTE(review): counter-increment lines, several loop headers and
 * closing braces are elided in this listing.
 */
686 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
687 const unsigned char *in, unsigned char *out,
690 const union { long one; char little; } is_endian = {1};
/* total ciphertext length, for the final length block */
694 ctx->len.u[1] += len;
698 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* drain a previously started keystream block (n = ctx->res) */
699 if (16%sizeof(size_t) == 0) do { /* always true actually */
702 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
706 if (n==0) GCM_MUL(ctx,Xi);
712 #if defined(STRICT_ALIGNMENT)
/* fall back to the byte loop when in/out are not word-aligned */
713 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* bulk path: encrypt GHASH_CHUNK bytes, then hash them in one call */
717 while (len>=GHASH_CHUNK) {
718 size_t j=GHASH_CHUNK;
721 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
723 if (is_endian.little)
724 PUTU32(ctx->Yi.c+12,ctr);
727 for (i=0; i<16; i+=sizeof(size_t))
729 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
734 GHASH(out-GHASH_CHUNK,GHASH_CHUNK,ctx);
/* remaining whole blocks (i = len & -16) */
737 if ((i = (len&(size_t)-16))) {
741 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
743 if (is_endian.little)
744 PUTU32(ctx->Yi.c+12,ctr);
747 for (i=0; i<16; i+=sizeof(size_t))
749 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* single-block path: XOR ciphertext words straight into Xi */
758 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
760 if (is_endian.little)
761 PUTU32(ctx->Yi.c+12,ctr);
764 for (i=0; i<16; i+=sizeof(size_t))
765 *(size_t *)(ctx->Xi.c+i) ^=
767 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* trailing partial block: start a new keystream block, consume
 * len bytes, leave n in ctx->res (see elided lines) */
775 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
777 if (is_endian.little)
778 PUTU32(ctx->Yi.c+12,ctr);
782 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* small-footprint / unaligned fallback: one byte at a time, fresh
 * keystream block whenever n wraps to 0 */
792 for (i=0;i<len;++i) {
794 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
796 if (is_endian.little)
797 PUTU32(ctx->Yi.c+12,ctr);
801 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt, but the
 * *ciphertext* (the input) is what gets hashed into Xi — so the bulk
 * path calls GHASH(in,...) *before* decrypting, and the block paths
 * capture the input word/byte (c) before XORing the keystream.
 * NOTE(review): counter-increment lines, loop headers and closing
 * braces are elided in this listing.
 */
811 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
812 const unsigned char *in, unsigned char *out,
815 const union { long one; char little; } is_endian = {1};
/* total ciphertext length, for the final length block */
819 ctx->len.u[1] += len;
823 #if !defined(OPENSSL_SMALL_FOOTPRINT)
/* drain a previously started keystream block; c holds the input
 * (cipher) byte, captured in an elided line before this store */
824 if (16%sizeof(size_t) == 0) do { /* always true actually */
828 *(out++) = c^ctx->EKi.c[n];
833 if (n==0) GCM_MUL (ctx,Xi);
839 #if defined(STRICT_ALIGNMENT)
840 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* bulk path: hash the ciphertext chunk first, then decrypt it */
844 while (len>=GHASH_CHUNK) {
845 size_t j=GHASH_CHUNK;
847 GHASH(in,GHASH_CHUNK,ctx);
849 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
851 if (is_endian.little)
852 PUTU32(ctx->Yi.c+12,ctr);
855 for (i=0; i<16; i+=sizeof(size_t))
857 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* remaining whole blocks */
864 if ((i = (len&(size_t)-16))) {
867 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
869 if (is_endian.little)
870 PUTU32(ctx->Yi.c+12,ctr);
873 for (i=0; i<16; i+=sizeof(size_t))
875 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* single-block path: hash the ciphertext word, then decrypt */
883 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
885 if (is_endian.little)
886 PUTU32(ctx->Yi.c+12,ctr);
889 for (i=0; i<16; i+=sizeof(size_t)) {
890 size_t c = *(size_t *)(in+i);
891 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
892 *(size_t *)(ctx->Xi.c+i) ^= c;
/* trailing partial block */
901 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
903 if (is_endian.little)
904 PUTU32(ctx->Yi.c+12,ctr);
910 out[n] = c^ctx->EKi.c[n];
/* small-footprint / unaligned fallback, byte at a time */
920 for (i=0;i<len;++i) {
923 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
925 if (is_endian.little)
926 PUTU32(ctx->Yi.c+12,ctr);
931 out[i] ^= ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish: complete the tag.  Builds the final GHASH
 * length block — AAD length and ciphertext length, each in *bits*
 * (hence <<3) — hashes it into Xi (the GCM_MUL call is elided here),
 * and masks with EK0 = E(K, Y0).  The tag is then ctx->Xi.c[0..15].
 */
942 void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx)
944 const union { long one; char little; } is_endian = {1};
945 u64 alen = ctx->len.u[0]<<3;
946 u64 clen = ctx->len.u[1]<<3;
/* convert the bit lengths to big-endian for the length block */
951 if (is_endian.little) {
958 ctx->len.u[0] = alen;
959 ctx->len.u[1] = clen;
/* no-BSWAP8 fallback: rebuild the big-endian values via GETU32 */
961 alen = (u64)GETU32(p) <<32|GETU32(p+4);
962 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* fold the length block into the hash */
966 ctx->Xi.u[0] ^= alen;
967 ctx->Xi.u[1] ^= clen;
/* mask with E(K, Y0) to produce the final authentication tag */
970 ctx->Xi.u[0] ^= ctx->EK0.u[0];
971 ctx->Xi.u[1] ^= ctx->EK0.u[1];
974 #if defined(SELFTEST)
/* Self-test vectors: key K, plaintext P, AAD A, IV, ciphertext C and
 * tag T per test case.  These match the AES-GCM test cases from the
 * GCM specification (128/192/256-bit keys; empty, 96-bit and long
 * IVs).  Empty/zero vectors are declared by size only (e.g. K1[16]).
 * NOTE(review): several vector definitions are elided in this listing. */
976 #include <openssl/aes.h>
979 static const u8 K1[16],
984 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
989 static const u8 P2[16],
990 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
991 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
995 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
996 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
997 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
998 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
999 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1000 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1001 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1002 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1003 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1004 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1005 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,};
1010 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1011 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1012 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1013 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1014 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1015 0xab,0xad,0xda,0xd2},
1016 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1017 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1018 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1019 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1020 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1025 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1026 0xab,0xad,0xda,0xd2},
1027 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1028 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1029 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1030 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1031 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1032 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1037 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1038 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1039 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1040 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1041 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1042 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1043 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1044 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1045 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/* 192-bit-key cases */
1048 static const u8 K7[24],
1053 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1059 static const u8 P8[16],
1060 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1061 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1065 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1066 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1067 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1068 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1069 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1070 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1071 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1072 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1073 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1074 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1075 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1076 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1081 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1082 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1083 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1084 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1085 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1086 0xab,0xad,0xda,0xd2},
1087 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1088 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1089 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1090 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1091 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1097 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1098 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1099 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1100 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1101 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1102 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1108 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1109 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1110 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1111 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1112 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1113 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1114 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1115 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1116 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* 256-bit-key cases */
1119 static const u8 K13[32],
1124 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1129 static const u8 P14[16],
1131 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1132 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1136 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1137 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1138 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1139 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1140 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1141 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1142 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1143 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1144 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1145 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1146 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1147 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1152 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1153 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1154 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1155 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1156 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1157 0xab,0xad,0xda,0xd2},
1158 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1159 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1160 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1161 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1162 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1168 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1169 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1170 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1171 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1172 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1173 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1179 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1180 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1181 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1182 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1183 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1184 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1185 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1186 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1187 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/* TEST_CASE(n): run known-answer test #n in both directions — encrypt
 * P##n and compare ciphertext/tag against C##n/T##n, then decrypt C##n
 * and compare against P##n/T##n, bumping `ret` on mismatch.  Relies on
 * token pasting, so every vector for case n must exist (possibly
 * zero-length).  `key`, `ctx` and `ret` come from the enclosing scope. */
1189 #define TEST_CASE(n) do { \
1190 u8 out[sizeof(P##n)]; \
1191 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1192 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1193 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1194 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1195 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1196 CRYPTO_gcm128_finish(&ctx); \
1197 if (memcmp(ctx.Xi.c,T##n,16) || (C##n && memcmp(out,C##n,sizeof(out)))) \
1198 ret++, printf ("encrypt test#%d failed.\n",n);\
1199 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1200 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1201 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1202 CRYPTO_gcm128_finish(&ctx); \
1203 if (memcmp(ctx.Xi.c,T##n,16) || (P##n && memcmp(out,P##n,sizeof(out)))) \
1204 ret++, printf ("decrypt test#%d failed.\n",n);\
/* Micro-benchmark fragment (interior of the self-test driver; the
 * enclosing function and the TEST_CASE invocations are elided in this
 * listing): times GCM encryption vs. plain CTR over a 1KB buffer using
 * the CPU cycle counter, and prints per-byte costs — the difference
 * approximates the cost of GHASH alone.  Each measured call is
 * preceded by an identical warm-up call. */
1233 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1234 union { u64 u; u8 c[1024]; } buf;
1237 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1238 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1239 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
/* warm-up, then timed GCM pass */
1241 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1242 start = OPENSSL_rdtsc();
1243 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1244 gcm_t = OPENSSL_rdtsc() - start;
/* warm-up, then timed CTR-only pass (same key/counter state) */
1246 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1247 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1248 (block128_f)AES_encrypt);
1249 start = OPENSSL_rdtsc();
1250 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1251 &key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
1252 (block128_f)AES_encrypt);
1253 ctr_t = OPENSSL_rdtsc() - start;
/* cycles/byte: gcm - ctr = approximate GHASH cost */
1255 printf("%.2f-%.2f=%.2f\n",
1256 gcm_t/(double)sizeof(buf),
1257 ctr_t/(double)sizeof(buf),
1258 (gcm_t-ctr_t)/(double)sizeof(buf));