1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
30 * 6. Redistributions of any form whatsoever must retain the following
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
/*
 * Preprocessor setup shared by all GHASH implementations below.
 * NOTE(review): this excerpt is missing lines (embedded original line
 * numbers are non-contiguous); code is left byte-identical.
 */
50 #define OPENSSL_FIPSAPI
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
66 #define GETU32(p) BSWAP4(*(const u32 *)(p))
68 #define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
/*
 * PACK(s): place a 16-bit constant in the top 16 bits of a size_t,
 * so the rem_* reduction tables work for both 32- and 64-bit size_t.
 */
71 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): one right-shift of a 128-bit value V (hi/lo halves)
 * with conditional XOR of the GCM reduction constant 0xE1 (top byte)
 * when the shifted-out bit is 1.  Branches on sizeof(size_t) for a
 * 64-bit vs 32-bit constant; the `} else {` line between the two
 * branches is missing from this excerpt.
 */
72 #define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
/*
 * gcm_init_8bit: precompute the 256-entry multiplication table for the
 * 8-bit (table-per-byte) GHASH variant from the hash subkey H.
 * NOTE(review): lines are missing from this excerpt (loop bodies and
 * closing braces); code is left byte-identical.
 */
87 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
/* First loop presumably fills the power-of-two entries by repeated
 * one-bit reduction starting from Htable[128]=H — body not visible here. */
97 for (Htable[128]=V, i=64; i>0; i>>=1) {
/* Composite entries: Htable[i+j] = Htable[i] ^ Htable[j], exploiting
 * linearity of multiplication in GF(2^128). */
102 for (i=2; i<256; i<<=1) {
103 u128 *Hi = Htable+i, H0 = *Hi;
104 for (j=1; j<i; ++j) {
105 Hi[j].hi = H0.hi^Htable[j].hi;
106 Hi[j].lo = H0.lo^Htable[j].lo;
/*
 * gcm_gmult_8bit: multiply Xi by the hash subkey H in GF(2^128), one
 * byte of Xi at a time, using the 256-entry Htable built by
 * gcm_init_8bit plus the rem_8bit reduction table below.
 * NOTE(review): this excerpt omits lines (e.g. the accumulator
 * declarations and the main loop header); code is left byte-identical.
 */
111 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
114 const u8 *xi = (const u8 *)Xi+15;
/* Runtime endianness probe: .little is non-zero on little-endian hosts. */
116 const union { long one; char little; } is_endian = {1};
/* rem_8bit[b]: reduction of the 8 bits shifted out of the low end,
 * pre-shifted into the top 16 bits of a size_t via PACK(). */
117 static const size_t rem_8bit[256] = {
118 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
119 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
120 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
121 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
122 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
123 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
124 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
125 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
126 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
127 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
128 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
129 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
130 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
131 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
132 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
133 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
134 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
135 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
136 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
137 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
138 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
139 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
140 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
141 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
142 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
143 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
144 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
145 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
146 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
147 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
148 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
149 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
150 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
151 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
152 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
153 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
154 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
155 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
156 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
157 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
158 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
159 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
160 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
161 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
162 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
163 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
164 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
165 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
166 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
167 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
168 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
169 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
170 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
171 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
172 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
173 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
174 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
175 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
176 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
177 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
178 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
179 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
180 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
181 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
/* Per-byte step: fold the table entry for the current byte into Z,
 * then shift Z right by 8 bits and reduce via rem_8bit.
 * The loop header and the byte-index computation are missing here. */
184 Z.hi ^= Htable[n].hi;
185 Z.lo ^= Htable[n].lo;
187 if ((u8 *)Xi==xi) break;
191 rem = (size_t)Z.lo&0xff;
192 Z.lo = (Z.hi<<56)|(Z.lo>>8);
194 if (sizeof(size_t)==8)
195 Z.hi ^= rem_8bit[rem];
197 Z.hi ^= (u64)rem_8bit[rem]<<32;
/* Write Z back into Xi in big-endian byte order; BSWAP8 fast path on
 * little-endian hosts, PUTU32 fallback otherwise. */
200 if (is_endian.little) {
202 Xi[0] = BSWAP8(Z.hi);
203 Xi[1] = BSWAP8(Z.lo);
207 v = (u32)(Z.hi>>32); PUTU32(p,v);
208 v = (u32)(Z.hi); PUTU32(p+4,v);
209 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
210 v = (u32)(Z.lo); PUTU32(p+12,v);
/* GCM_MUL for the 8-bit table build. */
218 #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
/*
 * gcm_init_4bit: precompute the 16-entry multiplication table for the
 * 4-bit (table-per-nibble) GHASH variant from the hash subkey H.
 * Two build strategies: a loop-based one under OPENSSL_SMALL_FOOTPRINT
 * and an unrolled XOR fill otherwise; on ARM with GHASH_ASM the table
 * words are additionally rotated to the dword order the assembler
 * expects.  NOTE(review): lines are missing from this excerpt
 * (declarations, #else/#endif lines); code is left byte-identical.
 */
222 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
225 #if defined(OPENSSL_SMALL_FOOTPRINT)
234 #if defined(OPENSSL_SMALL_FOOTPRINT)
235 for (Htable[8]=V, i=4; i>0; i>>=1) {
/* Composite entries via linearity: Htable[i+j] = Htable[i]^Htable[j]. */
240 for (i=2; i<16; i<<=1) {
243 for (V=*Hi, j=1; j<i; ++j) {
244 Hi[j].hi = V.hi^Htable[j].hi;
245 Hi[j].lo = V.lo^Htable[j].lo;
/* Unrolled variant of the same XOR fill (non-SMALL_FOOTPRINT path). */
256 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
258 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
259 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
260 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
262 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
263 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
264 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
265 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
266 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
267 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
268 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
270 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
272 * ARM assembler expects specific dword order in Htable.
276 const union { long one; char little; } is_endian = {1};
278 if (is_endian.little)
/* 32-bit rotation of each 64-bit half for the ARM asm layout. */
287 Htable[j].hi = V.lo<<32|V.lo>>32;
288 Htable[j].lo = V.hi<<32|V.hi>>32;
/*
 * rem_4bit[n]: reduction constants for the 4 bits shifted out of the
 * low end of Z in the 4-bit GHASH variants, pre-shifted into the top
 * 16 bits of a size_t via PACK() (see PACK definition above).
 */
295 static const size_t rem_4bit[16] = {
296 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
297 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
298 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
299 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
/*
 * gcm_gmult_4bit: multiply Xi by H in GF(2^128), processing Xi one
 * nibble at a time (low nibble `nlo`, high nibble `nhi` per byte)
 * against the 16-entry Htable, with rem_4bit handling the 4-bit
 * reductions.  NOTE(review): lines are missing from this excerpt
 * (declarations of Z/cnt, loop headers, nhi extraction); code is left
 * byte-identical.
 */
301 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
305 size_t rem, nlo, nhi;
306 const union { long one; char little; } is_endian = {1};
/* Start from the last byte of Xi. */
308 nlo = ((const u8 *)Xi)[15];
312 Z.hi = Htable[nlo].hi;
313 Z.lo = Htable[nlo].lo;
/* One 4-bit shift-and-reduce step, then fold in the high-nibble entry. */
316 rem = (size_t)Z.lo&0xf;
317 Z.lo = (Z.hi<<60)|(Z.lo>>4);
319 if (sizeof(size_t)==8)
320 Z.hi ^= rem_4bit[rem];
322 Z.hi ^= (u64)rem_4bit[rem]<<32;
324 Z.hi ^= Htable[nhi].hi;
325 Z.lo ^= Htable[nhi].lo;
/* Same step repeated for the remaining bytes (loop header missing). */
329 nlo = ((const u8 *)Xi)[cnt];
333 rem = (size_t)Z.lo&0xf;
334 Z.lo = (Z.hi<<60)|(Z.lo>>4);
336 if (sizeof(size_t)==8)
337 Z.hi ^= rem_4bit[rem];
339 Z.hi ^= (u64)rem_4bit[rem]<<32;
341 Z.hi ^= Htable[nlo].hi;
342 Z.lo ^= Htable[nlo].lo;
/* Store Z back into Xi big-endian (BSWAP8 fast path / PUTU32 fallback). */
345 if (is_endian.little) {
347 Xi[0] = BSWAP8(Z.hi);
348 Xi[1] = BSWAP8(Z.lo);
352 v = (u32)(Z.hi>>32); PUTU32(p,v);
353 v = (u32)(Z.hi); PUTU32(p+4,v);
354 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
355 v = (u32)(Z.lo); PUTU32(p+12,v);
364 #if !defined(OPENSSL_SMALL_FOOTPRINT)
366 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
367 * details... Compiler-generated code doesn't seem to give any
368 * performance improvement, at least not on x86[_64]. It's here
369 * mostly as reference and a placeholder for possible future
370 * non-trivial optimization[s]...
/*
 * gcm_ghash_4bit: streamed GHASH — XOR each 16-byte block of `inp`
 * into Xi and multiply by H, using per-call Hshr4/Hshl4 helper tables
 * plus an unpacked rem_8bit table to process 8 bits per step.
 * NOTE(review): this excerpt omits lines (declarations, loop headers,
 * the XOR-input-into-Xi step, closing braces); code is left
 * byte-identical.
 */
372 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
373 const u8 *inp,size_t len)
377 size_t rem, nlo, nhi;
378 const union { long one; char little; } is_endian = {1};
/* Reference single-nibble path (mirrors gcm_gmult_4bit). */
383 nlo = ((const u8 *)Xi)[15];
388 Z.hi = Htable[nlo].hi;
389 Z.lo = Htable[nlo].lo;
392 rem = (size_t)Z.lo&0xf;
393 Z.lo = (Z.hi<<60)|(Z.lo>>4);
395 if (sizeof(size_t)==8)
396 Z.hi ^= rem_4bit[rem];
398 Z.hi ^= (u64)rem_4bit[rem]<<32;
400 Z.hi ^= Htable[nhi].hi;
401 Z.lo ^= Htable[nhi].lo;
405 nlo = ((const u8 *)Xi)[cnt];
410 rem = (size_t)Z.lo&0xf;
411 Z.lo = (Z.hi<<60)|(Z.lo>>4);
413 if (sizeof(size_t)==8)
414 Z.hi ^= rem_4bit[rem];
416 Z.hi ^= (u64)rem_4bit[rem]<<32;
418 Z.hi ^= Htable[nlo].hi;
419 Z.lo ^= Htable[nlo].lo;
423 * Extra 256+16 bytes per-key plus 512 bytes shared tables
424 * [should] give ~50% improvement... One could have PACK()-ed
425 * the rem_8bit even here, but the priority is to minimize
428 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
429 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
430 static const unsigned short rem_8bit[256] = {
431 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
432 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
433 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
434 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
435 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
436 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
437 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
438 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
439 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
440 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
441 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
442 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
443 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
444 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
445 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
446 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
447 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
448 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
449 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
450 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
451 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
452 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
453 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
454 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
455 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
456 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
457 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
458 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
459 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
460 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
461 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
462 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
464 * This pre-processing phase slows down procedure by approximately
465 * same time as it makes each loop spin faster. In other words
466 * single block performance is approximately same as straightforward
467 * "4-bit" implementation, and then it goes only faster...
/* Build the shifted helper tables from Htable. */
469 for (cnt=0; cnt<16; ++cnt) {
470 Z.hi = Htable[cnt].hi;
471 Z.lo = Htable[cnt].lo;
472 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
473 Hshr4[cnt].hi = (Z.hi>>4);
474 Hshl4[cnt] = (u8)(Z.lo<<4);
/* Per-block loop: bytes 15..1 each consume 8 bits of Z via rem_8bit. */
478 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
479 nlo = ((const u8 *)Xi)[cnt];
484 Z.hi ^= Htable[nlo].hi;
485 Z.lo ^= Htable[nlo].lo;
487 rem = (size_t)Z.lo&0xff;
489 Z.lo = (Z.hi<<56)|(Z.lo>>8);
492 Z.hi ^= Hshr4[nhi].hi;
493 Z.lo ^= Hshr4[nhi].lo;
494 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
/* Final byte uses a 4-bit step. */
497 nlo = ((const u8 *)Xi)[0];
502 Z.hi ^= Htable[nlo].hi;
503 Z.lo ^= Htable[nlo].lo;
505 rem = (size_t)Z.lo&0xf;
507 Z.lo = (Z.hi<<60)|(Z.lo>>4);
510 Z.hi ^= Htable[nhi].hi;
511 Z.lo ^= Htable[nhi].lo;
512 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
/* Store Z into Xi big-endian, then advance to the next input block. */
515 if (is_endian.little) {
517 Xi[0] = BSWAP8(Z.hi);
518 Xi[1] = BSWAP8(Z.lo);
522 v = (u32)(Z.hi>>32); PUTU32(p,v);
523 v = (u32)(Z.hi); PUTU32(p+4,v);
524 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
525 v = (u32)(Z.lo); PUTU32(p+12,v);
532 } while (inp+=16, len-=16);
/*
 * Assembler-provided 4-bit GHASH entry points (GHASH_ASM builds) and
 * the GCM_MUL/GHASH dispatch macros used by the bulk en/decrypt paths.
 */
536 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
537 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
540 #define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
541 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
542 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
543 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
544 * trashing effect. In other words idea is to hash data while it's
545 * still in L1 cache after encryption pass... */
546 #define GHASH_CHUNK (3*1024)
549 #else /* TABLE_BITS */
/*
 * gcm_gmult_1bit: table-free bit-serial GF(2^128) multiplication,
 * processing Xi one bit at a time (used when TABLE_BITS selects
 * neither 8- nor 4-bit tables).  NOTE(review): lines are missing from
 * this excerpt (Z/V/X declarations, the inner multiply step, closing
 * braces); code is left byte-identical.
 */
551 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
556 const long *xi = (const long *)Xi;
557 const union { long one; char little; } is_endian = {1};
559 V.hi = H[0]; /* H is in host byte order, no byte swapping */
/* Load Xi one `long` at a time, converting to host order as needed. */
562 for (j=0; j<16/sizeof(long); ++j) {
563 if (is_endian.little) {
564 if (sizeof(long)==8) {
566 X = (long)(BSWAP8(xi[j]));
568 const u8 *p = (const u8 *)(xi+j);
569 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
573 const u8 *p = (const u8 *)(xi+j);
/* Bit loop: M is an all-ones/all-zeros mask from X's top bit. */
580 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
581 u64 M = (u64)(X>>(8*sizeof(long)-1));
/* Store the accumulator back into Xi big-endian. */
589 if (is_endian.little) {
591 Xi[0] = BSWAP8(Z.hi);
592 Xi[1] = BSWAP8(Z.lo);
596 v = (u32)(Z.hi>>32); PUTU32(p,v);
597 v = (u32)(Z.hi); PUTU32(p+4,v);
598 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
599 v = (u32)(Z.lo); PUTU32(p+12,v);
607 #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
/*
 * x86/x86_64 assembler dispatch: when 4-bit tables and GHASH_ASM are
 * enabled, declare the PCLMULQDQ ("clmul") and MMX/x86 4-bit entry
 * points and route GCM_MUL/GHASH through the per-context function
 * pointers selected at init time (see CRYPTO_gcm128_init).
 */
611 #if TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
612 (defined(__i386) || defined(__i386__) || \
613 defined(__x86_64) || defined(__x86_64__) || \
614 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
615 # define GHASH_ASM_IAX
616 extern unsigned int OPENSSL_ia32cap_P[2];
618 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
619 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
620 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
622 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
623 # define GHASH_ASM_X86
624 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
625 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
627 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
628 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
632 # define GCM_MUL(ctx,Xi) (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
634 # define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * CRYPTO_gcm128_init: zero the context, derive the hash subkey
 * H = E_K(0^128) with the supplied block cipher, convert H to host
 * byte order, and build the GHASH table / select the gmult/ghash
 * implementation for this platform.  NOTE(review): lines are missing
 * from this excerpt (ctx->block/key assignments, #if/#else/#endif
 * lines, closing braces); code is left byte-identical.
 */
637 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
639 const union { long one; char little; } is_endian = {1};
641 memset(ctx,0,sizeof(*ctx));
/* H = E_K(0): ctx->H.c was zeroed by the memset above. */
645 (*block)(ctx->H.c,ctx->H.c,key);
647 if (is_endian.little) {
648 /* H is stored in host byte order */
650 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
651 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
655 hi = (u64)GETU32(p) <<32|GETU32(p+4);
656 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
/* TABLE_BITS==8 path. */
663 gcm_init_8bit(ctx->Htable,ctx->H.u);
665 # if defined(GHASH_ASM_IAX) /* both x86 and x86_64 */
666 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
/* CPU capability bit selects the PCLMULQDQ implementation. */
667 if (OPENSSL_ia32cap_P[1]&(1<<1)) {
668 gcm_init_clmul(ctx->Htable,ctx->H.u);
669 ctx->gmult = gcm_gmult_clmul;
670 ctx->ghash = gcm_ghash_clmul;
674 gcm_init_4bit(ctx->Htable,ctx->H.u);
675 # if defined(GHASH_ASM_X86) /* x86 only */
676 if (OPENSSL_ia32cap_P[0]&(1<<23)) {
677 ctx->gmult = gcm_gmult_4bit_mmx;
678 ctx->ghash = gcm_ghash_4bit_mmx;
680 ctx->gmult = gcm_gmult_4bit_x86;
681 ctx->ghash = gcm_ghash_4bit_x86;
684 ctx->gmult = gcm_gmult_4bit;
685 ctx->ghash = gcm_ghash_4bit;
688 gcm_init_4bit(ctx->Htable,ctx->H.u);
/*
 * CRYPTO_gcm128_setiv: reset per-message state and load the IV.  A
 * 96-bit IV is copied directly into Yi; any other length is GHASHed
 * (blockwise XOR + GCM_MUL, then the bit length len0 folded in).
 * Finally EK0 = E_K(Y0) is computed and the 32-bit counter extracted.
 * NOTE(review): lines are missing from this excerpt (Xi/Yi resets,
 * loop headers, GCM_MUL calls, closing braces); code is left
 * byte-identical.
 */
693 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
695 const union { long one; char little; } is_endian = {1};
702 ctx->len.u[0] = 0; /* AAD length */
703 ctx->len.u[1] = 0; /* message length */
/* Fast path: 96-bit IV goes straight into Yi (counter appended later). */
708 memcpy(ctx->Yi.c,iv,12);
/* Non-96-bit IV: GHASH full blocks, then the partial tail. */
717 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
723 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
/* Fold the IV bit length (len0) into the last GHASH block. */
727 if (is_endian.little) {
729 ctx->Yi.u[1] ^= BSWAP8(len0);
731 ctx->Yi.c[8] ^= (u8)(len0>>56);
732 ctx->Yi.c[9] ^= (u8)(len0>>48);
733 ctx->Yi.c[10] ^= (u8)(len0>>40);
734 ctx->Yi.c[11] ^= (u8)(len0>>32);
735 ctx->Yi.c[12] ^= (u8)(len0>>24);
736 ctx->Yi.c[13] ^= (u8)(len0>>16);
737 ctx->Yi.c[14] ^= (u8)(len0>>8);
738 ctx->Yi.c[15] ^= (u8)(len0);
742 ctx->Yi.u[1] ^= len0;
746 if (is_endian.little)
747 ctr = GETU32(ctx->Yi.c+12);
/* EK0 = E_K(Y0), used later to mask the tag in CRYPTO_gcm128_finish. */
752 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
754 if (is_endian.little)
755 PUTU32(ctx->Yi.c+12,ctr);
/*
 * CRYPTO_gcm128_aad: absorb additional authenticated data into Xi.
 * Must be called before any en/decrypt (returns -2 if message data has
 * already been processed); enforces the AAD length bound 2^61 bytes.
 * Handles a partial block left from a previous call, then whole
 * blocks, then stashes any trailing partial block.  NOTE(review):
 * lines are missing from this excerpt (n bookkeeping, GHASH call,
 * return statements, closing braces); code is left byte-identical.
 */
760 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
764 u64 alen = ctx->len.u[0];
766 if (ctx->len.u[1]) return -2;
769 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
771 ctx->len.u[0] = alen;
/* Finish a partial block carried over from the previous call. */
776 ctx->Xi.c[n] ^= *(aad++);
780 if (n==0) GCM_MUL(ctx,Xi);
/* Whole 16-byte blocks (len&-16), via GHASH or the XOR+MUL loop. */
788 if ((i = (len&(size_t)-16))) {
795 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
/* Trailing partial block: XOR into Xi, remember count in n. */
802 n = (unsigned int)len;
803 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
/*
 * CRYPTO_gcm128_encrypt: CTR-encrypt `in` to `out` and fold the
 * ciphertext into the GHASH accumulator Xi.  Enforces the GCM message
 * bound (2^36 - 32 bytes); the first call flushes any pending AAD
 * block.  Fast path processes GHASH_CHUNK-sized runs and whole blocks
 * word-at-a-time; STRICT_ALIGNMENT falls back to the byte loop for
 * misaligned buffers.  NOTE(review): lines are missing from this
 * excerpt (ctr increments, GCM_MUL/GHASH calls on several paths,
 * loop/brace structure); code is left byte-identical.
 */
810 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
811 const unsigned char *in, unsigned char *out,
814 const union { long one; char little; } is_endian = {1};
817 u64 mlen = ctx->len.u[1];
820 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
823 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
825 ctx->len.u[1] = mlen;
828 /* First call to encrypt finalizes GHASH(AAD) */
833 if (is_endian.little)
834 ctr = GETU32(ctx->Yi.c+12);
839 #if !defined(OPENSSL_SMALL_FOOTPRINT)
840 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a partial keystream block left over from the previous call. */
843 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
847 if (n==0) GCM_MUL(ctx,Xi);
853 #if defined(STRICT_ALIGNMENT)
854 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Chunked path: encrypt GHASH_CHUNK bytes, then hash them in one go
 * (keeps data in L1 — see GHASH_CHUNK comment above). */
857 #if defined(GHASH) && defined(GHASH_CHUNK)
858 while (len>=GHASH_CHUNK) {
859 size_t j=GHASH_CHUNK;
862 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
864 if (is_endian.little)
865 PUTU32(ctx->Yi.c+12,ctr);
868 for (i=0; i<16; i+=sizeof(size_t))
870 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
875 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
/* Remaining whole blocks (len&-16). */
878 if ((i = (len&(size_t)-16))) {
882 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
884 if (is_endian.little)
885 PUTU32(ctx->Yi.c+12,ctr);
888 for (i=0; i<16; i+=sizeof(size_t))
890 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Non-GHASH variant: XOR ciphertext words directly into Xi. */
899 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
901 if (is_endian.little)
902 PUTU32(ctx->Yi.c+12,ctr);
905 for (i=0; i<16; i+=sizeof(size_t))
906 *(size_t *)(ctx->Xi.c+i) ^=
908 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Trailing partial block. */
916 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
918 if (is_endian.little)
919 PUTU32(ctx->Yi.c+12,ctr);
923 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT byte-at-a-time fallback. */
932 for (i=0;i<len;++i) {
934 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
936 if (is_endian.little)
937 PUTU32(ctx->Yi.c+12,ctr);
941 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt: mirror of CRYPTO_gcm128_encrypt, except the
 * CIPHERTEXT (the input) is folded into Xi before/while the keystream
 * is applied — note the saved `c = *(in)` pattern so in==out aliasing
 * is safe.  Same length bound and first-call GHASH(AAD) flush.
 * NOTE(review): lines are missing from this excerpt (n computation,
 * ctr increments, several GCM_MUL/GHASH calls, brace structure); code
 * is left byte-identical.
 */
951 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
952 const unsigned char *in, unsigned char *out,
955 const union { long one; char little; } is_endian = {1};
958 u64 mlen = ctx->len.u[1];
961 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
963 ctx->len.u[1] = mlen;
966 /* First call to decrypt finalizes GHASH(AAD) */
971 if (is_endian.little)
972 ctr = GETU32(ctx->Yi.c+12);
977 #if !defined(OPENSSL_SMALL_FOOTPRINT)
978 if (16%sizeof(size_t) == 0) do { /* always true actually */
/* Finish a carried-over partial block (c holds the ciphertext byte). */
982 *(out++) = c^ctx->EKi.c[n];
987 if (n==0) GCM_MUL (ctx,Xi);
993 #if defined(STRICT_ALIGNMENT)
994 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
/* Chunked path: hash the ciphertext first, then decrypt it. */
997 #if defined(GHASH) && defined(GHASH_CHUNK)
998 while (len>=GHASH_CHUNK) {
999 size_t j=GHASH_CHUNK;
1001 GHASH(ctx,in,GHASH_CHUNK);
1003 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1005 if (is_endian.little)
1006 PUTU32(ctx->Yi.c+12,ctr);
1009 for (i=0; i<16; i+=sizeof(size_t))
1010 *(size_t *)(out+i) =
1011 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Remaining whole blocks. */
1018 if ((i = (len&(size_t)-16))) {
1021 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1023 if (is_endian.little)
1024 PUTU32(ctx->Yi.c+12,ctr);
1027 for (i=0; i<16; i+=sizeof(size_t))
1028 *(size_t *)(out+i) =
1029 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
/* Non-GHASH variant: save ciphertext word, XOR into Xi, then decrypt. */
1037 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1039 if (is_endian.little)
1040 PUTU32(ctx->Yi.c+12,ctr);
1043 for (i=0; i<16; i+=sizeof(size_t)) {
1044 size_t c = *(size_t *)(in+i);
1045 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1046 *(size_t *)(ctx->Xi.c+i) ^= c;
/* Trailing partial block. */
1055 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1057 if (is_endian.little)
1058 PUTU32(ctx->Yi.c+12,ctr);
1064 out[n] = c^ctx->EKi.c[n];
/* OPENSSL_SMALL_FOOTPRINT byte-at-a-time fallback. */
1073 for (i=0;i<len;++i) {
1076 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1078 if (is_endian.little)
1079 PUTU32(ctx->Yi.c+12,ctr);
1084 out[i] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt but bulk
 * blocks are produced by a caller-supplied ctr128_f `stream` routine
 * (e.g. hardware-accelerated AES-CTR); GHASH is applied to the
 * resulting ciphertext afterwards.  NOTE(review): lines are missing
 * from this excerpt (mlen update guard context, loop bookkeeping,
 * brace structure); code is left byte-identical.
 */
1095 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1096 const unsigned char *in, unsigned char *out,
1097 size_t len, ctr128_f stream)
1099 const union { long one; char little; } is_endian = {1};
1100 unsigned int n, ctr;
1102 u64 mlen = ctx->len.u[1];
1105 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1107 ctx->len.u[1] = mlen;
1110 /* First call to encrypt finalizes GHASH(AAD) */
1115 if (is_endian.little)
1116 ctr = GETU32(ctx->Yi.c+12);
/* Finish a carried-over partial keystream block. */
1123 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1127 if (n==0) GCM_MUL(ctx,Xi);
/* Bulk path: stream-encrypt a chunk, then GHASH the ciphertext. */
1133 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1134 while (len>=GHASH_CHUNK) {
1135 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1136 ctr += GHASH_CHUNK/16;
1137 if (is_endian.little)
1138 PUTU32(ctx->Yi.c+12,ctr);
1141 GHASH(ctx,out,GHASH_CHUNK);
/* Remaining whole blocks (j = i/16 blocks, presumably). */
1147 if ((i = (len&(size_t)-16))) {
1150 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1151 ctr += (unsigned int)j;
1152 if (is_endian.little)
1153 PUTU32(ctx->Yi.c+12,ctr);
/* Non-GHASH variant: XOR each ciphertext block into Xi byte-wise. */
1163 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
/* Trailing partial block via the scalar block cipher. */
1170 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1172 if (is_endian.little)
1173 PUTU32(ctx->Yi.c+12,ctr);
1177 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_decrypt_ctr32: stream-accelerated decrypt — GHASH is
 * applied to the CIPHERTEXT (the input) before/alongside the `stream`
 * CTR pass, mirroring CRYPTO_gcm128_decrypt.  NOTE(review): lines are
 * missing from this excerpt (mlen guard context, loop bookkeeping,
 * brace structure); code is left byte-identical.
 */
1186 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1187 const unsigned char *in, unsigned char *out,
1188 size_t len,ctr128_f stream)
1190 const union { long one; char little; } is_endian = {1};
1191 unsigned int n, ctr;
1193 u64 mlen = ctx->len.u[1];
1196 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1198 ctx->len.u[1] = mlen;
1201 /* First call to decrypt finalizes GHASH(AAD) */
1206 if (is_endian.little)
1207 ctr = GETU32(ctx->Yi.c+12);
/* Finish a carried-over partial block (c holds the ciphertext byte). */
1215 *(out++) = c^ctx->EKi.c[n];
1220 if (n==0) GCM_MUL (ctx,Xi);
/* Bulk path: hash the ciphertext chunk, then stream-decrypt it. */
1226 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1227 while (len>=GHASH_CHUNK) {
1228 GHASH(ctx,in,GHASH_CHUNK);
1229 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1230 ctr += GHASH_CHUNK/16;
1231 if (is_endian.little)
1232 PUTU32(ctx->Yi.c+12,ctr);
/* Remaining whole blocks; non-GHASH variant XORs input into Xi. */
1240 if ((i = (len&(size_t)-16))) {
1248 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1255 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1256 ctr += (unsigned int)j;
1257 if (is_endian.little)
1258 PUTU32(ctx->Yi.c+12,ctr);
/* Trailing partial block via the scalar block cipher. */
1266 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1268 if (is_endian.little)
1269 PUTU32(ctx->Yi.c+12,ctr);
1275 out[n] = c^ctx->EKi.c[n];
/*
 * CRYPTO_gcm128_finish: complete the tag — fold the AAD and message
 * BIT lengths (hence <<3) into Xi as the final GHASH block, mask with
 * EK0, and, if `tag` is given, compare against it.  Returns the
 * memcmp result (0 on match) when a tag is supplied.
 * NOTE(review): the `len` parameter declaration, the final GCM_MUL
 * calls and the trailing return are missing from this excerpt; also
 * note the memcmp here is not constant-time — verify against current
 * upstream (later OpenSSL uses CRYPTO_memcmp).  Code left
 * byte-identical.
 */
1284 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1287 const union { long one; char little; } is_endian = {1};
1288 u64 alen = ctx->len.u[0]<<3;
1289 u64 clen = ctx->len.u[1]<<3;
/* Lengths must be big-endian inside the GHASH block. */
1294 if (is_endian.little) {
1296 alen = BSWAP8(alen);
1297 clen = BSWAP8(clen);
1301 ctx->len.u[0] = alen;
1302 ctx->len.u[1] = clen;
1304 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1305 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1309 ctx->Xi.u[0] ^= alen;
1310 ctx->Xi.u[1] ^= clen;
/* T = GHASH ^ E_K(Y0). */
1313 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1314 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1316 if (tag && len<=sizeof(ctx->Xi))
1317 return memcmp(ctx->Xi.c,tag,len);
/*
 * CRYPTO_gcm128_tag: finalize the context and copy out up to
 * sizeof(ctx->Xi.c) bytes of the authentication tag into `tag`.
 * NOTE(review): braces are missing from this excerpt; code left
 * byte-identical.
 */
1322 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1324 CRYPTO_gcm128_finish(ctx, NULL, 0);
1325 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
/*
 * CRYPTO_gcm128_new: allocate and initialize a GCM128_CONTEXT for the
 * given key/block cipher.  Returns the (possibly NULL) allocation —
 * the `return ret;` line is missing from this excerpt; code left
 * byte-identical.
 */
1328 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1330 GCM128_CONTEXT *ret;
1332 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1333 CRYPTO_gcm128_init(ret,key,block);
/*
 * CRYPTO_gcm128_release: scrub the context (it holds key-derived
 * material) before freeing — the OPENSSL_free call is missing from
 * this excerpt; code left byte-identical.
 */
1338 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1341 OPENSSL_cleanse(ctx,sizeof(*ctx));
1346 #if defined(SELFTEST)
1348 #include <openssl/aes.h>
1351 static const u8 K1[16],
1356 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1362 static const u8 P2[16],
1363 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1364 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1368 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1369 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1370 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1371 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1372 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1373 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1374 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1375 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1376 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1377 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1378 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1383 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1384 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1385 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1386 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1387 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1388 0xab,0xad,0xda,0xd2},
1389 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1390 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1391 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1392 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1393 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1398 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1399 0xab,0xad,0xda,0xd2},
1400 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1401 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1402 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1403 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1404 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1405 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1411 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1412 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1413 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1414 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1415 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1416 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1417 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1418 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1419 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1422 static const u8 K7[24],
1427 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1433 static const u8 P8[16],
1434 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1435 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
/* GCM self-test vectors, test case 9 (AES-192, no AAD).
 * K9:  24-byte (192-bit) key;  P9: 64-byte plaintext;
 * IV9: 96-bit IV (the fast, counter-concatenation path);
 * C9:  expected ciphertext;   T9: expected tag.
 * Presumably the McGrew-Viega GCM-spec vectors -- TODO confirm. */
1439 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1440 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1441 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1442 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1443 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1444 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1445 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1446 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1447 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1448 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1449 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1450 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
/* GCM self-test vectors, test case 10 (AES-192 with AAD; key/IV
 * presumably reused from test 9 via #define upstream -- not visible).
 * P10: 60-byte plaintext (partial final block);
 * A10: 20-byte additional authenticated data;
 * C10: expected ciphertext;  T10: expected tag. */
1455 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1456 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1457 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1458 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1459 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1460 0xab,0xad,0xda,0xd2},
1461 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1462 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1463 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1464 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1465 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
/* GCM self-test vectors, test case 11 (AES-192, short IV).
 * IV11: 8-byte (64-bit) IV -- exercises the non-96-bit IV handling;
 * C11:  expected ciphertext;  T11: expected tag.
 * Key/plaintext/AAD presumably reused from test 10 upstream. */
1471 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1472 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1473 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1474 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1475 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1476 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
/* GCM self-test vectors, test case 12 (AES-192, long IV).
 * IV12: 60-byte IV (GHASH-derived J0 path);
 * C12:  expected ciphertext;  T12: expected tag.
 * Key/plaintext/AAD presumably reused from test 10 upstream. */
1482 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1483 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1484 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1485 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1486 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1487 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1488 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1489 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1490 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/* GCM self-test vectors, test case 13 (AES-256, empty plaintext/AAD).
 * K13[32]: all-zero 256-bit key (static storage is zero-initialized);
 * T13:     expected tag.
 * NOTE(review): embedded listing numbers jump 1493 -> 1498; the
 * intermediate declarators appear dropped in extraction -- verify. */
1493 static const u8 K13[32],
1498 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
/* GCM self-test vectors, test case 14 (AES-256, zero key reused from
 * test 13 -- presumed, not visible here).
 * P14[16]: single all-zero plaintext block;
 * C14: expected ciphertext;  T14: expected tag.
 * NOTE(review): embedded listing number 1504 is missing between these
 * declarators -- a line appears dropped in extraction; verify. */
1503 static const u8 P14[16],
1505 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1506 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
/* GCM self-test vectors, test case 15 (AES-256, no AAD).
 * K15: 32-byte (256-bit) key;  P15: 64-byte plaintext;
 * IV15: 96-bit IV;  C15: expected ciphertext;  T15: expected tag.
 * Presumably the McGrew-Viega GCM-spec vectors -- TODO confirm. */
1510 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1511 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1512 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1513 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1514 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1515 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1516 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1517 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1518 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1519 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1520 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1521 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
/* GCM self-test vectors, test case 16 (AES-256 with AAD; key/IV
 * presumably reused from test 15 via #define upstream -- not visible).
 * P16: 60-byte plaintext;  A16: 20-byte AAD;
 * C16: expected ciphertext;  T16: expected tag. */
1526 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1527 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1528 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1529 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1530 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1531 0xab,0xad,0xda,0xd2},
1532 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1533 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1534 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1535 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1536 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
/* GCM self-test vectors, test case 17 (AES-256, short IV).
 * IV17: 8-byte (64-bit) IV;  C17: expected ciphertext;
 * T17: expected tag.  Key/plaintext/AAD presumably reused from
 * test 16 upstream. */
1542 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1543 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1544 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1545 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1546 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1547 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
/* GCM self-test vectors, test case 18 (AES-256, long IV).
 * IV18: 60-byte IV (GHASH-derived J0 path);
 * C18: expected ciphertext;  T18: expected tag.
 * Key/plaintext/AAD presumably reused from test 16 upstream. */
1553 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1554 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1555 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1556 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1557 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1558 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1559 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1560 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1561 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1563 #define TEST_CASE(n) do { \
1564 u8 out[sizeof(P##n)]; \
1565 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1566 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1567 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1568 memset(out,0,sizeof(out)); \
1569 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1570 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
1571 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1572 (C##n && memcmp(out,C##n,sizeof(out)))) \
1573 ret++, printf ("encrypt test#%d failed.\n",n); \
1574 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
1575 memset(out,0,sizeof(out)); \
1576 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1577 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
1578 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1579 (P##n && memcmp(out,P##n,sizeof(out)))) \
1580 ret++, printf ("decrypt test#%d failed.\n",n); \
1608 #ifdef OPENSSL_CPUID_OBJ
1610 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1611 union { u64 u; u8 c[1024]; } buf;
1614 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1615 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1616 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1618 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1619 start = OPENSSL_rdtsc();
1620 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1621 gcm_t = OPENSSL_rdtsc() - start;
1623 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1624 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1625 (block128_f)AES_encrypt);
1626 start = OPENSSL_rdtsc();
1627 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1628 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1629 (block128_f)AES_encrypt);
1630 ctr_t = OPENSSL_rdtsc() - start;
1632 printf("%.2f-%.2f=%.2f\n",
1633 gcm_t/(double)sizeof(buf),
1634 ctr_t/(double)sizeof(buf),
1635 (gcm_t-ctr_t)/(double)sizeof(buf));
1637 GHASH(&ctx,buf.c,sizeof(buf));
1638 start = OPENSSL_rdtsc();
1639 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1640 gcm_t = OPENSSL_rdtsc() - start;
1641 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);