crypto/modes/gcm128.c: more strict aliasing fixes.
[oweals/openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer. 
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * 3. All advertising materials mentioning features or use of this
17  *    software must display the following acknowledgment:
18  *    "This product includes software developed by the OpenSSL Project
19  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20  *
21  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22  *    endorse or promote products derived from this software without
23  *    prior written permission. For written permission, please contact
24  *    openssl-core@openssl.org.
25  *
26  * 5. Products derived from this software may not be called "OpenSSL"
27  *    nor may "OpenSSL" appear in their names without prior written
28  *    permission of the OpenSSL Project.
29  *
30  * 6. Redistributions of any form whatsoever must retain the following
31  *    acknowledgment:
32  *    "This product includes software developed by the OpenSSL Project
33  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34  *
35  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46  * OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  */
49
50 #define OPENSSL_FIPSAPI
51
52 #include <openssl/crypto.h>
53 #include "modes_lcl.h"
54 #include <string.h>
55
56 #ifndef MODES_DEBUG
57 # ifndef NDEBUG
58 #  define NDEBUG
59 # endif
60 #endif
61 #include <assert.h>
62
63 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64 /* redefine, because alignment is ensured */
65 #undef  GETU32
66 #define GETU32(p)       BSWAP4(*(const u32 *)(p))
67 #undef  PUTU32
68 #define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
69 #endif
70
/*
 * PACK positions a 16-bit constant in the most significant 16 bits of
 * a size_t, so the rem_4bit/rem_8bit tables below work for both 32-
 * and 64-bit size_t (on 32-bit targets callers additionally shift the
 * looked-up value left by 32 before XORing into the high u64).
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT shifts the 128-bit value V right by one bit; when a set
 * bit falls off the low end it folds in the GCM reduction constant
 * 0xE1 followed by 120 zero bits. The sizeof(size_t) test selects a
 * 64- or 32-bit-friendly formulation at compile time (the condition
 * is a compile-time constant, so dead branches are eliminated).
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
84
85 /*
86  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87  * never be set to 8. 8 is effectively reserved for testing purposes.
88  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90  * whole spectrum of possible table driven implementations. Why? In
91  * non-"Shoup's" case memory access pattern is segmented in such manner,
92  * that it's trivial to see that cache timing information can reveal
93  * fair portion of intermediate hash value. Given that ciphertext is
94  * always available to attacker, it's possible for him to attempt to
95  * deduce secret parameter H and if successful, tamper with messages
96  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97  * not as trivial, but there is no reason to believe that it's resistant
98  * to cache-timing attack. And the thing about "8-bit" implementation is
99  * that it consumes 16 (sixteen) times more memory, 4KB per individual
100  * key + 1KB shared. Well, on pros side it should be twice as fast as
101  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102  * was observed to run ~75% faster, closer to 100% for commercial
103  * compilers... Yet "4-bit" procedure is preferred, because it's
104  * believed to provide better security-performance balance and adequate
105  * all-round performance. "All-round" refers to things like:
106  *
107  * - shorter setup time effectively improves overall timing for
108  *   handling short messages;
109  * - larger table allocation can become unbearable because of VM
110  *   subsystem penalties (for example on Windows large enough free
111  *   results in VM working set trimming, meaning that consequent
112  *   malloc would immediately incur working set expansion);
113  * - larger table has larger cache footprint, which can affect
114  *   performance of other code paths (not necessarily even from same
115  *   thread in Hyper-Threading world);
116  *
117  * Value of 1 is not appropriate for performance reasons.
118  */
119 #if     TABLE_BITS==8
120
121 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122 {
123         int  i, j;
124         u128 V;
125
126         Htable[0].hi = 0;
127         Htable[0].lo = 0;
128         V.hi = H[0];
129         V.lo = H[1];
130
131         for (Htable[128]=V, i=64; i>0; i>>=1) {
132                 REDUCE1BIT(V);
133                 Htable[i] = V;
134         }
135
136         for (i=2; i<256; i<<=1) {
137                 u128 *Hi = Htable+i, H0 = *Hi;
138                 for (j=1; j<i; ++j) {
139                         Hi[j].hi = H0.hi^Htable[j].hi;
140                         Hi[j].lo = H0.lo^Htable[j].lo;
141                 }
142         }
143 }
144
/*
 * gcm_gmult_8bit performs Xi = Xi * H in GF(2^128) using the 256-entry
 * table built by gcm_init_8bit. Xi is in big-endian (wire) byte order
 * on entry and exit.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
        u128 Z = { 0, 0};
        const u8 *xi = (const u8 *)Xi+15;       /* walk Xi from its last byte backwards */
        size_t rem, n = *xi;
        const union { long one; char little; } is_endian = {1};
        /* rem_8bit[b] is the reduction contribution of a byte b shifted
         * out of the low end, pre-positioned in the top 16 bits by PACK(). */
        static const size_t rem_8bit[256] = {
                PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
                PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
                PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
                PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
                PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
                PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
                PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
                PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
                PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
                PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
                PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
                PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
                PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
                PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
                PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
                PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
                PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
                PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
                PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
                PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
                PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
                PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
                PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
                PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
                PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
                PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
                PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
                PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
                PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
                PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
                PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
                PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
                PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
                PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
                PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
                PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
                PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
                PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
                PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
                PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
                PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
                PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
                PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
                PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
                PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
                PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
                PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
                PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
                PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
                PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
                PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
                PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
                PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
                PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
                PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
                PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
                PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
                PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
                PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
                PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
                PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
                PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
                PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
                PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

        /* One table lookup per byte of Xi, last byte first; between
         * lookups Z is shifted right by 8 bits with the shifted-out
         * byte folded back in through rem_8bit. */
        while (1) {
                Z.hi ^= Htable[n].hi;
                Z.lo ^= Htable[n].lo;

                if ((u8 *)Xi==xi)       break;  /* all 16 bytes consumed */

                n = *(--xi);

                rem  = (size_t)Z.lo&0xff;
                Z.lo = (Z.hi<<56)|(Z.lo>>8);
                Z.hi = (Z.hi>>8);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_8bit[rem];
                else
                        Z.hi ^= (u64)rem_8bit[rem]<<32; /* 32-bit size_t: constant lives in the lower half of the PACK()ed value */
        }

        /* Store Z back into Xi in big-endian byte order. */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
}
252 #define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
253
254 #elif   TABLE_BITS==4
255
/*
 * gcm_init_4bit fills the 16-entry "Shoup's" lookup table:
 * Htable[i] holds i*H in GF(2^128), where the index i is interpreted
 * as a 4-bit polynomial. H is supplied in host byte order (already
 * byte-swapped by CRYPTO_gcm128_init).
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
        u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
        int  i;
#endif

        Htable[0].hi = 0;       /* 0*H */
        Htable[0].lo = 0;
        V.hi = H[0];
        V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
        /* Power-of-two entries: Htable[8] = H, then each halving of the
         * index advances V one bit via REDUCE1BIT (shift right with
         * polynomial reduction). */
        for (Htable[8]=V, i=4; i>0; i>>=1) {
                REDUCE1BIT(V);
                Htable[i] = V;
        }

        /* Remaining entries are XOR combinations of those already set:
         * Htable[i+j] = Htable[i] ^ Htable[j]. */
        for (i=2; i<16; i<<=1) {
                u128 *Hi = Htable+i;
                int   j;
                for (V=*Hi, j=1; j<i; ++j) {
                        Hi[j].hi = V.hi^Htable[j].hi;
                        Hi[j].lo = V.lo^Htable[j].lo;
                }
        }
#else
        /* Same computation with both loops fully unrolled. */
        Htable[8] = V;
        REDUCE1BIT(V);
        Htable[4] = V;
        REDUCE1BIT(V);
        Htable[2] = V;
        REDUCE1BIT(V);
        Htable[1] = V;
        Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
        V=Htable[4];
        Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
        Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
        Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
        V=Htable[8];
        Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
        Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
        Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
        Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
        Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
        Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
        Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
        /*
         * ARM assembler expects specific dword order in Htable:
         * on little-endian hi/lo are swapped, on big-endian each
         * 64-bit half has its 32-bit words swapped.
         */
        {
        int j;
        const union { long one; char little; } is_endian = {1};

        if (is_endian.little)
                for (j=0;j<16;++j) {
                        V = Htable[j];
                        Htable[j].hi = V.lo;
                        Htable[j].lo = V.hi;
                }
        else
                for (j=0;j<16;++j) {
                        V = Htable[j];
                        Htable[j].hi = V.lo<<32|V.lo>>32;
                        Htable[j].lo = V.hi<<32|V.hi>>32;
                }
        }
#endif
}
327
328 #ifndef GHASH_ASM
/* rem_4bit[n] is the reduction contribution of a 4-bit value n shifted
 * out of the low end of Z, pre-positioned in the top 16 bits of a
 * size_t by PACK() (32-bit builds shift it up a further 32 bits at the
 * point of use). */
static const size_t rem_4bit[16] = {
        PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
        PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
        PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
        PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
/*
 * gcm_gmult_4bit performs Xi = Xi * H in GF(2^128) using the 16-entry
 * table built by gcm_init_4bit. Xi is in big-endian (wire) byte order
 * on entry and exit.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
        u128 Z;
        int cnt = 15;
        size_t rem, nlo, nhi;
        const union { long one; char little; } is_endian = {1};

        /* Start with the low nibble of the last byte of Xi. */
        nlo  = ((const u8 *)Xi)[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        /* Two table lookups per byte (low nibble, then high nibble);
         * before each lookup Z is shifted right 4 bits, with the
         * shifted-out nibble folded back in through rem_4bit. */
        while (1) {
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32; /* 32-bit size_t: reposition PACK()ed constant */

                Z.hi ^= Htable[nhi].hi;
                Z.lo ^= Htable[nhi].lo;

                if (--cnt<0)            break;  /* all 16 bytes processed */

                nlo  = ((const u8 *)Xi)[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;
        }

        /* Store Z back into Xi in big-endian byte order. */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
}
397
398 #if !defined(OPENSSL_SMALL_FOOTPRINT)
399 /*
400  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
401  * details... Compiler-generated code doesn't seem to give any
402  * performance improvement, at least not on x86[_64]. It's here
403  * mostly as reference and a placeholder for possible future
404  * non-trivial optimization[s]...
405  */
/*
 * gcm_ghash_4bit folds len bytes of input into the running hash:
 * for each 16-byte block, Xi = (Xi ^ block) * H in GF(2^128).
 * Xi is in big-endian (wire) byte order on entry and exit.
 *
 * NOTE(review): the do/while termination (len-=16) assumes len is a
 * positive multiple of 16 — confirm all callers guarantee this.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1   /* straightforward per-nibble variant; the disabled #else branch
         * below trades extra tables for speed */
    do {
        /* Seed from the last byte of (Xi ^ inp). */
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        /* Same shift/lookup cadence as gcm_gmult_4bit, except each new
         * byte of Xi is XORed with the corresponding input byte. */
        while (1) {
                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nhi].hi;
                Z.lo ^= Htable[nhi].lo;

                if (--cnt<0)            break;

                nlo  = ((const u8 *)Xi)[cnt];
                nlo ^= inp[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                rem  = (size_t)Z.lo&0xf;
                Z.lo = (Z.hi<<60)|(Z.lo>>4);
                Z.hi = (Z.hi>>4);
                if (sizeof(size_t)==8)
                        Z.hi ^= rem_4bit[rem];
                else
                        Z.hi ^= (u64)rem_4bit[rem]<<32;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];     /* Htable shifted right by 4 bits */
    u8   Hshl4[16];     /* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
                nlo  = ((const u8 *)Xi)[cnt];
                nlo ^= inp[cnt];
                nhi  = nlo>>4;
                nlo &= 0xf;

                Z.hi ^= Htable[nlo].hi;
                Z.lo ^= Htable[nlo].lo;

                rem = (size_t)Z.lo&0xff;

                Z.lo = (Z.hi<<56)|(Z.lo>>8);
                Z.hi = (Z.hi>>8);

                Z.hi ^= Hshr4[nhi].hi;
                Z.lo ^= Hshr4[nhi].lo;
                Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        /* Byte 0 is handled separately with a 4-bit final shift. */
        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        /* Store Z back into Xi in big-endian byte order. */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
568 #endif
569 #else
570 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
572 #endif
573
574 #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
575 #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
576 #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
577 /* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578  * trashing effect. In other words idea is to hash data while it's
579  * still in L1 cache after encryption pass... */
580 #define GHASH_CHUNK       (3*1024)
581 #endif
582
583 #else   /* TABLE_BITS */
584
/*
 * gcm_gmult_1bit: bit-serial (table-free) Xi = Xi * H in GF(2^128);
 * compiled only when TABLE_BITS==1. Xi is in big-endian (wire) byte
 * order on entry and exit.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
        u128 V,Z = { 0,0 };
        long X;
        int  i,j;
        const long *xi = (const long *)Xi;
        const union { long one; char little; } is_endian = {1};

        V.hi = H[0];    /* H is in host byte order, no byte swapping */
        V.lo = H[1];

        /* Consume Xi one machine word at a time; on little-endian
         * hosts each word is byte-swapped so the most significant bit
         * of the big-endian value is processed first. */
        for (j=0; j<16/sizeof(long); ++j) {
                if (is_endian.little) {
                        if (sizeof(long)==8) {
#ifdef BSWAP8
                                X = (long)(BSWAP8(xi[j]));
#else
                                const u8 *p = (const u8 *)(xi+j);
                                X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
                        }
                        else {
                                const u8 *p = (const u8 *)(xi+j);
                                X = (long)GETU32(p);
                        }
                }
                else
                        X = xi[j];

                /* Classic shift-and-add: mask M is all-ones when the
                 * current top bit of X is set, all-zeros otherwise;
                 * conditionally accumulate V into Z, then advance V one
                 * bit via REDUCE1BIT.
                 * NOTE(review): relies on sign-propagating right shift
                 * of the signed long X, which is implementation-defined
                 * — holds on the compilers OpenSSL targets. */
                for (i=0; i<8*sizeof(long); ++i, X<<=1) {
                        u64 M = (u64)(X>>(8*sizeof(long)-1));
                        Z.hi ^= V.hi&M;
                        Z.lo ^= V.lo&M;

                        REDUCE1BIT(V);
                }
        }

        /* Store Z back into Xi in big-endian byte order. */
        if (is_endian.little) {
#ifdef BSWAP8
                Xi[0] = BSWAP8(Z.hi);
                Xi[1] = BSWAP8(Z.lo);
#else
                u8 *p = (u8 *)Xi;
                u32 v;
                v = (u32)(Z.hi>>32);    PUTU32(p,v);
                v = (u32)(Z.hi);        PUTU32(p+4,v);
                v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
                v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
                Xi[0] = Z.hi;
                Xi[1] = Z.lo;
        }
}
641 #define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
642
643 #endif
644
645 #if     TABLE_BITS==4 && defined(GHASH_ASM)
646 # if    !defined(I386_ONLY) && \
647         (defined(__i386)        || defined(__i386__)    || \
648          defined(__x86_64)      || defined(__x86_64__)  || \
649          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
650 #  define GHASH_ASM_X86_OR_64
651 #  define GCM_FUNCREF_4BIT
652 extern unsigned int OPENSSL_ia32cap_P[2];
653
654 void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655 void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656 void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
658 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
659 # define gcm_init_avx   gcm_init_clmul
660 # define gcm_gmult_avx  gcm_gmult_clmul
661 # define gcm_ghash_avx  gcm_ghash_clmul
662 #else
663 void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664 void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665 void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666 #endif
667
668 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
669 #   define GHASH_ASM_X86
670 void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671 void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
672
673 void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674 void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
675 #  endif
676 # elif defined(__arm__) || defined(__arm)
677 #  include "arm_arch.h"
678 #  if __ARM_ARCH__>=7
679 #   define GHASH_ASM_ARM
680 #   define GCM_FUNCREF_4BIT
681 void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
682 void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
683 #  endif
684 # elif defined(__sparc__) || defined(__sparc)
685 #  include "sparc_arch.h"
686 #  define GHASH_ASM_SPARC
687 #  define GCM_FUNCREF_4BIT
688 extern unsigned int OPENSSL_sparcv9cap_P[];
689 void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
690 void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
691 void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
692 # endif
693 #endif
694
695 #ifdef GCM_FUNCREF_4BIT
696 # undef  GCM_MUL
697 # define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
698 # ifdef GHASH
699 #  undef  GHASH
700 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
701 # endif
702 #endif
703
/*
 * CRYPTO_gcm128_init prepares ctx for GCM operation with the given
 * block cipher and key: it zeroes the context, computes the hash
 * subkey H = E_K(0^128), converts H to host byte order, builds the
 * GHASH lookup table, and — where assembler support is compiled in —
 * selects the fastest gmult/ghash implementation available on the
 * current CPU.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
        const union { long one; char little; } is_endian = {1};

        memset(ctx,0,sizeof(*ctx));
        ctx->block = block;
        ctx->key   = key;

        /* H = E_K(0^128): ctx->H.c is all-zero after the memset above. */
        (*block)(ctx->H.c,ctx->H.c,key);

        if (is_endian.little) {
                /* H is stored in host byte order */
#ifdef BSWAP8
                ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
                ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
                u8 *p = ctx->H.c;
                u64 hi,lo;
                hi = (u64)GETU32(p)  <<32|GETU32(p+4);
                lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
                ctx->H.u[0] = hi;
                ctx->H.u[1] = lo;
#endif
        }

#if     TABLE_BITS==8
        gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
        /* Prefer carry-less multiply (PCLMULQDQ) paths when the CPU
         * supports them; AVX+MOVBE selects the fastest variant. */
        if (OPENSSL_ia32cap_P[0]&(1<<24) &&     /* check FXSR bit */
            OPENSSL_ia32cap_P[1]&(1<<1) ) {     /* check PCLMULQDQ bit */
                if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {  /* AVX+MOVBE */
                        gcm_init_avx(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_avx;
                        ctx->ghash = gcm_ghash_avx;
                } else {
                        gcm_init_clmul(ctx->Htable,ctx->H.u);
                        ctx->gmult = gcm_gmult_clmul;
                        ctx->ghash = gcm_ghash_clmul;
                }
                return;
        }
#  endif
        gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if   defined(GHASH_ASM_X86)                  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
        if (OPENSSL_ia32cap_P[0]&(1<<25)) {     /* check SSE bit */
#   else
        if (OPENSSL_ia32cap_P[0]&(1<<23)) {     /* check MMX bit */
#   endif
                ctx->gmult = gcm_gmult_4bit_mmx;
                ctx->ghash = gcm_ghash_4bit_mmx;
        } else {
                ctx->gmult = gcm_gmult_4bit_x86;
                ctx->ghash = gcm_ghash_4bit_x86;
        }
#  else
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
        /* NEON path builds its own table inside the assembler module;
         * the C fallback needs gcm_init_4bit here. */
        if (OPENSSL_armcap_P & ARMV7_NEON) {
                ctx->gmult = gcm_gmult_neon;
                ctx->ghash = gcm_ghash_neon;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# elif  defined(GHASH_ASM_SPARC)
        if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
                gcm_init_vis3(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_vis3;
                ctx->ghash = gcm_ghash_vis3;
        } else {
                gcm_init_4bit(ctx->Htable,ctx->H.u);
                ctx->gmult = gcm_gmult_4bit;
                ctx->ghash = gcm_ghash_4bit;
        }
# else
        gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
789
/*
 * Set the IV for a new message: resets all per-message state, derives the
 * pre-counter block J0 into ctx->Yi, and computes EK0 = E(K, J0) which is
 * XOR-ed into the final tag by CRYPTO_gcm128_finish.  Must be called
 * before AAD or message data is processed.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
#endif

        ctx->Yi.u[0]  = 0;
        ctx->Yi.u[1]  = 0;
        ctx->Xi.u[0]  = 0;
        ctx->Xi.u[1]  = 0;
        ctx->len.u[0] = 0;      /* AAD length */
        ctx->len.u[1] = 0;      /* message length */
        ctx->ares = 0;
        ctx->mres = 0;

        if (len==12) {
                /* 96-bit IV fast path: J0 = IV || 0^31 || 1 */
                memcpy(ctx->Yi.c,iv,12);
                ctx->Yi.c[15]=1;
                ctr=1;
        }
        else {
                /* general case: J0 = GHASH(IV padded || [len(IV)]_64) */
                size_t i;
                u64 len0 = len;

                while (len>=16) {
                        for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                        iv += 16;
                        len -= 16;
                }
                if (len) {
                        /* partial final IV block: zero-padded by Yi state */
                        for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
                        GCM_MUL(ctx,Yi);
                }
                len0 <<= 3;     /* IV length in bits */
                if (is_endian.little) {
#ifdef BSWAP8
                        ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
                        ctx->Yi.c[8]  ^= (u8)(len0>>56);
                        ctx->Yi.c[9]  ^= (u8)(len0>>48);
                        ctx->Yi.c[10] ^= (u8)(len0>>40);
                        ctx->Yi.c[11] ^= (u8)(len0>>32);
                        ctx->Yi.c[12] ^= (u8)(len0>>24);
                        ctx->Yi.c[13] ^= (u8)(len0>>16);
                        ctx->Yi.c[14] ^= (u8)(len0>>8);
                        ctx->Yi.c[15] ^= (u8)(len0);
#endif
                }
                else
                        ctx->Yi.u[1]  ^= len0;

                GCM_MUL(ctx,Yi);

                /* extract initial 32-bit counter from the last word of J0 */
                if (is_endian.little)
#ifdef BSWAP4
                        ctr = BSWAP4(ctx->Yi.d[3]);
#else
                        ctr = GETU32(ctx->Yi.c+12);
#endif
                else
                        ctr = ctx->Yi.d[3];
        }

        /* EK0 = E(K, J0), saved for the final tag computation */
        (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
        ++ctr;
        /* store incremented counter back so encryption starts at J0+1 */
        if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
                ctx->Yi.d[3] = ctr;
}
867
/*
 * Absorb additional authenticated data (AAD) into the GHASH state.  May
 * be called multiple times, but only before any encrypt/decrypt call for
 * the message.  Returns 0 on success, -1 if the accumulated AAD length
 * exceeds 2^61 bytes or the length counter overflows, -2 if message data
 * has already been processed.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
        size_t i;
        unsigned int n;
        u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        /* AAD must precede all message data */
        if (ctx->len.u[1]) return -2;

        alen += len;
        if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
                return -1;
        ctx->len.u[0] = alen;

        n = ctx->ares;
        if (n) {
                /* complete a partially filled 16-byte block from a
                 * previous call before processing whole blocks */
                while (n && len) {
                        ctx->Xi.c[n] ^= *(aad++);
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        ctx->ares = n;
                        return 0;
                }
        }

#ifdef GHASH
        /* hash all whole 16-byte blocks in one call */
        if ((i = (len&(size_t)-16))) {
                GHASH(ctx,aad,i);
                aad += i;
                len -= i;
        }
#else
        while (len>=16) {
                for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
                GCM_MUL(ctx,Xi);
                aad += 16;
                len -= 16;
        }
#endif
        if (len) {
                /* buffer the trailing partial block in Xi; ares records
                 * how many bytes are pending */
                n = (unsigned int)len;
                for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
        }

        ctx->ares = n;
        return 0;
}
924
/*
 * Encrypt len bytes from in to out in GCM mode, folding the produced
 * ciphertext into the GHASH state and advancing the 32-bit counter held
 * in the last word of ctx->Yi.  May be called repeatedly to process a
 * message in chunks; partial-block residue is carried in ctx->mres.
 * Returns 0 on success, -1 if the total message length exceeds the GCM
 * limit of 2^36-32 bytes (or the length counter overflows).
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64        mlen  = ctx->len.u[1];
        block128_f block = ctx->block;
        void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

#if 0
        n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to encrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
        if (16%sizeof(size_t) == 0) do {        /* always true actually */
                if (n) {
                        /* consume the rest of a previously generated
                         * keystream block before bulk processing */
                        while (n && len) {
                                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                                --len;
                                n = (n+1)%16;
                        }
                        if (n==0) GCM_MUL(ctx,Xi);
                        else {
                                ctx->mres = n;
                                return 0;
                        }
                }
#if defined(STRICT_ALIGNMENT)
                /* word-at-a-time paths below require aligned pointers */
                if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
                        break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
                /* encrypt a whole chunk, then hash it in one GHASH call */
                while (len>=GHASH_CHUNK) {
                    size_t j=GHASH_CHUNK;

                    while (j) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        j   -= 16;
                    }
                    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
                    len -= GHASH_CHUNK;
                }
                /* remaining whole blocks, hashed with a single GHASH */
                if ((i = (len&(size_t)-16))) {
                    size_t j=i;

                    while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                        out += 16;
                        in  += 16;
                        len -= 16;
                    }
                    GHASH(ctx,out-j,j);
                }
#else
                /* no bulk GHASH: encrypt and multiply block by block */
                while (len>=16) {
                        size_t *out_t=(size_t *)out;
                        const size_t *in_t=(const size_t *)in;

                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        for (i=0; i<16/sizeof(size_t); ++i)
                                ctx->Xi.t[i] ^=
                                out_t[i] = in_t[i]^ctx->EKi.t[i];
                        GCM_MUL(ctx,Xi);
                        out += 16;
                        in  += 16;
                        len -= 16;
                }
#endif
                if (len) {
                        /* trailing partial block: generate one more
                         * keystream block; residue count goes to mres */
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                        while (len--) {
                                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                                ++n;
                        }
                }

                ctx->mres = n;
                return 0;
        } while(0);
#endif
        /* byte-at-a-time fallback (small footprint / unaligned input) */
        for (i=0;i<len;++i) {
                if (n==0) {
                        (*block)(ctx->Yi.c,ctx->EKi.c,key);
                        ++ctr;
                        if (is_endian.little)
#ifdef BSWAP4
                                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                                PUTU32(ctx->Yi.c+12,ctr);
#endif
                        else
                                ctx->Yi.d[3] = ctr;
                }
                ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
                n = (n+1)%16;
                if (n==0)
                        GCM_MUL(ctx,Xi);
        }

        ctx->mres = n;
        return 0;
}
1104
1105 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1106                 const unsigned char *in, unsigned char *out,
1107                 size_t len)
1108 {
1109         const union { long one; char little; } is_endian = {1};
1110         unsigned int n, ctr;
1111         size_t i;
1112         u64        mlen  = ctx->len.u[1];
1113         block128_f block = ctx->block;
1114         void      *key   = ctx->key;
1115 #ifdef GCM_FUNCREF_4BIT
1116         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1117 # ifdef GHASH
1118         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1119                                 const u8 *inp,size_t len)       = ctx->ghash;
1120 # endif
1121 #endif
1122
1123         mlen += len;
1124         if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1125                 return -1;
1126         ctx->len.u[1] = mlen;
1127
1128         if (ctx->ares) {
1129                 /* First call to decrypt finalizes GHASH(AAD) */
1130                 GCM_MUL(ctx,Xi);
1131                 ctx->ares = 0;
1132         }
1133
1134         if (is_endian.little)
1135 #ifdef BSWAP4
1136                 ctr = BSWAP4(ctx->Yi.d[3]);
1137 #else
1138                 ctr = GETU32(ctx->Yi.c+12);
1139 #endif
1140         else
1141                 ctr = ctx->Yi.d[3];
1142
1143         n = ctx->mres;
1144 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1145         if (16%sizeof(size_t) == 0) do {        /* always true actually */
1146                 if (n) {
1147                         while (n && len) {
1148                                 u8 c = *(in++);
1149                                 *(out++) = c^ctx->EKi.c[n];
1150                                 ctx->Xi.c[n] ^= c;
1151                                 --len;
1152                                 n = (n+1)%16;
1153                         }
1154                         if (n==0) GCM_MUL (ctx,Xi);
1155                         else {
1156                                 ctx->mres = n;
1157                                 return 0;
1158                         }
1159                 }
1160 #if defined(STRICT_ALIGNMENT)
1161                 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1162                         break;
1163 #endif
1164 #if defined(GHASH) && defined(GHASH_CHUNK)
1165                 while (len>=GHASH_CHUNK) {
1166                     size_t j=GHASH_CHUNK;
1167
1168                     GHASH(ctx,in,GHASH_CHUNK);
1169                     while (j) {
1170                         size_t *out_t=(size_t *)out;
1171                         const size_t *in_t=(const size_t *)in;
1172
1173                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1174                         ++ctr;
1175                         if (is_endian.little)
1176 #ifdef BSWAP4
1177                                 ctx->Yi.d[3] = BSWAP4(ctr);
1178 #else
1179                                 PUTU32(ctx->Yi.c+12,ctr);
1180 #endif
1181                         else
1182                                 ctx->Yi.d[3] = ctr;
1183                         for (i=0; i<16/sizeof(size_t); ++i)
1184                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1185                         out += 16;
1186                         in  += 16;
1187                         j   -= 16;
1188                     }
1189                     len -= GHASH_CHUNK;
1190                 }
1191                 if ((i = (len&(size_t)-16))) {
1192                     GHASH(ctx,in,i);
1193                     while (len>=16) {
1194                         size_t *out_t=(size_t *)out;
1195                         const size_t *in_t=(const size_t *)in;
1196
1197                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1198                         ++ctr;
1199                         if (is_endian.little)
1200 #ifdef BSWAP4
1201                                 ctx->Yi.d[3] = BSWAP4(ctr);
1202 #else
1203                                 PUTU32(ctx->Yi.c+12,ctr);
1204 #endif
1205                         else
1206                                 ctx->Yi.d[3] = ctr;
1207                         for (i=0; i<16/sizeof(size_t); ++i)
1208                                 out_t[i] = in_t[i]^ctx->EKi.t[i];
1209                         out += 16;
1210                         in  += 16;
1211                         len -= 16;
1212                     }
1213                 }
1214 #else
1215                 while (len>=16) {
1216                         size_t *out_t=(size_t *)out;
1217                         const size_t *in_t=(const size_t *)in;
1218
1219                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1220                         ++ctr;
1221                         if (is_endian.little)
1222 #ifdef BSWAP4
1223                                 ctx->Yi.d[3] = BSWAP4(ctr);
1224 #else
1225                                 PUTU32(ctx->Yi.c+12,ctr);
1226 #endif
1227                         else
1228                                 ctx->Yi.d[3] = ctr;
1229                         for (i=0; i<16/sizeof(size_t); ++i) {
1230                                 size_t c = in[i];
1231                                 out[i] = c^ctx->EKi.t[i];
1232                                 ctx->Xi.t[i] ^= c;
1233                         }
1234                         GCM_MUL(ctx,Xi);
1235                         out += 16;
1236                         in  += 16;
1237                         len -= 16;
1238                 }
1239 #endif
1240                 if (len) {
1241                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1242                         ++ctr;
1243                         if (is_endian.little)
1244 #ifdef BSWAP4
1245                                 ctx->Yi.d[3] = BSWAP4(ctr);
1246 #else
1247                                 PUTU32(ctx->Yi.c+12,ctr);
1248 #endif
1249                         else
1250                                 ctx->Yi.d[3] = ctr;
1251                         while (len--) {
1252                                 u8 c = in[n];
1253                                 ctx->Xi.c[n] ^= c;
1254                                 out[n] = c^ctx->EKi.c[n];
1255                                 ++n;
1256                         }
1257                 }
1258
1259                 ctx->mres = n;
1260                 return 0;
1261         } while(0);
1262 #endif
1263         for (i=0;i<len;++i) {
1264                 u8 c;
1265                 if (n==0) {
1266                         (*block)(ctx->Yi.c,ctx->EKi.c,key);
1267                         ++ctr;
1268                         if (is_endian.little)
1269 #ifdef BSWAP4
1270                                 ctx->Yi.d[3] = BSWAP4(ctr);
1271 #else
1272                                 PUTU32(ctx->Yi.c+12,ctr);
1273 #endif
1274                         else
1275                                 ctx->Yi.d[3] = ctr;
1276                 }
1277                 c = in[i];
1278                 out[i] = c^ctx->EKi.c[n];
1279                 ctx->Xi.c[n] ^= c;
1280                 n = (n+1)%16;
1281                 if (n==0)
1282                         GCM_MUL(ctx,Xi);
1283         }
1284
1285         ctx->mres = n;
1286         return 0;
1287 }
1288
/*
 * Encrypt len bytes using a caller-supplied counter-mode routine (stream)
 * for bulk keystream generation, hashing the produced ciphertext into the
 * GHASH state.  Same limits and chunked-call semantics as
 * CRYPTO_gcm128_encrypt.  Returns 0 on success, -1 if the total message
 * length exceeds 2^36-32 bytes (or overflows).
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len, ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to encrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
        if (n) {
                /* consume the rest of a previously generated
                 * keystream block before bulk processing */
                while (n && len) {
                        ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL(ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* bulk path: stream-encrypt a chunk, then hash it in one call */
        while (len>=GHASH_CHUNK) {
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                GHASH(ctx,out,GHASH_CHUNK);
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* remaining whole 16-byte blocks */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                in  += i;
                len -= i;
#if defined(GHASH)
                GHASH(ctx,out,i);
                out += i;
#else
                while (j--) {
                        for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
                        GCM_MUL(ctx,Xi);
                        out += 16;
                }
#endif
        }
        if (len) {
                /* trailing partial block: fall back to the block cipher
                 * for one keystream block; residue count goes to mres */
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1403
/*
 * Decrypt len bytes using a caller-supplied counter-mode routine (stream)
 * for bulk keystream generation, hashing the ciphertext into the GHASH
 * state before decrypting it.  Same limits and chunked-call semantics as
 * CRYPTO_gcm128_decrypt.  Returns 0 on success, -1 if the total message
 * length exceeds 2^36-32 bytes (or overflows).
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                const unsigned char *in, unsigned char *out,
                size_t len,ctr128_f stream)
{
        const union { long one; char little; } is_endian = {1};
        unsigned int n, ctr;
        size_t i;
        u64   mlen = ctx->len.u[1];
        void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
        void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
        void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                                const u8 *inp,size_t len)       = ctx->ghash;
# endif
#endif

        mlen += len;
        if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
                return -1;
        ctx->len.u[1] = mlen;

        if (ctx->ares) {
                /* First call to decrypt finalizes GHASH(AAD) */
                GCM_MUL(ctx,Xi);
                ctx->ares = 0;
        }

        if (is_endian.little)
#ifdef BSWAP4
                ctr = BSWAP4(ctx->Yi.d[3]);
#else
                ctr = GETU32(ctx->Yi.c+12);
#endif
        else
                ctr = ctx->Yi.d[3];

        n = ctx->mres;
        if (n) {
                /* consume the rest of a previously generated
                 * keystream block before bulk processing */
                while (n && len) {
                        u8 c = *(in++);
                        *(out++) = c^ctx->EKi.c[n];
                        ctx->Xi.c[n] ^= c;
                        --len;
                        n = (n+1)%16;
                }
                if (n==0) GCM_MUL (ctx,Xi);
                else {
                        ctx->mres = n;
                        return 0;
                }
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* bulk path: hash a chunk of ciphertext, then stream-decrypt it */
        while (len>=GHASH_CHUNK) {
                GHASH(ctx,in,GHASH_CHUNK);
                (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
                ctr += GHASH_CHUNK/16;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += GHASH_CHUNK;
                in  += GHASH_CHUNK;
                len -= GHASH_CHUNK;
        }
#endif
        /* remaining whole 16-byte blocks */
        if ((i = (len&(size_t)-16))) {
                size_t j=i/16;

#if defined(GHASH)
                GHASH(ctx,in,i);
#else
                while (j--) {
                        size_t k;
                        for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
                        GCM_MUL(ctx,Xi);
                        in += 16;
                }
                /* restore j and in, consumed by the hashing loop above */
                j   = i/16;
                in -= i;
#endif
                (*stream)(in,out,j,key,ctx->Yi.c);
                ctr += (unsigned int)j;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                out += i;
                in  += i;
                len -= i;
        }
        if (len) {
                /* trailing partial block: fall back to the block cipher
                 * for one keystream block; residue count goes to mres */
                (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#else
                        PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                        ctx->Yi.d[3] = ctr;
                while (len--) {
                        u8 c = in[n];
                        ctx->Xi.c[n] ^= c;
                        out[n] = c^ctx->EKi.c[n];
                        ++n;
                }
        }

        ctx->mres = n;
        return 0;
}
1525
1526 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1527                         size_t len)
1528 {
1529         const union { long one; char little; } is_endian = {1};
1530         u64 alen = ctx->len.u[0]<<3;
1531         u64 clen = ctx->len.u[1]<<3;
1532 #ifdef GCM_FUNCREF_4BIT
1533         void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
1534 #endif
1535
1536         if (ctx->mres || ctx->ares)
1537                 GCM_MUL(ctx,Xi);
1538
1539         if (is_endian.little) {
1540 #ifdef BSWAP8
1541                 alen = BSWAP8(alen);
1542                 clen = BSWAP8(clen);
1543 #else
1544                 u8 *p = ctx->len.c;
1545
1546                 ctx->len.u[0] = alen;
1547                 ctx->len.u[1] = clen;
1548
1549                 alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1550                 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1551 #endif
1552         }
1553
1554         ctx->Xi.u[0] ^= alen;
1555         ctx->Xi.u[1] ^= clen;
1556         GCM_MUL(ctx,Xi);
1557
1558         ctx->Xi.u[0] ^= ctx->EK0.u[0];
1559         ctx->Xi.u[1] ^= ctx->EK0.u[1];
1560
1561         if (tag && len<=sizeof(ctx->Xi))
1562                 return memcmp(ctx->Xi.c,tag,len);
1563         else
1564                 return -1;
1565 }
1566
1567 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1568 {
1569         CRYPTO_gcm128_finish(ctx, NULL, 0);
1570         memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1571 }
1572
1573 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1574 {
1575         GCM128_CONTEXT *ret;
1576
1577         if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1578                 CRYPTO_gcm128_init(ret,key,block);
1579
1580         return ret;
1581 }
1582
1583 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1584 {
1585         if (ctx) {
1586                 OPENSSL_cleanse(ctx,sizeof(*ctx));
1587                 OPENSSL_free(ctx);
1588         }
1589 }
1590
1591 #if defined(SELFTEST)
1592 #include <stdio.h>
1593 #include <openssl/aes.h>
1594
/*
 * AES-128 GCM self-test vectors (K = key, P = plaintext, A = AAD,
 * IV = nonce, C = expected ciphertext, T = expected authentication tag).
 * NOTE(review): these appear to be the standard published GCM test cases
 * (McGrew/Viega GCM specification, Appendix B) -- verify against that
 * document before touching any byte.  A NULL pointer denotes an absent
 * input; a zero-sized-initializer array is all-zero of the declared size.
 */

/* Test Case 1: all-zero 128-bit key, zero 96-bit IV, empty P and A */
static const u8 K1[16],
                *P1=NULL,
                *A1=NULL,
                IV1[12],
                *C1=NULL,
                T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2: as case 1, but with one all-zero 16-byte plaintext block */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
                C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
                T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3: AES-128 key, 96-bit IV, 64-byte plaintext, no AAD */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
                T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4: as case 3, but 60-byte (partial-block) plaintext and 20-byte AAD */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
                        0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
                        0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
                        0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
                T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5: as case 4, but with a short 64-bit (8-byte) IV */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
                        0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
                        0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
                        0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
                T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6: as case 4, but with a long 60-byte IV (exercises GHASH of IV) */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
                        0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
                        0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
                        0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
                T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
/*
 * AES-192 GCM self-test vectors; same naming scheme and same structural
 * progression as cases 1-6 (empty, one zero block, 64-byte P, partial
 * block + AAD, short IV, long IV) but with a 24-byte (192-bit) key.
 */

/* Test Case 7: all-zero 192-bit key, zero 96-bit IV, empty P and A */
static const u8 K7[24],
                *P7=NULL,
                *A7=NULL,
                IV7[12],
                *C7=NULL,
                T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8: as case 7, but with one all-zero 16-byte plaintext block */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8 P8[16],
                C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
                T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9: AES-192 key, 96-bit IV, 64-byte plaintext, no AAD */
#define A9 A8
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
                P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
                T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10: as case 9, but 60-byte plaintext and 20-byte AAD */
#define K10 K9
#define IV10 IV9
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
                        0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
                        0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
                        0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
                T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11: as case 10, but with a short 64-bit (8-byte) IV */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
                        0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
                        0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
                        0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
                T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12: as case 10, but with a long 60-byte IV */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
                        0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
                        0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
                        0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
                T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
/*
 * AES-256 GCM self-test vectors; same naming scheme and same structural
 * progression as cases 1-6 but with a 32-byte (256-bit) key.
 */

/* Test Case 13: all-zero 256-bit key, zero 96-bit IV, empty P and A */
static const u8 K13[32],
                *P13=NULL,
                *A13=NULL,
                IV13[12],
                *C13=NULL,
                T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14: as case 13, but with one all-zero 16-byte plaintext block */
#define K14 K13
#define A14 A13
static const u8 P14[16],
                IV14[12],
                C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
                T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15: AES-256 key, 96-bit IV, 64-byte plaintext, no AAD */
#define A15 A14
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
                        0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
                P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
                IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
                C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16: as case 15, but 60-byte plaintext and 20-byte AAD */
#define K16 K15
#define IV16 IV15
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
                A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
                        0xab,0xad,0xda,0xd2},
                C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
                T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17: as case 16, but with a short 64-bit (8-byte) IV */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
                C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
                        0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
                        0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
                        0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
                T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18: as case 16, but with a long 60-byte IV */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
                        0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
                        0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
                        0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
                C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
                        0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
                        0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
                        0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
                T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
/*
 * Additional edge-case vectors beyond the 18 specification cases.
 */

/* Test Case 19: AAD-only authentication -- all-zero AES-128 key, zero IV,
 * no plaintext/ciphertext, 128 bytes of AAD, tag only */
#define K19 K1
#define P19 P1
#define IV19 IV1
#define C19 C1
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
                        0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
                        0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
                        0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
                        0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
                        0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
                        0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
                        0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
                T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};

/* Test Case 20: 64-byte IV beginning 0xff,0xff,0xff,0xff and 288-byte
 * all-zero plaintext -- exercises counter arithmetic near the wrap point */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff}, /* this results in 0xff in counter LSB */
                P20[288],
                C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
                        0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
                        0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
                        0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
                        0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
                        0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
                        0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
                        0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
                        0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
                        0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
                        0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
                        0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
                        0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
                        0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
                        0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
                        0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
                        0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
                        0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
                T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
/*
 * Run GCM test case <n> in both directions: encrypt P<n> under key K<n>
 * with IV<n> and AAD A<n>, check the result against C<n> and verify the
 * tag against T<n>; then decrypt C<n> and check it round-trips to P<n>
 * with the same tag.  A NULL P/A/C pointer skips the corresponding
 * input/compare.  Expects `ctx' (GCM128_CONTEXT), `key' (AES_KEY) and
 * `ret' (failure counter) in scope at the expansion site; each failing
 * direction bumps `ret' and prints a diagnostic on stdout.
 */
#define TEST_CASE(n)    do {                                    \
        u8 out[sizeof(P##n)];                                   \
        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        memset(out,0,sizeof(out));                              \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
            (C##n && memcmp(out,C##n,sizeof(out))))             \
                ret++, printf ("encrypt test#%d failed.\n",n);  \
        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
        memset(out,0,sizeof(out));                              \
        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
            (P##n && memcmp(out,P##n,sizeof(out))))             \
                ret++, printf ("decrypt test#%d failed.\n",n);  \
        } while(0)
1866
1867 int main()
1868 {
1869         GCM128_CONTEXT ctx;
1870         AES_KEY key;
1871         int ret=0;
1872
1873         TEST_CASE(1);
1874         TEST_CASE(2);
1875         TEST_CASE(3);
1876         TEST_CASE(4);
1877         TEST_CASE(5);
1878         TEST_CASE(6);
1879         TEST_CASE(7);
1880         TEST_CASE(8);
1881         TEST_CASE(9);
1882         TEST_CASE(10);
1883         TEST_CASE(11);
1884         TEST_CASE(12);
1885         TEST_CASE(13);
1886         TEST_CASE(14);
1887         TEST_CASE(15);
1888         TEST_CASE(16);
1889         TEST_CASE(17);
1890         TEST_CASE(18);
1891         TEST_CASE(19);
1892         TEST_CASE(20);
1893
1894 #ifdef OPENSSL_CPUID_OBJ
1895         {
1896         size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1897         union { u64 u; u8 c[1024]; } buf;
1898         int i;
1899
1900         AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1901         CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1902         CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1903
1904         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1905         start = OPENSSL_rdtsc();
1906         CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1907         gcm_t = OPENSSL_rdtsc() - start;
1908
1909         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1910                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1911                         (block128_f)AES_encrypt);
1912         start = OPENSSL_rdtsc();
1913         CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1914                         &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1915                         (block128_f)AES_encrypt);
1916         ctr_t = OPENSSL_rdtsc() - start;
1917
1918         printf("%.2f-%.2f=%.2f\n",
1919                         gcm_t/(double)sizeof(buf),
1920                         ctr_t/(double)sizeof(buf),
1921                         (gcm_t-ctr_t)/(double)sizeof(buf));
1922 #ifdef GHASH
1923         {
1924         void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1925                                 const u8 *inp,size_t len)       = ctx.ghash;
1926
1927         GHASH((&ctx),buf.c,sizeof(buf));
1928         start = OPENSSL_rdtsc();
1929         for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
1930         gcm_t = OPENSSL_rdtsc() - start;
1931         printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1932         }
1933 #endif
1934         }
1935 #endif
1936
1937         return ret;
1938 }
1939 #endif