2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
40 * 6. Redistributions of any form whatsoever must retain the following
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
70 # define alloca _alloca
71 # define snprintf _snprintf
74 #include <openssl/crypto.h>
75 #include <openssl/dso.h>
76 #include <openssl/engine.h>
77 #include <openssl/evp.h>
78 #include <openssl/aes.h>
81 #ifndef OPENSSL_NO_HW_PADLOCK

83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
/* DYNAMIC_ENGINE is defined only when the OpenSSL version in use
 * supports dynamically loadable engines (0.9.8 via
 * !OPENSSL_NO_DYNAMIC_ENGINE, 0.9.7 via ENGINE_DYNAMIC_SUPPORT). */
84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
86 # define DYNAMIC_ENGINE
88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89 # ifdef ENGINE_DYNAMIC_SUPPORT
90 # define DYNAMIC_ENGINE
93 # error "Only OpenSSL >= 0.9.7 is supported"

96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
97 Not only that it doesn't exist elsewhere, but it
98 even can't be compiled on other platforms!

100 In addition, because of the heavy use of inline assembler,
101 compiler choice is limited to GCC and Microsoft C. */
/* COMPILE_HW_PADLOCK gates everything below: it is set only for
 * 32-bit x86 targets where inline assembler is permitted. */
102 #undef COMPILE_HW_PADLOCK
103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if defined(__i386__) || defined(__i386) || defined(_M_IX86)
105 # define COMPILE_HW_PADLOCK
106 static ENGINE *ENGINE_padlock (void);
/* Public entry point: build the PadLock ENGINE and add it to
 * OpenSSL's internal engine list.  Compiles to a no-op stub on
 * platforms where COMPILE_HW_PADLOCK is not defined. */
110 void ENGINE_load_padlock (void)
112 /* On non-x86 CPUs it just returns. */
113 #ifdef COMPILE_HW_PADLOCK
114 ENGINE *toadd = ENGINE_padlock ();
122 #ifdef COMPILE_HW_PADLOCK

/* Forward declarations for the detection/control callbacks and the
 * EVP cipher dispatcher registered with the ENGINE below. */
123 /* Function for ENGINE detection and control */
124 static int padlock_available(void);
125 static int padlock_init(ENGINE *e);

128 static RAND_METHOD padlock_rand;

131 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);

/* Engine identity: fixed id string plus a name buffer that is filled
 * in at bind time with the detected feature set. */
134 static const char *padlock_id = "padlock";
135 static char padlock_name[100];

137 /* Available features */
/* Set by padlock_available() from the Centaur extended CPUID flags. */
138 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
139 static int padlock_use_rng = 0; /* Random Number Generator */
/* 1 => buffers passed to xcrypt must be 16-byte aligned; kept at 1
 * until CPUs that lift the requirement can be detected. */
140 static int padlock_aes_align_required = 1;
142 /* ===== Engine "management" functions ===== */

144 /* Prepare the ENGINE structure for registration */
/* Probes the CPU, composes the human-readable engine name, and wires
 * all callbacks into 'e'.  Ciphers/RAND are registered only when the
 * corresponding hardware feature was detected. */
146 padlock_bind_helper(ENGINE *e)
148 /* Check available features */
151 #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */

155 /* Generate a nice engine name with available features */
156 BIO_snprintf(padlock_name, sizeof(padlock_name),
157 "VIA PadLock (%s, %s)",
158 padlock_use_rng ? "RNG" : "no-RNG",
159 padlock_use_ace ? "ACE" : "no-ACE");

161 /* Register everything or return with an error */
162 if (!ENGINE_set_id(e, padlock_id) ||
163 !ENGINE_set_name(e, padlock_name) ||
165 !ENGINE_set_init_function(e, padlock_init) ||
167 (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
168 (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {

172 /* Everything looks good */
/* (ENGINE_padlock) Allocate a fresh ENGINE and bind the PadLock
 * callbacks to it via padlock_bind_helper(); the bound engine is
 * returned to ENGINE_load_padlock(). */
180 ENGINE *eng = ENGINE_new();
186 if (!padlock_bind_helper(eng)) {
194 /* Check availability of the engine */
/* ENGINE init callback: succeeds iff at least one PadLock feature
 * (RNG or ACE) was detected at bind time. */
196 padlock_init(ENGINE *e)
198 return (padlock_use_rng || padlock_use_ace);
201 /* This stuff is needed if this ENGINE is being compiled into a self-contained
204 #ifdef DYNAMIC_ENGINE
/* Dynamic-loading bind callback: reject any requested id other than
 * "padlock", then perform the normal bind. */
206 padlock_bind_fn(ENGINE *e, const char *id)
208 if (id && (strcmp(id, padlock_id) != 0)) {
212 if (!padlock_bind_helper(e)) {

/* Standard OpenSSL boilerplate exposing the v_check/bind_engine
 * entry points of a dynamically loadable engine. */
219 IMPLEMENT_DYNAMIC_CHECK_FN ();
220 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn);
221 #endif /* DYNAMIC_ENGINE */
223 /* ===== Here comes the "real" engine ===== */

225 /* Some AES-related constants */
226 #define AES_BLOCK_SIZE 16
227 #define AES_KEY_SIZE_128 16
228 #define AES_KEY_SIZE_192 24
229 #define AES_KEY_SIZE_256 32

231 /* Here we store the status information relevant to the
234 * Inline assembler in PADLOCK_XCRYPT_ASM()
235 * depends on the order of items in this structure.
236 * Don't blindly modify, reorder, etc!
/* Per-context state handed to the xcrypt instructions: IV first,
 * then the control word, then the expanded key — the asm addresses
 * these members by fixed byte offsets (0, 16, 32). */
238 struct padlock_cipher_data
240 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
241 union { unsigned int pad[4];
250 } cword; /* Control word */
251 AES_KEY ks; /* Encryption key */

255 * Essentially this variable belongs in thread local storage.
256 * Having this variable global on the other hand can only cause
257 * few bogus key reloads [if any at all on single-CPU system],
258 * so we accept the penalty...
/* Last cipher context whose key was loaded into the CPU; used by
 * padlock_verify_context() to decide whether a key reload is needed. */
260 static volatile struct padlock_cipher_data *padlock_saved_context;
263 * =======================================================
264 * Inline assembler section(s).
265 * =======================================================
266 * Order of arguments is chosen to facilitate Windows port
267 * using __fastcall calling convention. If you wish to add
268 * more routines, keep in mind that first __fastcall
269 * argument is passed in %ecx and second - in %edx.
270 * =======================================================

272 #if defined(__GNUC__) && __GNUC__>=2

274 * As for excessive "push %ebx"/"pop %ebx" found all over.
275 * When generating position-independent code GCC won't let
276 * us use "b" in assembler templates nor even respect "ebx"
277 * in "clobber description." Therefore the trouble...

280 /* Helper function - check if a CPUID instruction
281 is available on this CPU */
/* Returns non-zero when EFLAGS bit 21 (the ID flag) can be toggled,
 * which is the architectural test for CPUID support. */
283 padlock_insn_cpuid_available(void)
287 /* We're checking if the bit #21 of EFLAGS
288 can be toggled. If yes = CPUID is available. */
292 "xorl $0x200000, %%eax\n"
293 "movl %%eax, %%ecx\n"
294 "andl $0x200000, %%ecx\n"
299 "andl $0x200000, %%eax\n"
300 "xorl %%eax, %%ecx\n"
302 : "=r" (result) : : "eax", "ecx");

/* result==0 means the toggled bit survived the round trip. */
304 return (result == 0);
307 /* Load supported features of the CPU to see if
308 the PadLock is available. */
/* Detects PadLock: requires CPUID, a "CentaurHauls" vendor string,
 * and the Centaur extended feature leaves (>= 0xC0000001).  Sets the
 * padlock_use_ace / padlock_use_rng globals from the feature bits
 * and returns non-zero when either unit is present and enabled. */
310 padlock_available(void)
312 char vendor_string[16];
313 unsigned int eax, edx;

315 /* First check if the CPUID instruction is available at all... */
316 if (! padlock_insn_cpuid_available())

319 /* Are we running on the Centaur (VIA) CPU? */
321 vendor_string[12] = 0;
/* CPUID leaf 0: vendor string comes back in EBX:EDX:ECX; EBX is
 * spilled through EDI because PIC code reserves %ebx (see above). */
325 "movl %%ebx,(%%edi)\n"
326 "movl %%edx,4(%%edi)\n"
327 "movl %%ecx,8(%%edi)\n"
329 : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
330 if (strcmp(vendor_string, "CentaurHauls") != 0)

333 /* Check for Centaur Extended Feature Flags presence */
335 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
336 : "+a"(eax) : : "ecx", "edx");
337 if (eax < 0xC0000001)

340 /* Read the Centaur Extended Feature Flags */
342 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
343 : "+a"(eax), "=d"(edx) : : "ecx");

345 /* Fill up some flags */
/* Each unit needs two bits set: "present" and "enabled". */
346 padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
347 padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

349 return padlock_use_ace + padlock_use_rng;
352 /* Our own htonl()/ntohl() */
/* Byte-swaps every word of the expanded AES key in place.  Needed
 * because OpenSSL's software key schedule stores round keys
 * byte-swapped relative to what the PadLock hardware expects. */
354 padlock_bswapl(AES_KEY *ks)
356 size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
357 unsigned long *key = ks->rd_key;
360 asm volatile ("bswapl %0" : "+r"(*key));
365 /* Force key reload from memory to the CPU microcode.
366 Loading EFLAGS from the stack clears EFLAGS[30]
367 which does the trick. */
369 padlock_reload_key(void)
371 asm volatile ("pushfl; popfl");
375 * This is heuristic key context tracing. At first one
376 * believes that one should use atomic swap instructions,
377 * but it's not actually necessary. Point is that if
378 * padlock_saved_context was changed by another thread
379 * after we've read it and before we compare it with cdata,
380 * our key *shall* be reloaded upon thread context switch
381 * and we are therefore set in either case...
/* Compares 'cdata' against the last loaded context and forces a key
 * reload (EFLAGS trick, see padlock_reload_key) when they differ;
 * records 'cdata' as the new current context. */
384 padlock_verify_context(struct padlock_cipher_data *cdata)
396 :"+m"(padlock_saved_context)
397 : "r"(padlock_saved_context), "r"(cdata) : "cc");
400 /* Template for padlock_xcrypt_* modes */
402 * The offsets used with 'leal' instructions
403 * describe items of the 'padlock_cipher_data'
/* Generator macro: each expansion yields an inline function that
 * loads cdata (%eax), &cdata->cword (%edx) and &cdata->ks (%ebx),
 * then executes the mode-specific 'rep xcrypt*' opcode over 'cnt'
 * 16-byte blocks from 'inp' to 'out'.  Returns the updated IV
 * pointer left in %eax by the instruction. */
406 #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
407 static inline void *name(size_t cnt, \
408 struct padlock_cipher_data *cdata, \
409 void *out, const void *inp) \
411 asm volatile ( "pushl %%ebx\n" \
412 " leal 16(%0),%%edx\n" \
413 " leal 32(%0),%%ebx\n" \
416 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
417 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \

422 /* Generate all functions with appropriate opcodes */
423 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
424 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
425 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
426 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
428 /* The RNG call itself */
/* Executes the PadLock 'xstore' instruction: writes random bytes to
 * 'addr' with quality-factor 'edx_in' and returns the status word
 * the instruction leaves in EAX (entropy count, failure bits). */
429 static inline unsigned int
430 padlock_xstore(void *addr, unsigned int edx_in)
432 unsigned int eax_out;

434 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
435 : "=a"(eax_out),"=m"(*(unsigned *)addr)
436 : "D"(addr), "d" (edx_in)
442 /* Why not inline 'rep movsd'? I failed to find information on what
443 * value in Direction Flag one can expect and consequently have to
444 * apply "better-safe-than-sorry" approach and assume "undefined."
445 * I could explicitly clear it and restore the original value upon
446 * return from padlock_aes_cipher, but it's presumably too much
447 * trouble for too little gain...

449 * In case you wonder 'rep xcrypt*' instructions above are *not*
450 * affected by the Direction Flag and pointers advance toward
451 * larger addresses unconditionally.
/* Simple forward byte copy returning 'dst'; used to stage data in
 * and out of the aligned bounce buffer in padlock_aes_cipher(). */
453 static inline unsigned char *
454 padlock_memcpy(void *dst,const void *src,size_t n)
460 do { *d++ = *s++; } while (--n);
465 #elif defined(_MSC_VER)
467 * Unlike GCC these are real functions. In order to minimize impact
468 * on performance we adhere to __fastcall calling convention in
469 * order to get two first arguments passed through %ecx and %edx.
470 * Which kind of suits very well, as instructions in question use
471 * both %ecx and %edx as input:-)
/* MSVC mirror of the GCC section above: identical entry points
 * (padlock_xcrypt_*, padlock_xstore, padlock_reload_key,
 * padlock_verify_context, padlock_available, padlock_bswapl,
 * padlock_memcpy) implemented with _asm/_emit instead of inline
 * GNU assembler. */
473 #define REP_XCRYPT(code) \
475 _asm _emit 0x0f _asm _emit 0xa7 \

479 * The offsets used with 'lea' instructions
480 * describe items of the 'padlock_cipher_data'
483 #define PADLOCK_XCRYPT_ASM(name,code) \
484 static void * __fastcall \
485 name (size_t cnt, void *cdata, \
486 void *outp, const void *inp) \
488 _asm lea edx,[eax+16] \
489 _asm lea ebx,[eax+32] \

495 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
496 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
497 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
498 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)

500 static int __fastcall
501 padlock_xstore(void *outp,unsigned int code)
503 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0

506 static void __fastcall
507 padlock_reload_key(void)
508 { _asm pushfd _asm popfd }

510 static void __fastcall
511 padlock_verify_context(void *cdata)
516 cmp ecx,padlock_saved_context
518 mov padlock_saved_context,ecx

/* (padlock_available, MSVC flavour) sets the same feature globals
 * as the GCC version. */
526 padlock_available(void)
561 mov padlock_use_ace,1
567 mov padlock_use_rng,1

574 static void __fastcall
575 padlock_bswapl(void *key)

590 /* MS actually specifies status of Direction Flag and compiler even
591 * manages to compile following as 'rep movsd' all by itself...
593 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
596 /* ===== AES encryption/decryption ===== */

/* Map the *_cfb128/*_ofb128 NID names (used by some OpenSSL
 * versions) onto the short names this file uses throughout. */
598 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
599 #define NID_aes_128_cfb NID_aes_128_cfb128

602 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
603 #define NID_aes_128_ofb NID_aes_128_ofb128

606 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
607 #define NID_aes_192_cfb NID_aes_192_cfb128

610 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
611 #define NID_aes_192_ofb NID_aes_192_ofb128

614 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
615 #define NID_aes_256_cfb NID_aes_256_cfb128

618 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
619 #define NID_aes_256_ofb NID_aes_256_ofb128

622 /* List of supported ciphers. */
623 static int padlock_cipher_nids[] = {
632 NID_aes_192_cfb, /* FIXME: AES192/256 CFB/OFB don't work. */

643 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
644 sizeof(padlock_cipher_nids[0]));
646 /* Function prototypes ... */
647 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
648 const unsigned char *iv, int enc);
649 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
650 const unsigned char *in, size_t nbytes);

/* Round a pointer up to the next 16-byte boundary; the cipher_data
 * area is over-allocated by 16 bytes (see DECLARE_AES_EVP) so the
 * aligned struct always fits. */
652 #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
653 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
654 #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
655 NEAREST_ALIGNED(ctx->cipher_data))
657 /* Declaring so many ciphers by hand would be a pain.
658 Instead introduce a bit of preprocessor magic :-) */
/* Expands to a const EVP_CIPHER named padlock_aes_<ksize>_<lmode>.
 * cipher_data is sized sizeof(struct padlock_cipher_data)+16 so
 * ALIGNED_CIPHER_DATA() can realign it to a 16-byte boundary. */
659 #define DECLARE_AES_EVP(ksize,lmode,umode) \
660 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
661 NID_aes_##ksize##_##lmode, \
663 AES_KEY_SIZE_##ksize, \
665 0 | EVP_CIPH_##umode##_MODE, \
666 padlock_aes_init_key, \
667 padlock_aes_cipher, \
669 sizeof(struct padlock_cipher_data) + 16, \
670 EVP_CIPHER_set_asn1_iv, \
671 EVP_CIPHER_get_asn1_iv, \

/* Instantiate all 12 supported key-size/mode combinations. */
676 DECLARE_AES_EVP(128,ecb,ECB);
677 DECLARE_AES_EVP(128,cbc,CBC);
678 DECLARE_AES_EVP(128,cfb,CFB);
679 DECLARE_AES_EVP(128,ofb,OFB);

681 DECLARE_AES_EVP(192,ecb,ECB);
682 DECLARE_AES_EVP(192,cbc,CBC);
683 DECLARE_AES_EVP(192,cfb,CFB);
684 DECLARE_AES_EVP(192,ofb,OFB);

686 DECLARE_AES_EVP(256,ecb,ECB);
687 DECLARE_AES_EVP(256,cbc,CBC);
688 DECLARE_AES_EVP(256,cfb,CFB);
689 DECLARE_AES_EVP(256,ofb,OFB);
/* ENGINE ciphers callback: with nid==0 report the supported-NID
 * list; otherwise hand back the matching EVP_CIPHER, or fail for
 * NIDs we do not implement. */
692 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
694 /* No specific cipher => return a list of supported nids ... */
696 *nids = padlock_cipher_nids;
697 return padlock_cipher_nids_num;

700 /* ... or the requested "cipher" otherwise */
702 case NID_aes_128_ecb:
703 *cipher = &padlock_aes_128_ecb;
705 case NID_aes_128_cbc:
706 *cipher = &padlock_aes_128_cbc;
708 case NID_aes_128_cfb:
709 *cipher = &padlock_aes_128_cfb;
711 case NID_aes_128_ofb:
712 *cipher = &padlock_aes_128_ofb;

715 case NID_aes_192_ecb:
716 *cipher = &padlock_aes_192_ecb;
718 case NID_aes_192_cbc:
719 *cipher = &padlock_aes_192_cbc;
721 case NID_aes_192_cfb:
722 *cipher = &padlock_aes_192_cfb;
724 case NID_aes_192_ofb:
725 *cipher = &padlock_aes_192_ofb;

728 case NID_aes_256_ecb:
729 *cipher = &padlock_aes_256_ecb;
731 case NID_aes_256_cbc:
732 *cipher = &padlock_aes_256_cbc;
734 case NID_aes_256_cfb:
735 *cipher = &padlock_aes_256_cfb;
737 case NID_aes_256_ofb:
738 *cipher = &padlock_aes_256_ofb;

742 /* Sorry, we don't support this NID */
750 /* Prepare the encryption key for PadLock usage */
/* EVP init_key callback: fills the aligned padlock_cipher_data with
 * the control word and key schedule.  AES-128 keys are loaded raw
 * (hardware expands them); AES-192/256 are expanded in software and
 * byte-swapped to the layout the hardware expects. */
752 padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
753 const unsigned char *iv, int enc)
755 struct padlock_cipher_data *cdata;
756 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

758 if (key==NULL) return 0; /* ERROR */

760 cdata = ALIGNED_CIPHER_DATA(ctx);
761 memset(cdata, 0, sizeof(struct padlock_cipher_data));

763 /* Prepare Control word. */
764 cdata->cword.b.encdec = (ctx->encrypt == 0);
/* rounds: 10/12/14 for 128/192/256-bit keys; ksize: 0/1/2. */
765 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
766 cdata->cword.b.ksize = (key_len - 128) / 64;

770 /* PadLock can generate an extended key for
771 AES128 in hardware */
772 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
773 cdata->cword.b.keygen = 0;

778 /* Generate an extended AES key in software.
779 Needed for AES192/AES256 */
780 /* Well, the above applies to Stepping 8 CPUs
781 and is listed as hardware errata. They most
782 likely will fix it at some point and then
783 a check for stepping would be due here. */
785 AES_set_encrypt_key(key, key_len, &cdata->ks);
787 AES_set_decrypt_key(key, key_len, &cdata->ks);

789 /* OpenSSL internal functions use byte-swapped extended key. */
790 padlock_bswapl(&cdata->ks);

792 cdata->cword.b.keygen = 1;

801 * This is done to cover for cases when user reuses the
802 * context for new key. The catch is that if we don't do
803 * this, padlock_aes_cipher might proceed with old key...
805 padlock_reload_key ();
811 * Simplified version of padlock_aes_cipher() used when
812 * 1) both input and output buffers are at aligned addresses.
814 * 2) running on a newer CPU that doesn't require aligned buffers.
/* Runs the whole request in one 'rep xcrypt' per mode.  CBC/CFB
 * propagate the chained IV back into ctx->iv from the value the
 * instruction returns; OFB copies it from cdata->iv.  cdata->iv is
 * wiped afterwards so no IV material lingers in the context. */
817 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
818 const unsigned char *in_arg, size_t nbytes)
820 struct padlock_cipher_data *cdata;

823 cdata = ALIGNED_CIPHER_DATA(ctx);
824 padlock_verify_context(cdata);

826 switch (EVP_CIPHER_CTX_mode(ctx)) {
827 case EVP_CIPH_ECB_MODE:
828 padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);

831 case EVP_CIPH_CBC_MODE:
832 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
833 iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
834 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);

837 case EVP_CIPH_CFB_MODE:
838 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
839 iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
840 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);

843 case EVP_CIPH_OFB_MODE:
844 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
845 padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
846 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);

853 memset(cdata->iv, 0, AES_BLOCK_SIZE);
858 #ifndef PADLOCK_CHUNK
859 # define PADLOCK_CHUNK 4096 /* Must be a power of 2 larger than 16 */
861 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
862 # error "insane PADLOCK_CHUNK..."

865 /* Re-align the arguments to 16-Bytes boundaries and run the
866 encryption function itself. This function is not AES-specific. */
/* EVP do_cipher callback.  Rejects partial blocks, takes the fast
 * path (padlock_aes_cipher_omnivorous) when both buffers are already
 * 16-byte aligned, and otherwise processes the data in
 * PADLOCK_CHUNK-sized pieces through a stack bounce buffer obtained
 * with alloca(), copying in/out around each xcrypt call.  The bounce
 * buffer and cdata->iv are wiped before returning. */
868 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
869 const unsigned char *in_arg, size_t nbytes)
871 struct padlock_cipher_data *cdata;
875 int inp_misaligned, out_misaligned, realign_in_loop;
876 size_t chunk, allocated=0;

880 if (nbytes % AES_BLOCK_SIZE)
881 return 0; /* are we expected to do tail processing? */

883 /* VIA promises CPUs that won't require alignment in the future.
884 For now padlock_aes_align_required is initialized to 1 and
885 the condition is never met... */
886 if (!padlock_aes_align_required)
887 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

889 inp_misaligned = (((size_t)in_arg) & 0x0F);
890 out_misaligned = (((size_t)out_arg) & 0x0F);

892 /* Note that even if output is aligned and input not,
893 * I still prefer to loop instead of copy the whole
894 * input and then encrypt in one stroke. This is done
895 * in order to improve L1 cache utilization... */
896 realign_in_loop = out_misaligned|inp_misaligned;

898 if (!realign_in_loop)
899 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

901 /* this takes one "if" out of the loops */
/* First pass handles the tail (nbytes mod PADLOCK_CHUNK); all
 * following passes are full PADLOCK_CHUNK iterations. */
903 chunk = nbytes;
904 if (chunk==0) chunk = PADLOCK_CHUNK;

906 if (out_misaligned) {
907 /* optimize for small input */
908 allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
909 out = alloca(0x10 + allocated);
910 out = NEAREST_ALIGNED(out);

915 cdata = ALIGNED_CIPHER_DATA(ctx);
916 padlock_verify_context(cdata);

918 switch (EVP_CIPHER_CTX_mode(ctx)) {
919 case EVP_CIPH_ECB_MODE:
922 inp = padlock_memcpy(out, in_arg, chunk);
927 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
930 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
932 out = out_arg+=chunk;
935 chunk = PADLOCK_CHUNK;

939 case EVP_CIPH_CBC_MODE:
940 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
944 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
945 chunk = PADLOCK_CHUNK;
946 cbc_shortcut: /* optimize for small input */
948 inp = padlock_memcpy(out, in_arg, chunk);
953 iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
956 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
958 out = out_arg+=chunk;
960 } while (nbytes -= chunk);
961 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);

964 case EVP_CIPH_CFB_MODE:
965 memcpy (cdata->iv, ctx->iv, AES_BLOCK_SIZE);
969 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
970 chunk = PADLOCK_CHUNK;
971 cfb_shortcut: /* optimize for small input */
973 inp = padlock_memcpy(out, in_arg, chunk);
978 iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
981 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
983 out = out_arg+=chunk;
985 } while (nbytes -= chunk);
986 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);

989 case EVP_CIPH_OFB_MODE:
990 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
993 inp = padlock_memcpy(out, in_arg, chunk);
998 padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1001 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1003 out = out_arg+=chunk;
1006 chunk = PADLOCK_CHUNK;
1008 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);

1015 /* Clean the realign buffer if it was used */
/* Plaintext may have passed through the bounce buffer; scrub it
 * word by word via a volatile pointer so the clear isn't elided. */
1016 if (out_misaligned) {
1017 volatile unsigned long *p=(void *)out;
1018 size_t n = allocated/sizeof(*p);

1022 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1027 /* ===== Random Number Generator ===== */
1029 * This code is not engaged. The reason is that it does not comply
1030 * with recommendations for VIA RNG usage for secure applications
1031 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1032 * provide meaningful error control...

1034 /* Wrapper that provides an interface between the API and
1035 the raw PadLock RNG */
/* Pulls 'count' bytes from the hardware RNG: 8-byte xstore calls
 * while count >= 8, then single-byte calls (quality factor 3) for
 * the remainder.  Any status other than "RNG enabled, expected
 * byte count, no filter failures" aborts with 0. */
1037 padlock_rand_bytes(unsigned char *output, int count)
1039 unsigned int eax, buf;

1041 while (count >= 8) {
1042 eax = padlock_xstore(output, 0);
1043 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1044 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1045 if (eax&(0x1F<<10)) return 0;
1046 if ((eax&0x1F)==0) continue; /* no data, retry... */
1047 if ((eax&0x1F)!=8) return 0; /* fatal failure... */

1052 eax = padlock_xstore(&buf, 3);
1053 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1054 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1055 if (eax&(0x1F<<10)) return 0;
1056 if ((eax&0x1F)==0) continue; /* no data, retry... */
1057 if ((eax&0x1F)!=1) return 0; /* fatal failure... */
1058 *output++ = (unsigned char)buf;

/* volatile store: make sure the last random byte really is wiped. */
1061 *(volatile unsigned int *)&buf=0;
1066 /* Dummy but necessary function */
/* RAND_METHOD status callback; presumably always reports "seeded"
 * since the hardware RNG needs no seeding (body not visible here). */
1068 padlock_rand_status(void)

1073 /* Prepare structure for registration */
/* The same routine serves both bytes and pseudorand slots. */
1074 static RAND_METHOD padlock_rand = {
1076 padlock_rand_bytes, /* bytes */
1079 padlock_rand_bytes, /* pseudorand */
1080 padlock_rand_status, /* rand status */

1083 #endif /* COMPILE_HW_PADLOCK */
1085 #endif /* !OPENSSL_NO_HW_PADLOCK */
1086 #endif /* !OPENSSL_NO_HW */