2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
40 * 6. Redistributions of any form whatsoever must retain the following
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
70 # define alloca _alloca
71 # define snprintf _snprintf
74 #include <openssl/crypto.h>
75 #include <openssl/dso.h>
76 #include <openssl/engine.h>
77 #include <openssl/evp.h>
78 #include <openssl/aes.h>
81 #ifndef OPENSSL_NO_HW_PADLOCK
83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
86 # define DYNAMIC_ENGINE
88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89 # ifdef ENGINE_DYNAMIC_SUPPORT
90 # define DYNAMIC_ENGINE
93 # error "Only OpenSSL >= 0.9.7 is supported"
96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
97 Not only that it doesn't exist elsewhere, but it
98 even can't be compiled on other platforms!
100 In addition, because of the heavy use of inline assembler,
101 compiler choice is limited to GCC and Microsoft C. */
102 #undef COMPILE_HW_PADLOCK
103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if defined(__i386__) || defined(__i386) || defined(_M_IX86)
105 # define COMPILE_HW_PADLOCK
109 static ENGINE *ENGINE_padlock (void);
/*
 * Public entry point: construct the PadLock ENGINE and register it with
 * OpenSSL's global engine list.  Compiled to a no-op where PadLock
 * support was not compiled in.
 */
void ENGINE_load_padlock (void)
/* On non-x86 CPUs it just returns. */
#ifdef COMPILE_HW_PADLOCK
	/* Build a fully initialized ENGINE instance (registration and
	   cleanup lines are outside this fragment). */
	ENGINE *toadd = ENGINE_padlock ();
123 #ifdef COMPILE_HW_PADLOCK
124 /* Function for ENGINE detection and control */
125 static int padlock_available(void);
126 static int padlock_init(ENGINE *e);
129 static RAND_METHOD padlock_rand;
132 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
135 static const char *padlock_id = "padlock";
136 static char padlock_name[100];
138 /* Available features */
139 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
140 static int padlock_use_rng = 0; /* Random Number Generator */
141 static int padlock_aes_align_required = 1;
143 /* ===== Engine "management" functions ===== */
145 /* Prepare the ENGINE structure for registration */
/*
 * Fill in the ENGINE structure prior to registration: probe the CPU
 * features, build a descriptive engine name and install the callbacks.
 * Fails (falls into the error branch below) if any ENGINE_set_* call
 * fails.
 */
padlock_bind_helper(ENGINE *e)
	/* Check available features */
#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	/* Generate a nice engine name with available features */
	snprintf(padlock_name, sizeof(padlock_name), "VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");
	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||
	    !ENGINE_set_init_function(e, padlock_init) ||
	    /* Cipher and RAND methods are installed only when the CPU
	       actually advertised the corresponding PadLock unit. */
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
	/* Everything looks good */
	/* Allocate a fresh ENGINE object (fragment: the enclosing
	   ENGINE_padlock() header is outside this view). */
	ENGINE *eng = ENGINE_new();
	/* Bind id/name/callbacks; the failure branch (not shown here)
	   presumably discards the engine -- TODO confirm. */
	if (!padlock_bind_helper(eng)) {
194 /* Check availability of the engine */
/* ENGINE init callback: report whether any PadLock unit (RNG or ACE)
   was detected, i.e. whether this engine is usable at all. */
padlock_init(ENGINE *e)
	return (padlock_use_rng || padlock_use_ace);
201 /* This stuff is needed if this ENGINE is being compiled into a self-contained
204 #ifdef DYNAMIC_ENGINE
/* Dynamic-engine bind callback: accept only our own id (or NULL)
   and delegate the actual setup to padlock_bind_helper(). */
padlock_bind_fn(ENGINE *e, const char *id)
	/* A different engine id was requested -- not us. */
	if (id && (strcmp(id, padlock_id) != 0)) {
	if (!padlock_bind_helper(e)) {
219 IMPLEMENT_DYNAMIC_CHECK_FN ();
220 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn);
221 #endif /* DYNAMIC_ENGINE */
223 /* ===== Here comes the "real" engine ===== */
225 /* Some AES-related constants */
226 #define AES_BLOCK_SIZE 16
227 #define AES_KEY_SIZE_128 16
228 #define AES_KEY_SIZE_192 24
229 #define AES_KEY_SIZE_256 32
231 /* Here we store the status information relevant to the
234 * Inline assembler in PADLOCK_XCRYPT_ASM()
235 * depends on the order of items in this structure.
236 * Don't blindly modify, reorder, etc!
struct padlock_cipher_data
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];	/* pad[] presumably forces the control
					   word to 16 bytes; the named bitfield
					   view 'b' used elsewhere in this file
					   is outside this fragment */
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
255 * Essentially this variable belongs in thread local storage.
256 * Having this variable global on the other hand can only cause
257 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
260 static volatile struct padlock_cipher_data *padlock_saved_context;
263 * =======================================================
264 * Inline assembler section(s).
265 * =======================================================
266 * Order of arguments is chosen to facilitate Windows port
267 * using __fastcall calling convention. If you wish to add
268 * more routines, keep in mind that first __fastcall
269 * argument is passed in %ecx and second - in %edx.
270 * =======================================================
272 #if defined(__GNUC__) && __GNUC__>=2
274 * As for excessive "push %ebx"/"pop %ebx" found all over.
275 * When generating position-independent code GCC won't let
276 * us use "b" in assembler templates nor even respect "ebx"
277 * in "clobber description." Therefore the trouble...
280 /* Helper function - check if a CPUID instruction
281 is available on this CPU */
/* Returns non-zero iff this CPU implements the CPUID instruction. */
padlock_insn_cpuid_available(void)
	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
		"xorl $0x200000, %%eax\n"	/* flip the ID bit */
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"	/* remember what we wrote */
		"andl $0x200000, %%eax\n"	/* ... and what read back */
		"xorl %%eax, %%ecx\n"		/* zero iff the write stuck */
		: "=r" (result) : : "eax", "ecx");
	return (result == 0);
307 /* Load supported features of the CPU to see if
308 the PadLock is available. */
/* Probe CPUID for PadLock: verify a VIA/Centaur CPU, then read the
   Centaur extended feature flags and record which units are usable in
   padlock_use_ace / padlock_use_rng.  Returns non-zero if at least one
   unit was found. */
padlock_available(void)
	char vendor_string[16];
	unsigned int eax, edx;
	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
	/* Are we running on the Centaur (VIA) CPU? */
	vendor_string[12] = 0;
		/* Store the 12-byte vendor id (EBX,EDX,ECX) into the
		   buffer addressed by %edi. */
		"movl	%%ebx,(%%edi)\n"
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
	/* Check for Centaur Extended Feature Flags presence */
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
	/* Read the Centaur Extended Feature Flags */
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");
	/* Fill up some flags */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));	/* both ACE flag bits set */
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));	/* both RNG flag bits set */
	return padlock_use_ace + padlock_use_rng;
352 /* Our own htonl()/ntohl() */
/* Byte-swap every 32-bit word of the expanded AES key in place,
   using the bswapl instruction. */
padlock_bswapl(AES_KEY *ks)
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned long *key = ks->rd_key;
		asm volatile ("bswapl %0" : "+r"(*key));
365 /* Force key reload from memory to the CPU microcode.
366 Loading EFLAGS from the stack clears EFLAGS[30]
367 which does the trick. */
padlock_reload_key(void)
	/* Rewriting EFLAGS from the stack clears EFLAGS[30], which forces
	   the xcrypt unit to re-read the key (see comment above). */
	asm volatile ("pushfl; popfl");
375 * This is heuristic key context tracing. At first one
376 * believes that one should use atomic swap instructions,
377 * but it's not actually necessary. Point is that if
378 * padlock_saved_context was changed by another thread
379 * after we've read it and before we compare it with cdata,
380 * our key *shall* be reloaded upon thread context switch
381 * and we are therefore set in either case...
/* Make sure the CPU's cached key context matches 'cdata' before an
   xcrypt run; see the heuristic-tracing commentary above.  (Most of
   the asm body is outside this fragment.) */
padlock_verify_context(struct padlock_cipher_data *cdata)
		:"+m"(padlock_saved_context)
		: "r"(padlock_saved_context), "r"(cdata) : "cc");
400 /* Template for padlock_xcrypt_* modes */
402 * The offsets used with 'leal' instructions
403 * describe items of the 'padlock_cipher_data'
/*
 * Expand to a static inline wrapper around one "rep xcrypt*"
 * instruction.  %edx/%ebx are pointed at fixed offsets 16 and 32
 * inside 'cdata' -- keep these in sync with the layout of
 * struct padlock_cipher_data (see the warning on that struct).
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
	asm volatile ( "pushl %%ebx\n"		\
		"	leal 16(%0),%%edx\n"		\
		"	leal 32(%0),%%ebx\n"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp)	\
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)	\
/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8");	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0");	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0");	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8");	/* rep xcryptofb */
428 /* The RNG call itself */
/* The RNG call itself: one XSTORE into *addr; edx selects the xstore
   mode (see VIA docs -- TODO confirm exact semantics).  Returns %eax,
   whose low bits encode status/byte count checked by the caller. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
	unsigned int eax_out;
	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
		: "=a"(eax_out),"=m"(*(unsigned *)addr)
		: "D"(addr), "d" (edx_in)
442 #elif defined(_MSC_VER)
444 * Unlike GCC these are real functions. In order to minimize impact
445 * on performance we adhere to __fastcall calling convention in
446 * order to get two first arguments passed through %ecx and %edx.
447 * Which kind of suits very well, as instructions in question use
448 * both %ecx and %edx as input:-)
/* Emit the two-byte xcrypt opcode prefix; the mode byte is supplied by
   'code' (the rest of this macro is outside this fragment). */
#define REP_XCRYPT(code)		\
	_asm _emit 0x0f _asm _emit 0xa7	\
 * The offsets used with 'lea' instructions
 * describe items of the 'padlock_cipher_data'
/* MSVC counterpart of the GCC PADLOCK_XCRYPT_ASM above. */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	      void *outp, const void *inp)	\
	_asm lea edx,[eax+16]		\
	_asm lea ebx,[eax+32]		\
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
/* MSVC flavour of the RNG call; result is returned in eax. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
/* Reload EFLAGS from the stack to force a hardware key reload
   (MSVC counterpart of the GCC version above). */
static void __fastcall
padlock_reload_key(void)
{ _asm pushfd _asm popfd }
/* MSVC flavour of the key-context check; compares the cached context
   with ours and records ours (surrounding asm not in this fragment). */
static void __fastcall
padlock_verify_context(void *cdata)
	cmp	ecx,padlock_saved_context
	mov	padlock_saved_context,ecx
/* MSVC flavour of the CPUID probe; only the feature-flag stores are
   visible in this fragment -- the detection logic around them is not. */
padlock_available(void)
	mov	padlock_use_ace,1
	mov	padlock_use_rng,1
static void __fastcall
/* Byte-swap the expanded key in place (MSVC counterpart of bswapl). */
padlock_bswapl(void *key)
568 /* ===== AES encryption/decryption ===== */
570 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
571 #define NID_aes_128_cfb NID_aes_128_cfb128
574 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
575 #define NID_aes_128_ofb NID_aes_128_ofb128
578 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
579 #define NID_aes_192_cfb NID_aes_192_cfb128
582 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
583 #define NID_aes_192_ofb NID_aes_192_ofb128
586 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
587 #define NID_aes_256_cfb NID_aes_256_cfb128
590 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
591 #define NID_aes_256_ofb NID_aes_256_ofb128
594 /* List of supported ciphers. */
static int padlock_cipher_nids[] = {
//	NID_aes_192_cfb,	/* FIXME: AES192/256 CFB/OFB don't work. */
/* Number of NIDs advertised by the table above. */
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));
614 /* Function prototypes ... */
615 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
616 const unsigned char *iv, int enc);
617 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
618 const unsigned char *in, unsigned int nbytes);
/* Round 'ptr' up to the next 16-byte boundary (ptr itself when already
   aligned).  (0 - addr) & 0x0F is the distance to that boundary. */
#define NEAREST_ALIGNED(ptr) ( (char *)(ptr) + \
	( (0x0 - (size_t)(ptr)) & 0x0F ) )
/* View ctx->cipher_data through a 16-byte aligned pointer.  The EVP
   ctx_size field reserves sizeof(struct padlock_cipher_data) + 16
   bytes, so rounding up always stays inside the allocation. */
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))
/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
/* NOTE(review): several EVP_CIPHER initializer fields (block size,
   iv length, flags, cleanup, app_data, ...) fall outside this
   fragment. */
#define DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	AES_KEY_SIZE_##ksize,		\
	0 | EVP_CIPH_##umode##_MODE,	\
	padlock_aes_init_key,		\
	padlock_aes_cipher,		\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,		\
	EVP_CIPHER_get_asn1_iv,		\
DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);
DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);
DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
/*
 * ENGINE ciphers callback.  With cipher==NULL, publish the NID list;
 * otherwise hand back the EVP_CIPHER matching 'nid', or fail for NIDs
 * we do not implement.  (break/return lines between cases fall outside
 * this fragment.)
 */
padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
	/* No specific cipher => return a list of supported nids ... */
		*nids = padlock_cipher_nids;
		return padlock_cipher_nids_num;
	/* ... or the requested "cipher" otherwise */
	case NID_aes_128_ecb:
		*cipher = &padlock_aes_128_ecb;
	case NID_aes_128_cbc:
		*cipher = &padlock_aes_128_cbc;
	case NID_aes_128_cfb:
		*cipher = &padlock_aes_128_cfb;
	case NID_aes_128_ofb:
		*cipher = &padlock_aes_128_ofb;
	case NID_aes_192_ecb:
		*cipher = &padlock_aes_192_ecb;
	case NID_aes_192_cbc:
		*cipher = &padlock_aes_192_cbc;
	case NID_aes_192_cfb:
		*cipher = &padlock_aes_192_cfb;
	case NID_aes_192_ofb:
		*cipher = &padlock_aes_192_ofb;
	case NID_aes_256_ecb:
		*cipher = &padlock_aes_256_ecb;
	case NID_aes_256_cbc:
		*cipher = &padlock_aes_256_cbc;
	case NID_aes_256_cfb:
		*cipher = &padlock_aes_256_cfb;
	case NID_aes_256_ofb:
		*cipher = &padlock_aes_256_ofb;
	/* Sorry, we don't support this NID */
718 /* Prepare the encryption key for PadLock usage */
/*
 * EVP init callback: prepare the aligned padlock_cipher_data for this
 * context -- control word (direction, rounds, key size) plus either the
 * raw 128-bit key (hardware key expansion) or a software-expanded,
 * byte-swapped schedule for AES-192/256.  Returns 0 on error.
 */
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
	const unsigned char *iv, int enc)
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;	/* bits */
	if (key==NULL) return 0;	/* ERROR */
	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));
	/* Prepare Control word. */
	cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;	/* 10/12/14 */
	cdata->cword.b.ksize = (key_len - 128) / 64;		/* 0/1/2 */
		/* PadLock can generate an extended key for
		   AES128 in hardware */
		memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
		cdata->cword.b.keygen = 0;
		/* Generate an extended AES key in software.
		   Needed for AES192/AES256 */
		/* Well, the above applies to Stepping 8 CPUs
		   and is listed as hardware errata. They most
		   likely will fix it at some point and then
		   a check for stepping would be due here. */
			AES_set_encrypt_key(key, key_len, &cdata->ks);
			AES_set_decrypt_key(key, key_len, &cdata->ks);
		/* OpenSSL internal functions use byte-swapped extended key. */
		padlock_bswapl(&cdata->ks);
		cdata->cword.b.keygen = 1;
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	padlock_reload_key ();
779 * Simplified version of padlock_aes_cipher() used when
780 * 1) both input and output buffers are at aligned addresses.
782 * 2) running on a newer CPU that doesn't require aligned buffers.
/*
 * Fast path used when no realignment is needed: run the whole buffer
 * through one xcrypt call per mode and carry the IV back into ctx.
 */
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
	const unsigned char *in_arg, size_t nbytes)
	struct padlock_cipher_data *cdata;
	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);
	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		/* OFB leaves the updated IV inside cdata, not in %eax. */
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
	/* don't leave key material in the (shared) IV scratch area */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);
826 #ifndef PADLOCK_CHUNK
827 # define PADLOCK_CHUNK 4096 /* Must be a power of 2 larger than 16 */
829 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
830 # error "insane PADLOCK_CHUNK..."
833 /* Re-align the arguments to 16-Bytes boundaries and run the
834 encryption function itself. This function is not AES-specific. */
/*
 * Main EVP do_cipher callback.  Re-aligns input/output to 16-byte
 * boundaries in PADLOCK_CHUNK-sized pieces (bouncing through an
 * alloca'd aligned buffer) and drives the xcrypt primitives.
 * Not AES-specific.  Returns 0 when nbytes is not block-aligned.
 */
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
	const unsigned char *in_arg, size_t nbytes)
	struct padlock_cipher_data *cdata;
	int  inp_misaligned, out_misaligned, realign_in_loop;
	size_t  chunk, allocated=0;
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	if (!padlock_aes_align_required)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);
	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copy the whole
	 * input and then encrypt in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;
	if (!realign_in_loop)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
	/* this takes one "if" out of the loops */
	/* first chunk is the "remainder" so later iterations are full-size */
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;
	if (out_misaligned) {
		/* optimize for small input */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);
	/* NOTE(review): 'chunk&~3' rounds the copy length down to a
	   multiple of 4; chunk is always a multiple of AES_BLOCK_SIZE
	   here, so this appears to be a no-op -- confirm intent. */
	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
			inp = memcpy(out, in_arg, chunk&~3);
		padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
			out_arg = (char *)memcpy(out_arg, out, chunk&~3) + chunk;
			out = out_arg+=chunk;
		chunk   = PADLOCK_CHUNK;
	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
		chunk = PADLOCK_CHUNK;
	cbc_shortcut: /* optimize for small input */
			inp = memcpy(out, in_arg, chunk&~3);
		iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
			out_arg = (char *)memcpy(out_arg, out, chunk&~3) + chunk;
			out = out_arg+=chunk;
		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
	case EVP_CIPH_CFB_MODE:
		memcpy (cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
		chunk = PADLOCK_CHUNK;
	cfb_shortcut: /* optimize for small input */
			inp = memcpy(out, in_arg, chunk&~3);
		iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
			out_arg = (char *)memcpy(out_arg, out, chunk&~3) + chunk;
			out = out_arg+=chunk;
		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
			inp = memcpy(out, in_arg, chunk&~3);
		padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
			out_arg = (char *)memcpy(out_arg, out, chunk&~3) + chunk;
			out = out_arg+=chunk;
		chunk   = PADLOCK_CHUNK;
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
	/* Clean the realign buffer if it was used */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t   n = allocated/sizeof(*p);
	/* scrub the IV scratch area before returning */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);
995 /* ===== Random Number Generator ===== */
997 * This code is not engaged. The reason is that it does not comply
998 * with recommendations for VIA RNG usage for secure applications
999 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1000 * provide meaningful error control...
1002 /* Wrapper that provides an interface between the API and
1003 the raw PadLock RNG */
/*
 * RAND bytes callback: pull data from the hardware via xstore, vetting
 * the status bits in %eax after every call.  Returns 0 on any
 * availability/quality failure.  (Note the disclaimer above: this code
 * is deliberately not engaged.)
 */
padlock_rand_bytes(unsigned char *output, int count)
	unsigned int eax, buf;
	/* Bulk path: 8 bytes per xstore directly into 'output'. */
	while (count >= 8) {
		eax = padlock_xstore(output, 0);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=8)	return 0; /* fatal failure... */
		/* Tail path (enclosing loop not visible in this fragment):
		   one byte at a time through the local 'buf'. */
		eax = padlock_xstore(&buf, 3);
		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
		if (eax&(0x1F<<10))	return 0;
		if ((eax&0x1F)==0)	continue; /* no data, retry... */
		if ((eax&0x1F)!=1)	return 0; /* fatal failure... */
		*output++ = (unsigned char)buf;
	/* scrub the last random byte; volatile store so the compiler
	   cannot optimize the wipe away */
	*(volatile unsigned int *)&buf=0;
1034 /* Dummy but necessary function */
1036 padlock_rand_status(void)
1041 /* Prepare structure for registration */
/* NOTE(review): the seed/cleanup/add slots of this RAND_METHOD fall
   outside this fragment. */
static RAND_METHOD padlock_rand = {
	padlock_rand_bytes,	/* bytes */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
1051 #endif /* COMPILE_HW_PADLOCK */
1053 #endif /* !OPENSSL_NO_HW_PADLOCK */
1054 #endif /* !OPENSSL_NO_HW */