/* Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *            http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for help with optimization,
 * assembler fixes, port to MS Windows and a lot of other
 * valuable work on this engine!
 */
/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 */
69 #include <openssl/crypto.h>
70 #include <openssl/dso.h>
71 #include <openssl/engine.h>
72 #include <openssl/evp.h>
73 #include <openssl/aes.h>
/* Compile-time configuration: decide whether the PadLock engine can be
 * built at all, and whether it may be built as a dynamically loadable
 * engine.  NOTE(review): this listing is elided -- the matching #endif
 * lines for several of these conditionals are missing from the visible
 * text; do not reflow without consulting the pristine source. */
76 #ifndef OPENSSL_NO_HW_PADLOCK
78 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
79 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
80 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
81 # define DYNAMIC_ENGINE
83 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
84 # ifdef ENGINE_DYNAMIC_SUPPORT
85 # define DYNAMIC_ENGINE
88 # error "Only OpenSSL >= 0.9.7 is supported"
/* PadLock is an x86-only feature: the inline assembler below cannot even
 * be compiled for other architectures, hence the hard platform gate. */
91 /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
92 Not only that it doesn't exist elsewhere, but it
93 even can't be compiled on other platforms!
95 In addition, because of the heavy use of inline assembler,
96 compiler choice is limited to GCC and Microsoft C. */
97 #undef COMPILE_HW_PADLOCK
98 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
99 # if defined(__i386__) || defined(__i386) || defined(_M_IX86)
100 # define COMPILE_HW_PADLOCK
104 static ENGINE *ENGINE_padlock (void);
106 void ENGINE_load_padlock (void)
108 /* On non-x86 CPUs it just returns. */
109 #ifdef COMPILE_HW_PADLOCK
110 ENGINE *toadd = ENGINE_padlock ();
118 #ifdef COMPILE_HW_PADLOCK
/* Forward declarations and file-scope state for the engine proper. */
119 /* Function for ENGINE detection and control */
120 static int padlock_available(void);
121 static int padlock_init(ENGINE *e);
124 static RAND_METHOD padlock_rand;
127 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
/* Engine identity: fixed id string plus a name buffer filled in at bind
 * time with the detected feature set (see padlock_bind_helper). */
130 static const char *padlock_id = "padlock";
131 static char padlock_name[100];
/* Feature flags, set by padlock_available() during bind. */
133 /* Available features */
134 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
135 static int padlock_use_rng = 0; /* Random Number Generator */
/* Older PadLock units require 16-byte aligned buffers; defaults to
 * "required".  NOTE(review): nothing visible ever clears this flag. */
136 static int padlock_aes_align_required = 1;
138 /* ===== Engine "management" functions ===== */
/* Probe the CPU, build the engine name string, and register the id,
 * name, init hook, ciphers and (optionally) RNG on ENGINE e.
 * Returns non-zero on success (per the ENGINE_set_* convention visible
 * in the error branch).  NOTE(review): the function's return type line,
 * braces and return statements are elided from this listing. */
140 /* Prepare the ENGINE structure for registration */
142 padlock_bind_helper(ENGINE *e)
144 /* Check available features */
/* RNG registration is deliberately compiled out -- see the commentary
 * above padlock_rand_bytes near the end of the file. */
147 #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
151 /* Generate a nice engine name with available features */
152 snprintf(padlock_name, sizeof(padlock_name), "VIA PadLock (%s, %s)",
153 padlock_use_rng ? "RNG" : "no-RNG",
154 padlock_use_ace ? "ACE" : "no-ACE");
/* Ciphers/RNG are only registered when the corresponding hardware
 * feature was detected; a failure in any ENGINE_set_* aborts the bind. */
156 /* Register everything or return with an error */
157 if (!ENGINE_set_id(e, padlock_id) ||
158 !ENGINE_set_name(e, padlock_name) ||
160 !ENGINE_set_init_function(e, padlock_init) ||
162 (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
163 (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
167 /* Everything looks good */
175 ENGINE *eng = ENGINE_new();
181 if (!padlock_bind_helper(eng)) {
189 /* Check availability of the engine */
191 padlock_init(ENGINE *e)
193 return (padlock_use_rng || padlock_use_ace);
/* Dynamic-engine glue: when built as a standalone loadable module, the
 * loader calls padlock_bind_fn with an optional engine id to match.
 * NOTE(review): braces and return statements are elided here. */
196 /* This stuff is needed if this ENGINE is being compiled into a self-contained
199 #ifdef DYNAMIC_ENGINE
201 padlock_bind_fn(ENGINE *e, const char *id)
/* Reject a bind request for any id other than "padlock". */
203 if (id && (strcmp(id, padlock_id) != 0)) {
207 if (!padlock_bind_helper(e)) {
214 IMPLEMENT_DYNAMIC_CHECK_FN ();
215 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn);
216 #endif /* DYNAMIC_ENGINE */
218 /* ===== Here comes the "real" engine ===== */
220 /* Some AES-related constants */
221 #define AES_BLOCK_SIZE 16
222 #define AES_KEY_SIZE_128 16
223 #define AES_KEY_SIZE_192 24
224 #define AES_KEY_SIZE_256 32
/* Per-context state consumed directly by the XCRYPT inline assembler:
 * iv at offset 0, control word at offset 16, key schedule at offset 32
 * (matches the leal 16(%0)/leal 32(%0) in PADLOCK_XCRYPT_ASM below).
 * NOTE(review): the bit-field member of the cword union (original lines
 * between "pad[4]" and "} cword") is elided from this listing. */
226 /* Here we store the status information relevant to the
229 * Inline assembler in PADLOCK_XCRYPT_ASM()
230 * depends on the order of items in this structure.
231 * Don't blindly modify, reorder, etc!
233 struct padlock_cipher_data
235 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
236 union { unsigned int pad[4];
245 } cword; /* Control word */
246 AES_KEY ks; /* Encryption key */
250 * =======================================================
251 * Inline assembler section(s).
252 * =======================================================
253 * Order of arguments is chosen to facilitate Windows port
254 * using __fastcall calling convention. If you wish to add
255 * more routines, keep in mind that in __fastcall first
256 * argument is passed in %ecx and second - in %edx.
257 * =======================================================
259 #if defined(__GNUC__) && __GNUC__>=2
261 * As for excessive "push %ebx"/"pop %ebx" found all over.
262 * When generating position-independent code GCC won't let
263 * us use "b" in assembler templates nor even respect "ebx"
264 * in "clobber description." Therefore the trouble...
/* Classic pre-CPUID detection: try to flip EFLAGS bit 21 (ID); if the
 * flip sticks, the CPUID instruction exists.  Returns non-zero when
 * CPUID is available (result == 0 after the xor round-trip).
 * NOTE(review): several asm lines (pushf/popl pairs) and the function's
 * braces are elided from this listing. */
267 /* Helper function - check if a CPUID instruction
268 is available on this CPU */
270 padlock_insn_cpuid_available(void)
274 /* We're checking if the bit #21 of EFLAGS
275 can be toggled. If yes = CPUID is available. */
279 "xorl $0x200000, %%eax\n"
280 "movl %%eax, %%ecx\n"
281 "andl $0x200000, %%ecx\n"
286 "andl $0x200000, %%eax\n"
287 "xorl %%eax, %%ecx\n"
289 : "=r" (result) : : "eax", "ecx");
291 return (result == 0);
/* Detect PadLock hardware: verify CPUID exists, verify the vendor is
 * VIA ("CentaurHauls"), then read the Centaur extended feature flags
 * (CPUID leaf 0xC0000001) and set padlock_use_ace / padlock_use_rng.
 * Each feature needs two bits set: "exists" and "enabled" (0x3 mask).
 * Returns non-zero when at least one feature is usable.
 * NOTE(review): the cpuid setup lines (eax assignments) and function
 * braces are elided; %ebx is saved around cpuid for PIC builds. */
294 /* Load supported features of the CPU to see if
295 the PadLock is available. */
297 padlock_available(void)
299 char vendor_string[16];
300 unsigned int eax, edx;
302 /* First check if the CPUID instruction is available at all... */
303 if (! padlock_insn_cpuid_available())
306 /* Are we running on the Centaur (VIA) CPU? */
308 vendor_string[12] = 0;
312 "movl %%ebx,(%%edi)\n"
313 "movl %%edx,4(%%edi)\n"
314 "movl %%ecx,8(%%edi)\n"
316 : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
317 if (strcmp(vendor_string, "CentaurHauls") != 0)
320 /* Check for Centaur Extended Feature Flags presence */
322 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
323 : "+a"(eax) : : "ecx", "edx");
324 if (eax < 0xC0000001)
327 /* Read the Centaur Extended Feature Flags */
329 asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
330 : "+a"(eax), "=d"(edx) : : "ecx");
332 /* Fill up some flags */
333 padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
334 padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
336 return padlock_use_ace + padlock_use_rng;
/* Byte-swap every word of the AES key schedule in place: PadLock and
 * OpenSSL's software AES disagree on key-schedule endianness.
 * NOTE(review): the loop construct around the bswapl and the function
 * braces are elided from this listing. */
339 /* Our own htonl()/ntohl() */
341 padlock_bswapl(AES_KEY *ks)
343 size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
344 unsigned long *key = ks->rd_key;
347 asm volatile ("bswapl %0" : "+r"(*key));
/* Writing EFLAGS back from the stack clears EFLAGS[30], which forces
 * the PadLock unit to re-read the key material on the next XCRYPT. */
352 /* Force key reload from memory to the CPU microcode.
353 Loading EFLAGS from the stack clears EFLAGS[30]
354 which does the trick. */
356 padlock_reload_key(void)
358 asm volatile ("pushfl; popfl");
/* Track which cipher context last touched the PadLock unit; when a
 * different cdata shows up, force a key reload (see comment below for
 * why no atomic compare-and-swap is needed).  NOTE(review): the asm
 * body that compares saved_cdata against cdata and conditionally runs
 * pushfl/popfl is elided from this listing. */
362 * This is heuristic key context tracing. At first one
363 * believes that one should use atomic swap instructions,
364 * but it's not actually necessary. Point is that if
365 * saved_cdata was changed by another thread after we've
366 * read it and before we compare it with cdata, our key
367 * *shall* be reloaded upon thread context switch and
368 * we are therefore set in either case...
371 padlock_verify_context(struct padlock_cipher_data *cdata)
/* Function-local static: remembers the last context across calls. */
373 static struct padlock_cipher_data *saved_cdata;
385 :"+m"(saved_cdata) : "r"(saved_cdata), "r"(cdata) : "cc");
/* Code-generating macro: emits one inline function per XCRYPT mode.
 * Arguments are (block count, context, out, in); the rep_xcrypt opcode
 * bytes are spliced in per mode below.  The leal offsets (16, 32) must
 * match the layout of struct padlock_cipher_data above.
 * NOTE(review): the asm body between the leal lines and the constraint
 * lists, plus the function's return, are elided from this listing. */
388 /* Template for padlock_xcrypt_* modes */
390 * The offsets used with 'leal' instructions
391 * describe items of the 'padlock_cipher_data'
394 #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
395 static inline void *name(size_t cnt, \
396 struct padlock_cipher_data *cdata, \
397 void *out, const void *inp) \
399 asm volatile ( "pushl %%ebx\n" \
400 " leal 16(%0),%%edx\n" \
401 " leal 32(%0),%%ebx\n" \
404 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
405 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
/* Raw opcode bytes because contemporary assemblers don't know the
 * xcrypt mnemonics. */
410 /* Generate all functions with appropriate opcodes */
411 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8"); /* rep xcryptecb */
412 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0"); /* rep xcryptcbc */
413 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0"); /* rep xcryptcfb */
414 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8"); /* rep xcryptofb */
/* xstore: the PadLock RNG instruction.  Writes random bytes at addr,
 * returns status/availability info in %eax. */
416 /* The RNG call itself */
417 static inline unsigned int
418 padlock_xstore(void *addr, unsigned int edx_in)
420 unsigned int eax_out;
422 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
423 : "=a"(eax_out),"=m"(*(unsigned *)addr)
424 : "D"(addr), "d" (edx_in)
/* Microsoft C counterpart of the GCC inline-assembler section above.
 * Same set of primitives (xcrypt wrappers, xstore, reload_key,
 * verify_context, available, bswapl) expressed with _asm blocks and
 * __fastcall so the first two arguments arrive in ecx/edx.
 * NOTE(review): large parts of this section (function bodies, the
 * cpuid sequence in padlock_available, the bswap loop) are elided. */
430 #elif defined(_MSC_VER)
432 * Unlike GCC these are real functions. In order to minimize impact
433 * on performance we adhere to __fastcall calling convention in
434 * order to get two first arguments passed through %ecx and %edx.
435 * Which kind of suits very well, as instructions in question use
436 * both %ecx and %edx as input:-)
438 #define REP_XCRYPT(code) \
440 _asm _emit 0x0f _asm _emit 0xa7 \
444 * The offsets used with 'lea' instructions
445 * describe items of the 'padlock_cipher_data'
448 #define PADLOCK_XCRYPT_ASM(name,code) \
449 static void * __fastcall \
450 name (size_t cnt, void *cdata, \
451 void *outp, const void *inp) \
453 _asm lea ebx,[eax+16] \
454 _asm lea edx,[eax+32] \
460 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
461 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
462 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
463 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
465 static int __fastcall
466 padlock_xstore(void *outp,unsigned int code)
468 _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
471 static void __fastcall
472 padlock_reload_key(void)
473 { _asm pushfd _asm popfd }
475 static void __fastcall
476 padlock_verify_context(void *cdata)
477 { static void *saved_cdata;
493 padlock_available(void)
528 mov padlock_use_ace,1
534 mov padlock_use_rng,1
541 static void __fastcall
542 padlock_bswapl(void *key)
559 /* ===== AES encryption/decryption ===== */
/* Compatibility aliases: 0.9.7-era headers name the 128-bit-feedback
 * modes NID_aes_*_cfb128 / NID_aes_*_ofb128; map them to the short
 * names used throughout this file when the short names are absent. */
561 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
562 #define NID_aes_128_cfb NID_aes_128_cfb128
565 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
566 #define NID_aes_128_ofb NID_aes_128_ofb128
569 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
570 #define NID_aes_192_cfb NID_aes_192_cfb128
573 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
574 #define NID_aes_192_ofb NID_aes_192_ofb128
577 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
578 #define NID_aes_256_cfb NID_aes_256_cfb128
581 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
582 #define NID_aes_256_ofb NID_aes_256_ofb128
/* NID table advertised via padlock_ciphers(); NOTE(review): most of
 * the initializer entries are elided from this listing -- only the
 * commented-out 192-bit CFB line survives. */
585 /* List of supported ciphers. */
586 static int padlock_cipher_nids[] = {
594 // NID_aes_192_cfb, /* FIXME: AES192/256 CFB/OFB don't work. */
602 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
603 sizeof(padlock_cipher_nids[0]));
605 /* Function prototypes ... */
606 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
607 const unsigned char *iv, int enc);
608 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
609 const unsigned char *in, unsigned int nbytes);
/* Round ctx->cipher_data up to the next 16-byte boundary: the XCRYPT
 * instructions require 16-byte-aligned context data, and each EVP
 * cipher below over-allocates cipher_data by 16 bytes to allow this. */
611 #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)(ctx->cipher_data + ((0x10 - ((size_t)(ctx->cipher_data) & 0x0F)) & 0x0F)))
/* Stamp out one static EVP_CIPHER table per (key size, mode) pair.
 * NOTE(review): several initializer fields of the EVP_CIPHER (block
 * size, iv length, cleanup, app_data) are elided from this listing. */
613 /* Declaring so many ciphers by hand would be a pain.
614 Instead introduce a bit of preprocessor magic :-) */
615 #define DECLARE_AES_EVP(ksize,lmode,umode) \
616 static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
617 NID_aes_##ksize##_##lmode, \
619 AES_KEY_SIZE_##ksize, \
621 0 | EVP_CIPH_##umode##_MODE, \
622 padlock_aes_init_key, \
623 padlock_aes_cipher, \
625 sizeof(struct padlock_cipher_data) + 16, \
626 EVP_CIPHER_set_asn1_iv, \
627 EVP_CIPHER_get_asn1_iv, \
632 DECLARE_AES_EVP(128,ecb,ECB);
633 DECLARE_AES_EVP(128,cbc,CBC);
634 DECLARE_AES_EVP(128,cfb,CFB);
635 DECLARE_AES_EVP(128,ofb,OFB);
637 DECLARE_AES_EVP(192,ecb,ECB);
638 DECLARE_AES_EVP(192,cbc,CBC);
639 DECLARE_AES_EVP(192,cfb,CFB);
640 DECLARE_AES_EVP(192,ofb,OFB);
642 DECLARE_AES_EVP(256,ecb,ECB);
643 DECLARE_AES_EVP(256,cbc,CBC);
644 DECLARE_AES_EVP(256,cfb,CFB);
645 DECLARE_AES_EVP(256,ofb,OFB);
/* ENGINE ciphers callback.  With nid == 0 it returns the NID list and
 * its length; otherwise it selects the matching static EVP_CIPHER.
 * NOTE(review): the braces, break statements and the default branch of
 * the switch are elided from this listing. */
648 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
650 /* No specific cipher => return a list of supported nids ... */
652 *nids = padlock_cipher_nids;
653 return padlock_cipher_nids_num;
656 /* ... or the requested "cipher" otherwise */
658 case NID_aes_128_ecb:
659 *cipher = &padlock_aes_128_ecb;
661 case NID_aes_128_cbc:
662 *cipher = &padlock_aes_128_cbc;
664 case NID_aes_128_cfb:
665 *cipher = &padlock_aes_128_cfb;
667 case NID_aes_128_ofb:
668 *cipher = &padlock_aes_128_ofb;
671 case NID_aes_192_ecb:
672 *cipher = &padlock_aes_192_ecb;
674 case NID_aes_192_cbc:
675 *cipher = &padlock_aes_192_cbc;
677 case NID_aes_192_cfb:
678 *cipher = &padlock_aes_192_cfb;
680 case NID_aes_192_ofb:
681 *cipher = &padlock_aes_192_ofb;
684 case NID_aes_256_ecb:
685 *cipher = &padlock_aes_256_ecb;
687 case NID_aes_256_cbc:
688 *cipher = &padlock_aes_256_cbc;
690 case NID_aes_256_cfb:
691 *cipher = &padlock_aes_256_cfb;
693 case NID_aes_256_ofb:
694 *cipher = &padlock_aes_256_ofb;
698 /* Sorry, we don't support this NID */
/* EVP init_key callback: build the PadLock control word and key
 * schedule inside the 16-byte-aligned cipher_data area.  AES-128 keys
 * are expanded by the hardware (keygen=0); AES-192/256 use OpenSSL's
 * software key expansion, byte-swapped for the hardware (keygen=1).
 * NOTE(review): the braces, the key_len switch skeleton and the final
 * return are elided from this listing. */
706 /* Prepare the encryption key for PadLock usage */
708 padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
709 const unsigned char *iv, int enc)
711 struct padlock_cipher_data *cdata;
712 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
714 if (key==NULL) return 0; /* ERROR */
716 cdata = ALIGNED_CIPHER_DATA(ctx);
717 memset(cdata, 0, sizeof(struct padlock_cipher_data));
/* rounds: 10/12/14 and ksize: 0/1/2 for 128/192/256-bit keys. */
719 /* Prepare Control word. */
720 cdata->cword.b.encdec = (ctx->encrypt == 0);
721 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
722 cdata->cword.b.ksize = (key_len - 128) / 64;
726 /* PadLock can generate an extended key for
727 AES128 in hardware */
728 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
729 cdata->cword.b.keygen = 0;
734 /* Generate an extended AES key in software.
735 Needed for AES192/AES256 */
737 AES_set_encrypt_key(key, key_len, &cdata->ks);
739 AES_set_decrypt_key(key, key_len, &cdata->ks);
741 /* OpenSSL internal functions use byte-swapped extended key. */
742 padlock_bswapl(&cdata->ks);
744 cdata->cword.b.keygen = 1;
753 * This is done to cover for cases when user reuses the
754 * context for new key. The catch is that if we don't do
755 * this, padlock_aes_cipher might proceed with old key...
757 padlock_reload_key ();
/* Fast path: run one XCRYPT over the whole buffer when no realignment
 * copying is needed.  IV is staged through cdata->iv and the updated IV
 * is written back to ctx->iv after the operation (for OFB the hardware
 * updates cdata->iv in place).  NOTE(review): the iv local declaration,
 * break statements, default branch and return are elided. */
763 * Simplified version of padlock_aes_cipher() used when
764 * 1) both input and output buffers are at aligned addresses.
766 * 2) running on a newer CPU that doesn't require aligned buffers.
769 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
770 const unsigned char *in_arg, size_t nbytes)
772 struct padlock_cipher_data *cdata;
775 cdata = ALIGNED_CIPHER_DATA(ctx);
776 padlock_verify_context(cdata);
778 switch (EVP_CIPHER_CTX_mode(ctx)) {
779 case EVP_CIPH_ECB_MODE:
780 padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
783 case EVP_CIPH_CBC_MODE:
784 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
785 iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
786 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
789 case EVP_CIPH_CFB_MODE:
790 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
791 iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
792 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
795 case EVP_CIPH_OFB_MODE:
796 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
797 padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
798 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
/* Scrub the IV copy before returning. */
805 memset(cdata->iv, 0, AES_BLOCK_SIZE);
810 #ifndef PADLOCK_CHUNK
811 # define PADLOCK_CHUNK 4096 /* Must be a power of 2 larger than 16 */
813 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
814 # error "insane PADLOCK_CHUNK..."
/* Main EVP cipher callback.  When input or output is not 16-byte
 * aligned, data is bounced through an aligned stack buffer (alloca) in
 * PADLOCK_CHUNK-sized pieces; otherwise it delegates to the
 * omnivorous fast path above.  The first chunk is sized so that the
 * remaining length becomes a multiple of PADLOCK_CHUNK ("takes one if
 * out of the loops").  NOTE(review): many lines are elided here --
 * loop heads (do/while), the misaligned-input branches of each mode,
 * break statements, the default branch and the final return -- so this
 * listing must not be reflowed without the pristine source. */
817 /* Re-align the arguments to 16-Bytes boundaries and run the
818 encryption function itself. This function is not AES-specific. */
820 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
821 const unsigned char *in_arg, size_t nbytes)
823 struct padlock_cipher_data *cdata;
826 int inp_misaligned, out_misaligned, realign_in_loop;
827 size_t chunk, allocated;
831 if (nbytes % AES_BLOCK_SIZE)
832 return 0; /* are we expected to do tail processing? */
835 /* There is more work to support CPUs that don't require alignment.
836 Therefore disabled completely for now... */
837 if (!padlock_aes_align_required)
838 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
841 inp_misaligned = (((size_t)in_arg) & 0x0F);
842 out_misaligned = (((size_t)out_arg) & 0x0F);
844 /* Note that even if output is aligned and input not,
845 * I still prefer to loop instead of copy the whole
846 * input and then encrypt in one stroke. This is done
847 * in order to improve L1 cache utilization... */
848 realign_in_loop = out_misaligned|inp_misaligned;
850 if (!realign_in_loop)
851 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
853 /* this takes one "if" out of the loops */
855 chunk %= PADLOCK_CHUNK;
856 if (chunk==0) chunk = PADLOCK_CHUNK;
858 if (out_misaligned) {
859 /* optmize for small input */
860 allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
862 out = _alloca(0x10 + allocated);
864 out = alloca(0x10 + allocated);
866 out += (0x10 - ((size_t)out & 0x0F)) & 0x0F;
871 cdata = ALIGNED_CIPHER_DATA(ctx);
872 padlock_verify_context(cdata);
874 switch (EVP_CIPHER_CTX_mode(ctx)) {
875 case EVP_CIPH_ECB_MODE:
878 inp = memcpy(out, in_arg, chunk);
883 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
886 out_arg = memcpy(out_arg, out, chunk) + chunk;
888 out = out_arg+=chunk;
891 chunk = PADLOCK_CHUNK;
895 case EVP_CIPH_CBC_MODE:
896 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
900 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
901 chunk = PADLOCK_CHUNK;
902 cbc_shortcut: /* optimize for small input */
904 inp = memcpy(out, in_arg, chunk);
909 iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
912 out_arg = memcpy(out_arg, out, chunk) + chunk;
914 out = out_arg+=chunk;
916 } while (nbytes -= chunk);
917 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
920 case EVP_CIPH_CFB_MODE:
921 memcpy (cdata->iv, ctx->iv, AES_BLOCK_SIZE);
925 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
926 chunk = PADLOCK_CHUNK;
927 cfb_shortcut: /* optimize for small input */
929 inp = memcpy(out, in_arg, chunk);
934 iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
937 out_arg = memcpy(out_arg, out, chunk) + chunk;
939 out = out_arg+=chunk;
941 } while (nbytes -= chunk);
942 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
945 case EVP_CIPH_OFB_MODE:
946 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
949 inp = memcpy(out, in_arg, chunk);
954 padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
957 out_arg = memcpy(out_arg, out, chunk) + chunk;
959 out = out_arg+=chunk;
962 chunk = PADLOCK_CHUNK;
964 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
/* The bounce buffer held plaintext/key-dependent data: wipe it. */
971 /* Clean the realign buffer if it was used */
972 if (out_misaligned) {
973 volatile unsigned long *p=out;
974 size_t n = allocated/sizeof(*p);
978 memset(cdata->iv, 0, AES_BLOCK_SIZE);
983 /* ===== Random Number Generator ===== */
985 * This code is not engaged. The reason is that it does not comply
986 * with recommendations for VIA RNG usage for secure applications
987 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
988 * provide meaningful error control...
/* RAND_METHOD "bytes" callback: pull bytes from the PadLock RNG via
 * xstore.  The bulk loop stores 8 bytes at a time (quality factor 0);
 * the tail loop fetches one byte at a time (quality factor 3).  The
 * xstore status word in eax is checked for: RNG-enabled (bit 6),
 * DC-bias/raw-bits/string-filter error bits (0x1F<<10), and the count
 * of bytes actually stored (low 5 bits).  NOTE(review): the enclosing
 * while loops, braces and final return are elided from this listing. */
990 /* Wrapper that provides an interface between the API and
991 the raw PadLock RNG */
993 padlock_rand_bytes(unsigned char *output, int count)
995 unsigned int eax, buf;
998 eax = padlock_xstore(output, 0);
999 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1000 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1001 if (eax&(0x1F<<10)) return 0;
1002 if ((eax&0x1F)==0) continue; /* no data, retry... */
1003 if ((eax&0x1F)!=8) return 0; /* fatal failure... */
1008 eax = padlock_xstore(&buf, 3);
1009 if (!(eax&(1<<6))) return 0; /* RNG disabled */
1010 /* this ---vv--- covers DC bias, Raw Bits and String Filter */
1011 if (eax&(0x1F<<10)) return 0;
1012 if ((eax&0x1F)==0) continue; /* no data, retry... */
1013 if ((eax&0x1F)!=1) return 0; /* fatal failure... */
1014 *output++ = (unsigned char)buf;
/* Scrub the single-byte staging word before returning. */
1017 *(volatile unsigned int *)&buf=0;
/* Dummy but necessary function: RAND_METHOD "status" callback.
 * Always reports "seeded/ready" -- the hardware RNG needs no seeding.
 * NOTE(review): body reconstructed; the listing showed only the
 * comment and signature lines. */
static int
padlock_rand_status(void)
{
	return 1;
}
/* RAND_METHOD vtable handed to ENGINE_set_RAND in padlock_bind_helper.
 * NOTE(review): the NULL slots (seed, cleanup, add) and the struct's
 * closing brace are elided from this listing. */
1029 /* Prepare structure for registration */
1030 static RAND_METHOD padlock_rand = {
1032 padlock_rand_bytes, /* bytes */
1035 padlock_rand_bytes, /* pseudorand */
1036 padlock_rand_status, /* rand status */
1039 #endif /* COMPILE_HW_PADLOCK */
1041 #endif /* !OPENSSL_NO_HW_PADLOCK */
1042 #endif /* !OPENSSL_NO_HW */