- struct padlock_cipher_data *cdata;
- const void *inp;
- unsigned char *out;
- void *iv;
- int inp_misaligned, out_misaligned, realign_in_loop;
- size_t chunk, allocated=0;
-
- /* ctx->num is maintained in byte-oriented modes,
- such as CFB and OFB... */
- if ((chunk = ctx->num)) { /* borrow chunk variable */
- unsigned char *ivp=ctx->iv;
-
- switch (EVP_CIPHER_CTX_mode(ctx)) {
- case EVP_CIPH_CFB_MODE:
- if (chunk >= AES_BLOCK_SIZE)
- return 0; /* bogus value */
-
- if (ctx->encrypt)
- while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
- chunk++, nbytes--;
- }
- else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- unsigned char c = *(in_arg++);
- *(out_arg++) = c ^ ivp[chunk];
- ivp[chunk++] = c, nbytes--;
- }
-
- ctx->num = chunk%AES_BLOCK_SIZE;
- break;
- case EVP_CIPH_OFB_MODE:
- if (chunk >= AES_BLOCK_SIZE)
- return 0; /* bogus value */
-
- while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
- *(out_arg++) = *(in_arg++) ^ ivp[chunk];
- chunk++, nbytes--;
- }
-
- ctx->num = chunk%AES_BLOCK_SIZE;
- break;
- }
- }
-
- if (nbytes == 0)
- return 1;
-#if 0
- if (nbytes % AES_BLOCK_SIZE)
- return 0; /* are we expected to do tail processing? */
-#else
- /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
- modes and arbitrary value in byte-oriented modes, such as
- CFB and OFB... */
-#endif
-
- /* VIA promises CPUs that won't require alignment in the future.
- For now padlock_aes_align_required is initialized to 1 and
- the condition is never met... */
- /* C7 core is capable to manage unaligned input in non-ECB[!]
- mode, but performance penalties appear to be approximately
- same as for software alignment below or ~3x. They promise to
- improve it in the future, but for now we can just as well
- pretend that it can only handle aligned input... */
- if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
- return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
-
- inp_misaligned = (((size_t)in_arg) & 0x0F);
- out_misaligned = (((size_t)out_arg) & 0x0F);
-
- /* Note that even if output is aligned and input not,
- * I still prefer to loop instead of copy the whole
- * input and then encrypt in one stroke. This is done
- * in order to improve L1 cache utilization... */
- realign_in_loop = out_misaligned|inp_misaligned;
-
- if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
- return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
-
- /* this takes one "if" out of the loops */
- chunk = nbytes;
- chunk %= PADLOCK_CHUNK;
- if (chunk==0) chunk = PADLOCK_CHUNK;
-
- if (out_misaligned) {
- /* optmize for small input */
- allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
- out = alloca(0x10 + allocated);
- out = NEAREST_ALIGNED(out);
- }
- else
- out = out_arg;
-
- cdata = ALIGNED_CIPHER_DATA(ctx);
- padlock_verify_context(cdata);
-
- switch (EVP_CIPHER_CTX_mode(ctx)) {
- case EVP_CIPH_ECB_MODE:
- do {
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- chunk = PADLOCK_CHUNK;
- } while (nbytes);
- break;
-
- case EVP_CIPH_CBC_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- goto cbc_shortcut;
- do {
- if (iv != cdata->iv)
- memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
- chunk = PADLOCK_CHUNK;
- cbc_shortcut: /* optimize for small input */
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- } while (nbytes -= chunk);
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_CFB_MODE:
- memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- chunk &= ~(AES_BLOCK_SIZE-1);
- if (chunk) goto cfb_shortcut;
- else goto cfb_skiploop;
- do {
- if (iv != cdata->iv)
- memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
- chunk = PADLOCK_CHUNK;
- cfb_shortcut: /* optimize for small input */
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- } while (nbytes >= AES_BLOCK_SIZE);
-
- cfb_skiploop:
- if (nbytes) {
- unsigned char *ivp = cdata->iv;
-
- if (iv != ivp) {
- memcpy(ivp, iv, AES_BLOCK_SIZE);
- iv = ivp;
- }
- ctx->num = nbytes;
- if (cdata->cword.b.encdec) {
- cdata->cword.b.encdec=0;
- padlock_reload_key();
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- cdata->cword.b.encdec=1;
- padlock_reload_key();
- while(nbytes) {
- unsigned char c = *(in_arg++);
- *(out_arg++) = c ^ *ivp;
- *(ivp++) = c, nbytes--;
- }
- }
- else { padlock_reload_key();
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- padlock_reload_key();
- while (nbytes) {
- *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
- ivp++, nbytes--;
- }
- }
- }
-
- memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
- break;
-
- case EVP_CIPH_OFB_MODE:
- memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
- chunk &= ~(AES_BLOCK_SIZE-1);
- if (chunk) do {
- if (inp_misaligned)
- inp = padlock_memcpy(out, in_arg, chunk);
- else
- inp = in_arg;
- in_arg += chunk;
-
- padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
-
- if (out_misaligned)
- out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
- else
- out = out_arg+=chunk;
-
- nbytes -= chunk;
- chunk = PADLOCK_CHUNK;
- } while (nbytes >= AES_BLOCK_SIZE);
-
- if (nbytes) {
- unsigned char *ivp = cdata->iv;
-
- ctx->num = nbytes;
- padlock_reload_key(); /* empirically found */
- padlock_xcrypt_ecb(1,cdata,ivp,ivp);
- padlock_reload_key(); /* empirically found */
- while (nbytes) {
- *(out_arg++) = *(in_arg++) ^ *ivp;
- ivp++, nbytes--;
- }
- }
-
- memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
- break;
-
- default:
- return 0;
- }
-
- /* Clean the realign buffer if it was used */
- if (out_misaligned) {
- volatile unsigned long *p=(void *)out;
- size_t n = allocated/sizeof(*p);
- while (n--) *p++=0;
- }
-
- memset(cdata->iv, 0, AES_BLOCK_SIZE);
-
- return 1;
+ struct padlock_cipher_data *cdata;
+ int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
+ unsigned long mode = EVP_CIPHER_CTX_mode(ctx);
+
+ if (key == NULL)
+ return 0; /* ERROR */
+
+ cdata = ALIGNED_CIPHER_DATA(ctx);
+ memset(cdata, 0, sizeof(*cdata));
+
+ /* Prepare Control word. */
+ if (mode == EVP_CIPH_OFB_MODE || mode == EVP_CIPH_CTR_MODE)
+ cdata->cword.b.encdec = 0;
+ else
+ cdata->cword.b.encdec = (EVP_CIPHER_CTX_encrypting(ctx) == 0);
+ cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
+ cdata->cword.b.ksize = (key_len - 128) / 64;
+
+ switch (key_len) {
+ case 128:
+ /*
+ * PadLock can generate an extended key for AES128 in hardware
+ */
+ memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
+ cdata->cword.b.keygen = 0;
+ break;
+
+ case 192:
+ case 256:
+ /*
+ * Generate an extended AES key in software. Needed for AES192/AES256
+ */
+ /*
+ * Well, the above applies to Stepping 8 CPUs and is listed as
+ * hardware errata. They most likely will fix it at some point and
+ * then a check for stepping would be due here.
+ */
+ if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
+ && !enc)
+ AES_set_decrypt_key(key, key_len, &cdata->ks);
+ else
+ AES_set_encrypt_key(key, key_len, &cdata->ks);
+# ifndef AES_ASM
+ /*
+ * OpenSSL C functions use byte-swapped extended key.
+ */
+ padlock_key_bswap(&cdata->ks);
+# endif
+ cdata->cword.b.keygen = 1;
+ break;
+
+ default:
+ /* ERROR */
+ return 0;
+ }
+
+ /*
+ * This is done to cover for cases when user reuses the
+ * context for new key. The catch is that if we don't do
+ * this, padlock_eas_cipher might proceed with old key...
+ */
+ padlock_reload_key();
+
+ return 1;