/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm
	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm
	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm
	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
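	/*
	 * Note: AESE performs AddRoundKey + SubBytes + ShiftRows and AESMC
	 * performs MixColumns, so each aese/aesmc (or aesd/aesimc) pair
	 * above is one full AES round. The final AES round omits
	 * MixColumns, which is why the fround macros end with a bare
	 * aese/aesd followed by a veor with the last round key rather than
	 * another enc_round/dec_round. The _3x variants compute exactly the
	 * same function per block; they interleave three blocks purely to
	 * hide the latency of the crypto instructions.
	 */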
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
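	/*
	 * do_block expects ip to point at the 3rd round key (q8 and q9
	 * already hold round keys 0 and 1, and q14 the final one) and
	 * walks the rest of the key schedule with post-indexed loads. The
	 * cmp r3, #12 dispatch covers all three key sizes: below 12 means
	 * AES-128 (four \dround pairs plus the two-round \fround finale,
	 * 10 rounds in total), equal means AES-192 (12 rounds) and above
	 * means AES-256 (14 rounds). The last two rounds always go through
	 * \fround, which skips MixColumns and xors in the last round key.
	 */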
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm
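	/*
	 * Each round key is 16 bytes, so '\rounds, lsl #4' is the byte
	 * offset of the last round key in the schedule: e.g. for AES-256,
	 * rounds = 14 and ip = rk + (14 << 4) = rk + 224.
	 */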
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
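	/*
	 * For illustration only, a minimal sketch of how C glue code might
	 * drive these routines (this is not taken from the accompanying
	 * glue file, whose details may differ; the ctx field and helper
	 * names below are hypothetical). Any kernel-mode use of the NEON
	 * register file must be bracketed by kernel_neon_begin/end:
	 *
	 *	asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[],
	 *					   u8 const rk[], int rounds,
	 *					   int blocks);
	 *
	 *	kernel_neon_begin();
	 *	ce_aes_ecb_encrypt(dst, src, (u8 *)ctx->key_enc,
	 *			   num_rounds(ctx), blocks);
	 *	kernel_neon_end();
	 */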
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
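	/*
	 * CBC encryption is inherently serial: each plaintext block is
	 * xor'ed with the previous ciphertext block (or the IV) before it
	 * is encrypted, so no _3x interleaving is possible here and the
	 * loop above handles one block at a time. CBC decryption below has
	 * no such dependency and does use the 3x path.
	 */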
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0
	vmov		q4, q1
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3
	beq		.Lcbcdecout
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ keep iv in q6
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
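	/*
	 * The q15/q14 shuffle in the 1x decrypt loop saves a veor per
	 * block: the last step of the AES transform is a veor with the
	 * final round key (q14), and CBC decryption ends with a veor
	 * against the previous ciphertext block, so the two are folded
	 * together by xor'ing the preserved last round key (q15) with the
	 * previous ciphertext (q6) into q14 before each call.
	 */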
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x
	add		r6, r6, #1		@ get next ctr values
	vmov		q0, q6
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ patch low word of 2nd ctr
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ patch low word of 3rd ctr
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ update ctr for next iteration
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctrcarry

.Lctrpost:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop

.Lctrout:
	vst1.8		{q6}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f
	.endr
0:	b		.Lctrpost
ENDPROC(ce_aes_ctr_encrypt)
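	/*
	 * The counter lives big-endian in q6, with its least significant
	 * 32 bits mirrored byte-swapped in r6. 'cmn r6, r4' computes
	 * r6 + blocks and sets the carry flag iff the low word would wrap
	 * during this call; in that case the fast 3x path (which only
	 * patches the low word of each counter value) is skipped in favour
	 * of the 1x loop, whose .Lctrcarry code ripples the carry through
	 * the remaining counter words s26/s25/s24 one at a time.
	 */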
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm
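	/*
	 * next_tweak computes a multiplication by x in GF(2^128) with the
	 * XTS reduction polynomial x^128 + x^7 + x^2 + x + 1, i.e.
	 *
	 *	out = (in << 1) ^ (0x87 if the top bit of in was set)
	 *
	 * vshr.s64 broadcasts each 64-bit lane's sign bit into an all-ones
	 * mask, vand turns that into {1, 0x87} (the .Lxts_mul_x constant),
	 * vadd.u64 doubles each lane, and the vext.8 #8 half swap routes
	 * the low lane's carry bit into the high lane while folding the
	 * high lane's carry back into the low lane as 0x87.
	 */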
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
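	/*
	 * The shared prologue leaves the mul-by-x constant in q7 (d14/d15)
	 * and the current tweak in q0. On the first call for a sector the
	 * IV is encrypted with the second AES key by tail calling into
	 * aes_encrypt: the bx lr at the end of do_block then returns
	 * straight to ce_aes_xts_init's caller. On subsequent calls the
	 * tweak saved by the previous invocation is reloaded instead.
	 */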
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	add		ip, r2, #32		@ 3rd round key
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
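	/*
	 * Note that XTS decryption reuses the exact same tweak schedule as
	 * encryption: the tweak is always produced by *encrypting* the IV
	 * with the second key and advancing it with next_tweak; only the
	 * per-block cipher direction (aes_decrypt_3x/aes_decrypt) differs.
	 */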
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)
	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)
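	/*
	 * ce_aes_sub exploits aese with an all-zero state: since AESE xors
	 * state and key before applying SubBytes and ShiftRows, passing
	 * the input replicated across q1 against a zeroed q0 reduces the
	 * instruction to a plain sbox lookup on every byte, of which the
	 * first word is returned. ce_aes_invert applies the Inverse
	 * MixColumns transform that turns an encryption round key into the
	 * corresponding decryption round key for the equivalent inverse
	 * cipher. Both are key expansion helpers, not bulk processing.
	 */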