/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
17 .macro load_round_keys, rk, nr, tmp
19 add \tmp, \rk, w\tmp, sxtw #4
20 ld1 {v10.4s-v13.4s}, [\rk]
21 ld1 {v14.4s-v17.4s}, [\tmp], #64
22 ld1 {v18.4s-v21.4s}, [\tmp], #64
23 ld1 {v3.4s-v5.4s}, [\tmp]
26 .macro dround, va, vb, vk
27 aese \va\().16b, \vk\().16b
28 aesmc \va\().16b, \va\().16b
29 aese \vb\().16b, \vk\().16b
30 aesmc \vb\().16b, \vb\().16b
33 .macro aes_encrypt, va, vb, nr
40 .L\@: .irp v, v14, v15, v16, v17, v18, v19, v20, v21, v3
43 aese \va\().16b, v4.16b
44 aese \vb\().16b, v4.16b
48 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
51 SYM_FUNC_START(ce_aes_ccm_final)
52 ld1 {v0.16b}, [x0] /* load mac */
53 ld1 {v1.16b}, [x1] /* load 1st ctriv */
55 aes_encrypt v0, v1, w3
57 /* final round key cancels out */
58 eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
59 st1 {v0.16b}, [x0] /* store result */
61 SYM_FUNC_END(ce_aes_ccm_final)
63 .macro aes_ccm_do_crypt,enc
64 load_round_keys x3, w4, x10
67 ldr x8, [x6, #8] /* load lower ctr */
68 ld1 {v0.16b}, [x5] /* load mac */
69 CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
71 ld1 {v1.8b}, [x6] /* load upper ctr */
75 ins v1.d[1], x9 /* no carry in lower ctr */
77 aes_encrypt v0, v1, w4
80 bmi ce_aes_ccm_crypt_tail
81 ld1 {v2.16b}, [x1], #16 /* load next input block */
83 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
84 eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
86 eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
87 eor v6.16b, v2.16b, v5.16b /* final round enc */
89 eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
90 st1 {v6.16b}, [x0], #16 /* write output block */
93 st1 {v0.16b}, [x5] /* store mac */
94 str x8, [x6, #8] /* store lsb end of ctr (BE) */
98 SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
99 eor v0.16b, v0.16b, v5.16b /* final round mac */
100 eor v1.16b, v1.16b, v5.16b /* final round enc */
102 add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
103 add x0, x0, w2, sxtw /* rewind the output pointer */
105 adr_l x8, .Lpermute /* load permute vectors */
108 ld1 {v7.16b-v8.16b}, [x9]
111 ld1 {v2.16b}, [x1] /* load a full block of input */
112 tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
113 eor v7.16b, v2.16b, v1.16b /* encrypt partial input block */
114 bif v2.16b, v7.16b, v22.16b /* select plaintext */
115 tbx v7.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
116 tbl v2.16b, {v2.16b}, v9.16b /* copy plaintext to start of v2 */
117 eor v0.16b, v0.16b, v2.16b /* fold plaintext into mac */
119 st1 {v0.16b}, [x5] /* store mac */
120 st1 {v7.16b}, [x0] /* store output block */
122 SYM_FUNC_END(ce_aes_ccm_crypt_tail)
125 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
126 * u8 const rk[], u32 rounds, u8 mac[],
128 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
129 * u8 const rk[], u32 rounds, u8 mac[],
132 SYM_FUNC_START(ce_aes_ccm_encrypt)
135 SYM_FUNC_END(ce_aes_ccm_encrypt)
137 SYM_FUNC_START(ce_aes_ccm_decrypt)
140 SYM_FUNC_END(ce_aes_ccm_decrypt)
142 .section ".rodata", "a"
146 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
147 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf