arch/arm64/crypto/aes-ce-ccm-core.S

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
   4  *
   5  * Copyright (C) 2013 - 2017 Linaro Ltd.
   6  * Copyright (C) 2024 Google LLC
   7  *
   8  * Author: Ard Biesheuvel <ardb@kernel.org>
   9  */
  10
  11 #include <linux/linkage.h>
  12 #include <asm/assembler.h>
  13
  14         .text
  15         .arch   armv8-a+crypto
  16
   17         .macro  load_round_keys, rk, nr, tmp
              /*
               * Load the AES round keys into the fixed vector registers that
               * aes_encrypt reads them from.
               *
               *   rk  - 64-bit register holding the address of the round key array
               *   nr  - 32-bit register holding the round count
               *   tmp - name of a 64-bit scratch register (clobbered); it is also
               *         used with a 'w' prefix, which presumably relies on wxN
               *         register aliases defined outside this file - confirm
               *
               * v10-v13 always receive the first four round keys.  v14-v21 and
               * v3-v5 receive the eleven round keys (176 bytes) that start at
               * rk + (nr - 10) * 16, so v5 ends up holding round key number nr.
               *
               * Clobbers tmp, v3-v5 and v10-v21.
               */
   18         sub     w\tmp, \nr, #10
   19         add     \tmp, \rk, w\tmp, sxtw #4
   20         ld1     {v10.4s-v13.4s}, [\rk]
   21         ld1     {v14.4s-v17.4s}, [\tmp], #64
   22         ld1     {v18.4s-v21.4s}, [\tmp], #64
   23         ld1     {v3.4s-v5.4s}, [\tmp]
   24         .endm
  25
   26         .macro  dround, va, vb, vk
              /*
               * One full AES round applied to two independent states, va and vb,
               * with the same round key vk: AESE (AddRoundKey, SubBytes,
               * ShiftRows) followed by AESMC (MixColumns) on each state.
               * All three arguments are vector register names without an
               * arrangement suffix; va and vb are updated in place.
               */
   27         aese    \va\().16b, \vk\().16b
   28         aesmc   \va\().16b, \va\().16b
   29         aese    \vb\().16b, \vk\().16b
   30         aesmc   \vb\().16b, \vb\().16b
   31         .endm
  32
   33         .macro  aes_encrypt, va, vb, nr
              /*
               * Run the AES rounds on the two states va and vb side by side,
               * using the round key registers filled in by load_round_keys.
               *
               * nr is a 32-bit register holding the round count.  Its bits 2 and 1
               * decide how many of the leading rounds keyed by v10-v13 are run:
               *   bit 2 clear (e.g. 10)            - none of them
               *   bit 2 set, bit 1 clear (e.g. 12) - v10 and v11
               *   bits 2 and 1 set (e.g. 14)       - v10 through v13
               * Nine full rounds keyed by v14-v21 and v3 always follow, then a
               * last AESE with v4 that is not followed by AESMC.
               *
               * The final round key (v5) is NOT applied here; every user of this
               * macro has to account for it separately.
               */
   34         tbz     \nr, #2, .L\@
   35         dround  \va, \vb, v10
   36         dround  \va, \vb, v11
   37         tbz     \nr, #1, .L\@
   38         dround  \va, \vb, v12
   39         dround  \va, \vb, v13
   40 .L\@:   .irp    v, v14, v15, v16, v17, v18, v19, v20, v21, v3
   41         dround  \va, \vb, \v
   42         .endr
   43         aese    \va\().16b, v4.16b
   44         aese    \vb\().16b, v4.16b
   45         .endm
  46
  47         /*
  48          * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
  49          *                       u32 rounds);
  50          */
  51 SYM_FUNC_START(ce_aes_ccm_final)
  52         ld1     {v0.16b}, [x0]                  /* load mac */
  53         ld1     {v1.16b}, [x1]                  /* load 1st ctriv */
  54
  55         aes_encrypt     v0, v1, w3
  56
  57         /* final round key cancels out */
  58         eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
  59         st1     {v0.16b}, [x0]                  /* store result */
  60         ret
  61 SYM_FUNC_END(ce_aes_ccm_final)
  62
   63         .macro  aes_ccm_do_crypt,enc
              /*
               * Body shared by ce_aes_ccm_encrypt (enc == 1) and
               * ce_aes_ccm_decrypt (enc == 0).
               *
               * Registers on entry, following the C prototypes of those functions:
               *   x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
               *   x5 = mac (16 bytes, updated), x6 = ctr (16 bytes; the low
               *   8 bytes are updated when the loop runs to completion)
               *
               * Each iteration runs the AES rounds on the running mac (v0) and on
               * the next counter block (v1) together, then consumes 16 bytes of
               * input.  A trailing partial block is handed over to
               * ce_aes_ccm_crypt_tail by a plain branch, so that code returns
               * straight to our caller.
               *
               * Scratch: x8-x10, v0-v2, v6 and the round key registers written by
               * load_round_keys (v3-v5, v10-v21).
               */
   64         load_round_keys x3, w4, x10
   65
   66         cbz     x2, 5f                          /* nothing to do when no bytes are given */
              /*
               * NOTE(review): the test above looks at all 64 bits of x2 although
               * cbytes is a u32 - this relies on the caller having zero-extended
               * the argument; confirm.
               */
   67         ldr     x8, [x6, #8]                    /* load lower ctr */
   68         ld1     {v0.16b}, [x5]                  /* load mac */
   69 CPU_LE( rev     x8, x8                  )       /* keep swabbed ctr in reg */
   70 0:      /* outer loop */
   71         ld1     {v1.8b}, [x6]                   /* load upper ctr */
   72         prfm    pldl1strm, [x1]
   73         add     x8, x8, #1
   74         rev     x9, x8
   75         ins     v1.d[1], x9                     /* no carry in lower ctr */
   76
   77         aes_encrypt     v0, v1, w4
   78
   79         subs    w2, w2, #16                     /* w2 = bytes left after this block */
   80         bmi     ce_aes_ccm_crypt_tail           /* fewer than 16 bytes were left */
   81         ld1     {v2.16b}, [x1], #16             /* load next input block */
   82         .if     \enc == 1
   83         eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
   84         eor     v6.16b, v1.16b, v2.16b          /* xor with crypted ctr */
   85         .else
   86         eor     v2.16b, v2.16b, v1.16b          /* xor with crypted ctr */
   87         eor     v6.16b, v2.16b, v5.16b          /* final round enc */
   88         .endif
   89         eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
   90         st1     {v6.16b}, [x0], #16             /* write output block */
   91         bne     0b                              /* flags still from the subs above */
   92 CPU_LE( rev     x8, x8                  )
   93         st1     {v0.16b}, [x5]                  /* store mac */
   94         str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
   95 5:      ret
   96         .endm
  97
   98 SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
              /*
               * Handle a final partial block.  This code is reached by a branch
               * (not a call) from aes_ccm_do_crypt, so the ret at the end returns
               * to the caller of ce_aes_ccm_encrypt / ce_aes_ccm_decrypt.
               *
               * State on entry:
               *   w2     = (bytes remaining) - 16, i.e. negative
               *   v0, v1 = mac and counter block after aes_encrypt, with the final
               *            round key not yet applied
               *   v5     = final round key
               *   v6     = output block stored by the previous loop iteration
               *   v22    = all ones when encrypting, all zeroes when decrypting
               *   x0, x1 = output / input position of the partial block
               *   x5     = mac buffer
               *
               * Both pointers are moved back so that a single 16-byte load and a
               * single 16-byte store end exactly at the end of the data; the
               * leading bytes of the stored block are refilled from v6.  The
               * counter at [x6] is not written back on this path.
               *
               * NOTE(review): if the very first block is already partial, v6 has
               * not been written and the rewound accesses start before x0/x1 as
               * passed in - presumably callers never pass fewer than 16 bytes in
               * total; confirm against the C glue code.
               */
   99         eor     v0.16b, v0.16b, v5.16b          /* final round mac */
  100         eor     v1.16b, v1.16b, v5.16b          /* final round enc */
  101
  102         add     x1, x1, w2, sxtw                /* rewind the input pointer (w2 < 0) */
  103         add     x0, x0, w2, sxtw                /* rewind the output pointer */
  104
  105         adr_l   x8, .Lpermute                   /* load permute vectors */
  106         add     x9, x8, w2, sxtw                /* x9 = .Lpermute + w2: starts in the leading 0xff fill */
  107         sub     x8, x8, w2, sxtw                /* x8 = .Lpermute - w2: ends in the trailing 0xff fill */
  108         ld1     {v7.16b-v8.16b}, [x9]
  109         ld1     {v9.16b}, [x8]
  110
  111         ld1     {v2.16b}, [x1]                  /* load a full block of input */
  112         tbl     v1.16b, {v1.16b}, v7.16b        /* move keystream to end of register */
  113         eor     v7.16b, v2.16b, v1.16b          /* encrypt partial input block */
  114         bif     v2.16b, v7.16b, v22.16b         /* select plaintext */
  115         tbx     v7.16b, {v6.16b}, v8.16b        /* insert output from previous iteration */
  116         tbl     v2.16b, {v2.16b}, v9.16b        /* copy plaintext to start of v2 */
  117         eor     v0.16b, v0.16b, v2.16b          /* fold plaintext into mac */
  118
  119         st1     {v0.16b}, [x5]                  /* store mac */
  120         st1     {v7.16b}, [x0]                  /* store output block */
  121         ret
  122 SYM_FUNC_END(ce_aes_ccm_crypt_tail)
 123
  124         /*
  125          * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
  126          *                         u8 const rk[], u32 rounds, u8 mac[],
  127          *                         u8 ctr[]);
  128          * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
  129          *                         u8 const rk[], u32 rounds, u8 mac[],
  130          *                         u8 ctr[]);
               *
               * Both entry points expand aes_ccm_do_crypt, which contains the
               * function's ret.  They differ in the macro argument and in the
               * value placed in v22, which ce_aes_ccm_crypt_tail uses as the
               * select mask for the data folded into the mac.
  131          */
  132 SYM_FUNC_START(ce_aes_ccm_encrypt)
  133         movi    v22.16b, #255                   /* all ones: the tail keeps the input bytes as plaintext */
  134         aes_ccm_do_crypt        1
  135 SYM_FUNC_END(ce_aes_ccm_encrypt)
 136
  137 SYM_FUNC_START(ce_aes_ccm_decrypt)
              /*
               * Decryption entry point: same register arguments as
               * ce_aes_ccm_encrypt.  The expanded macro supplies the ret.
               */
  138         movi    v22.16b, #0                     /* all zeroes: the tail takes the plaintext from the decrypted bytes */
  139         aes_ccm_do_crypt        0
  140 SYM_FUNC_END(ce_aes_ccm_decrypt)
 141
  142         .section ".rodata", "a"
  143         .align  6
              /*
               * Byte permute table used by ce_aes_ccm_crypt_tail: the identity
               * indices 0..15 with 15 bytes of 0xff on either side.  Loading 16 or
               * 32 bytes at a small positive or negative offset from .Lpermute
               * yields index vectors that shift a block left or right.  The 0xff
               * entries are out of range for a single-register table lookup, so
               * tbl zeroes those lanes and tbx leaves them unchanged.
               */
  144         .fill   15, 1, 0xff
  145 .Lpermute:
  146         .byte   0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
  147         .byte   0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
  148         .fill   15, 1, 0xff