aes-ce-ccm-core.S source code [linux/arch/arm64/crypto/aes-ce-ccm-core.S]

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */
10
11	#include <linux/linkage.h>
12	#include <asm/assembler.h>
13
/*
 * Everything below is code; enable the ARMv8 Crypto Extensions so the
 * assembler accepts the AESE/AESMC instructions used by the macros.
 */
	.text
	.arch	armv8-a+crypto
16
/*
 * load_round_keys rk, nr, tmp
 *
 * Load the AES key schedule into NEON registers.
 *
 *   \rk  - X register holding the address of the round key array
 *   \nr  - W register holding the number of rounds
 *   \tmp - X register used as scratch; clobbered
 *
 * v10-v13 receive the first four round keys (64 bytes at \rk).
 * \tmp is then set to \rk + (\nr - 10) * 16 and the eleven round keys
 * starting there are loaded into v14-v21 and v3-v5.  As a result the
 * last eleven round keys always land in the same registers regardless
 * of \nr, with the very last round key in v5.
 *
 * The 'w\tmp' spelling pastes a 'w' in front of the X register name;
 * this relies on wxN register aliases being defined elsewhere
 * (presumably by asm/assembler.h - confirm).
 *
 * NOTE(review): v10-v15 are callee-saved under AAPCS64 and are written
 * here without being preserved; presumably callers only run this code
 * inside a kernel-mode NEON section - confirm.
 */
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm
25
/*
 * dround va, vb, vk
 *
 * Apply one full AES encryption round (AESE followed by AESMC) to the
 * two state registers \va and \vb, both using round key \vk.
 *
 * All three arguments are bare vector register names (e.g. v0); the
 * '\()' separator lets the '.16b' arrangement suffix be appended.
 */
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm
32
/*
 * aes_encrypt va, vb, nr
 *
 * Run AES encryption on the two state registers \va and \vb in
 * parallel, using the key registers set up by load_round_keys, but
 * stop short of the final round key addition: the last instruction
 * pair is an AESE with v4 (no AESMC), and the XOR with the last round
 * key in v5 is left to the caller.
 *
 *   \va, \vb - bare vector register names holding the two blocks
 *   \nr      - W register holding the number of rounds
 *
 * Bits 2 and 1 of \nr select how many of the leading keys v10-v13 are
 * used.  For the AES round counts:
 *   10 (0b1010): bit 2 clear -> skip v10-v13 entirely
 *   12 (0b1100): bit 2 set, bit 1 clear -> use v10, v11
 *   14 (0b1110): both set -> use v10-v13
 * Nine full rounds with v14-v21 and v3 always follow.
 *
 * The local label uses '\@' so that every expansion of this macro gets
 * its own branch target.  Only TBZ is used for branching, so the
 * condition flags are left untouched.
 */
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm
46
/*
 * aes_ccm_do_crypt enc
 *
 * Common body of ce_aes_ccm_encrypt (\enc == 1) and ce_aes_ccm_decrypt
 * (\enc == 0).  Processes the input in 16-byte blocks, producing the
 * CTR-mode output and folding the plaintext into the CBC-MAC state.
 *
 * Registers on entry, following the argument order of the C
 * prototypes:
 *   x0 - out        x1 - in         w2 - cbytes     x3 - rk
 *   w4 - rounds     x5 - mac        x6 - ctr        x7 - final_iv
 *
 * Internal register roles:
 *   x8      - low 64 bits of the counter, kept as a native integer
 *   x9      - byte-reversed copy of x8 for insertion into v1
 *   x10     - scratch for load_round_keys
 *   v0      - MAC state
 *   v1      - counter block / keystream
 *   v2      - input block (and plaintext ^ last round key)
 *   v6      - output block; still live when branching to the tail
 *   v5      - last round key (from load_round_keys)
 *
 * Exits:
 *   - cbytes == 0: branch to ce_aes_ccm_final.
 *   - fewer than 16 bytes remain: branch to ce_aes_ccm_crypt_tail with
 *     w2 holding (remaining - 16), i.e. a value in [-15, -1].
 *   - input exhausted on a block boundary: the counter is written back
 *     to ctr[8..15]; then either branch to ce_aes_ccm_final (final_iv
 *     non-NULL) or store the MAC state and return.
 *
 * cbytes is a u32, so only w2 is examined: under AAPCS64 the upper 32
 * bits of x2 are unspecified for a 32-bit argument.
 *
 * The BMI and BNE below both consume the flags set by the single SUBS;
 * none of the instructions in between modify the condition flags.
 *
 * NOTE(review): v10-v15 (and v8/v9 in the tail) are callee-saved under
 * AAPCS64 and are not preserved; presumably callers only invoke these
 * routines inside a kernel-mode NEON section - confirm.
 */
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	cbz	w2, ce_aes_ccm_final		/* no data: cbytes is 32 bits wide */
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail		/* partial block left */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags still from subs above */
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm
82
/*
 * ce_aes_ccm_crypt_tail - handle a trailing partial block
 *
 * Reached by a branch from aes_ccm_do_crypt (not by a call) when fewer
 * than 16 bytes of input remain.  On entry:
 *   w2      - (remaining bytes - 16), a negative value in [-15, -1]
 *   x0, x1  - output / input pointers just past the last full block
 *   v0, v1  - MAC state and counter block after aes_encrypt, i.e. still
 *             missing the XOR with the last round key in v5
 *   v6      - output block of the previous loop iteration
 *   v22     - all ones for encryption, all zeroes for decryption
 *   x5, x7  - mac and final_iv pointers
 *
 * Both pointers are moved back by the shortfall so that one full
 * 16-byte load/store ends exactly at the end of the data.  The bytes
 * that overlap the previous block are re-created from v6 so that the
 * store does not change them.
 * NOTE(review): if there was no previous full block, v6 is not set by
 * this code and the 16-byte accesses start before in/out; presumably
 * the caller arranges for that to be harmless - confirm.
 *
 * Permute vectors, all taken from the table around .Lpermute:
 *   v7 - shifts the keystream to the end of the register
 *   v8 - selects the bytes of v6 that belong in front of the new output
 *   v9 - shifts the new plaintext bytes to the start of the register,
 *        zeroing the rest
 *
 * The incremented counter held in x8 by aes_ccm_do_crypt is not written
 * back to ctr[] on this path; x8 is reused for the table address.
 *
 * ce_aes_ccm_final - encrypt/decrypt the MAC with the first counter
 * block.  v0 (MAC state) and the block at x7 are both run through
 * aes_encrypt; since both are missing the same last round key, XORing
 * them makes it cancel out.  The result is stored at x5.
 *
 * If final_iv (x7) is NULL the tail skips this step and stores the MAC
 * state as is.
 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw		/* x9 = .Lpermute - shortfall */
	sub	x8, x8, w2, sxtw		/* x8 = .Lpermute + shortfall */
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f				/* no final_iv: store mac as is */

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
117
/*
 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[], u8 const final_iv[]);
 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[], u8 const final_iv[]);
 */
/*
 * Encryption entry point.  v22 is set to all ones so that the BIF in
 * ce_aes_ccm_crypt_tail keeps the loaded input (the plaintext) as the
 * data folded into the MAC.  The rest of the function, including its
 * return, is the expansion of aes_ccm_do_crypt with \enc == 1.
 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)
130
/*
 * Decryption entry point.  v22 is set to all zeroes so that the BIF in
 * ce_aes_ccm_crypt_tail replaces the loaded input with the decrypted
 * output (the plaintext) before it is folded into the MAC.  The rest of
 * the function, including its return, is the expansion of
 * aes_ccm_do_crypt with \enc == 0.
 */
SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)
135
/*
 * Permute table for ce_aes_ccm_crypt_tail: the identity byte permutation
 * 0..15 with 15 bytes of 0xff on either side.  The tail code loads
 * vectors starting between .Lpermute - 15 and .Lpermute + 15; a 0xff
 * index is out of range for TBL/TBX, so TBL yields zero for that byte
 * and TBX leaves the destination byte unchanged.
 *
 * The table is 46 bytes long; '.align 6' places it on a 64-byte
 * boundary.
 */
	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff
143

source code of linux/arch/arm64/crypto/aes-ce-ccm-core.S