1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
4 * including ChaCha20 (RFC7539)
5 *
6 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
7 * Copyright (C) 2015 Martin Willi
8 */
9
10#include <crypto/algapi.h>
11#include <crypto/internal/chacha.h>
12#include <crypto/internal/simd.h>
13#include <crypto/internal/skcipher.h>
14#include <linux/jump_label.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17
18#include <asm/cputype.h>
19#include <asm/hwcap.h>
20#include <asm/neon.h>
21#include <asm/simd.h>
22
23asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
24 int nrounds);
25asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
26 int nrounds, unsigned int nbytes);
27asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
28asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
29
30asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
31 const u32 *state, int nrounds);
32
33static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
34
35static inline bool neon_usable(void)
36{
37 return static_branch_likely(&use_neon) && crypto_simd_usable();
38}
39
40static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
41 unsigned int bytes, int nrounds)
42{
43 u8 buf[CHACHA_BLOCK_SIZE];
44
45 while (bytes > CHACHA_BLOCK_SIZE) {
46 unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
47
48 chacha_4block_xor_neon(state, dst, src, nrounds, nbytes: l);
49 bytes -= l;
50 src += l;
51 dst += l;
52 state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
53 }
54 if (bytes) {
55 const u8 *s = src;
56 u8 *d = dst;
57
58 if (bytes != CHACHA_BLOCK_SIZE)
59 s = d = memcpy(buf, src, bytes);
60 chacha_block_xor_neon(state, dst: d, src: s, nrounds);
61 if (d != dst)
62 memcpy(dst, buf, bytes);
63 state[12]++;
64 }
65}
66
67void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
68{
69 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
70 hchacha_block_arm(state, out: stream, nrounds);
71 } else {
72 kernel_neon_begin();
73 hchacha_block_neon(state, out: stream, nrounds);
74 kernel_neon_end();
75 }
76}
77EXPORT_SYMBOL(hchacha_block_arch);
78
79void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
80{
81 chacha_init_generic(state, key, iv);
82}
83EXPORT_SYMBOL(chacha_init_arch);
84
85void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
86 int nrounds)
87{
88 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
89 bytes <= CHACHA_BLOCK_SIZE) {
90 chacha_doarm(dst, src, bytes, state, nrounds);
91 state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
92 return;
93 }
94
95 do {
96 unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
97
98 kernel_neon_begin();
99 chacha_doneon(state, dst, src, bytes: todo, nrounds);
100 kernel_neon_end();
101
102 bytes -= todo;
103 src += todo;
104 dst += todo;
105 } while (bytes);
106}
107EXPORT_SYMBOL(chacha_crypt_arch);
108
109static int chacha_stream_xor(struct skcipher_request *req,
110 const struct chacha_ctx *ctx, const u8 *iv,
111 bool neon)
112{
113 struct skcipher_walk walk;
114 u32 state[16];
115 int err;
116
117 err = skcipher_walk_virt(walk: &walk, req, atomic: false);
118
119 chacha_init_generic(state, key: ctx->key, iv);
120
121 while (walk.nbytes > 0) {
122 unsigned int nbytes = walk.nbytes;
123
124 if (nbytes < walk.total)
125 nbytes = round_down(nbytes, walk.stride);
126
127 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
128 chacha_doarm(dst: walk.dst.virt.addr, src: walk.src.virt.addr,
129 bytes: nbytes, state, nrounds: ctx->nrounds);
130 state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
131 } else {
132 kernel_neon_begin();
133 chacha_doneon(state, dst: walk.dst.virt.addr,
134 src: walk.src.virt.addr, bytes: nbytes, nrounds: ctx->nrounds);
135 kernel_neon_end();
136 }
137 err = skcipher_walk_done(walk: &walk, err: walk.nbytes - nbytes);
138 }
139
140 return err;
141}
142
143static int do_chacha(struct skcipher_request *req, bool neon)
144{
145 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
146 struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
147
148 return chacha_stream_xor(req, ctx, iv: req->iv, neon);
149}
150
/* skcipher encrypt/decrypt handler that always takes the scalar path. */
static int chacha_arm(struct skcipher_request *req)
{
	return do_chacha(req, false);
}
155
/*
 * skcipher encrypt/decrypt handler that takes the NEON path when
 * neon_usable() allows it at call time, and the scalar path otherwise.
 */
static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}
160
161static int do_xchacha(struct skcipher_request *req, bool neon)
162{
163 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
164 struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
165 struct chacha_ctx subctx;
166 u32 state[16];
167 u8 real_iv[16];
168
169 chacha_init_generic(state, key: ctx->key, iv: req->iv);
170
171 if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
172 hchacha_block_arm(state, out: subctx.key, nrounds: ctx->nrounds);
173 } else {
174 kernel_neon_begin();
175 hchacha_block_neon(state, out: subctx.key, nrounds: ctx->nrounds);
176 kernel_neon_end();
177 }
178 subctx.nrounds = ctx->nrounds;
179
180 memcpy(&real_iv[0], req->iv + 24, 8);
181 memcpy(&real_iv[8], req->iv + 16, 8);
182 return chacha_stream_xor(req, ctx: &subctx, iv: real_iv, neon);
183}
184
/* skcipher encrypt/decrypt handler for XChaCha, scalar path only. */
static int xchacha_arm(struct skcipher_request *req)
{
	return do_xchacha(req, false);
}
189
/*
 * skcipher encrypt/decrypt handler for XChaCha that takes the NEON path
 * when neon_usable() allows it at call time, and the scalar path otherwise.
 */
static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}
194
195static struct skcipher_alg arm_algs[] = {
196 {
197 .base.cra_name = "chacha20",
198 .base.cra_driver_name = "chacha20-arm",
199 .base.cra_priority = 200,
200 .base.cra_blocksize = 1,
201 .base.cra_ctxsize = sizeof(struct chacha_ctx),
202 .base.cra_module = THIS_MODULE,
203
204 .min_keysize = CHACHA_KEY_SIZE,
205 .max_keysize = CHACHA_KEY_SIZE,
206 .ivsize = CHACHA_IV_SIZE,
207 .chunksize = CHACHA_BLOCK_SIZE,
208 .setkey = chacha20_setkey,
209 .encrypt = chacha_arm,
210 .decrypt = chacha_arm,
211 }, {
212 .base.cra_name = "xchacha20",
213 .base.cra_driver_name = "xchacha20-arm",
214 .base.cra_priority = 200,
215 .base.cra_blocksize = 1,
216 .base.cra_ctxsize = sizeof(struct chacha_ctx),
217 .base.cra_module = THIS_MODULE,
218
219 .min_keysize = CHACHA_KEY_SIZE,
220 .max_keysize = CHACHA_KEY_SIZE,
221 .ivsize = XCHACHA_IV_SIZE,
222 .chunksize = CHACHA_BLOCK_SIZE,
223 .setkey = chacha20_setkey,
224 .encrypt = xchacha_arm,
225 .decrypt = xchacha_arm,
226 }, {
227 .base.cra_name = "xchacha12",
228 .base.cra_driver_name = "xchacha12-arm",
229 .base.cra_priority = 200,
230 .base.cra_blocksize = 1,
231 .base.cra_ctxsize = sizeof(struct chacha_ctx),
232 .base.cra_module = THIS_MODULE,
233
234 .min_keysize = CHACHA_KEY_SIZE,
235 .max_keysize = CHACHA_KEY_SIZE,
236 .ivsize = XCHACHA_IV_SIZE,
237 .chunksize = CHACHA_BLOCK_SIZE,
238 .setkey = chacha12_setkey,
239 .encrypt = xchacha_arm,
240 .decrypt = xchacha_arm,
241 },
242};
243
244static struct skcipher_alg neon_algs[] = {
245 {
246 .base.cra_name = "chacha20",
247 .base.cra_driver_name = "chacha20-neon",
248 .base.cra_priority = 300,
249 .base.cra_blocksize = 1,
250 .base.cra_ctxsize = sizeof(struct chacha_ctx),
251 .base.cra_module = THIS_MODULE,
252
253 .min_keysize = CHACHA_KEY_SIZE,
254 .max_keysize = CHACHA_KEY_SIZE,
255 .ivsize = CHACHA_IV_SIZE,
256 .chunksize = CHACHA_BLOCK_SIZE,
257 .walksize = 4 * CHACHA_BLOCK_SIZE,
258 .setkey = chacha20_setkey,
259 .encrypt = chacha_neon,
260 .decrypt = chacha_neon,
261 }, {
262 .base.cra_name = "xchacha20",
263 .base.cra_driver_name = "xchacha20-neon",
264 .base.cra_priority = 300,
265 .base.cra_blocksize = 1,
266 .base.cra_ctxsize = sizeof(struct chacha_ctx),
267 .base.cra_module = THIS_MODULE,
268
269 .min_keysize = CHACHA_KEY_SIZE,
270 .max_keysize = CHACHA_KEY_SIZE,
271 .ivsize = XCHACHA_IV_SIZE,
272 .chunksize = CHACHA_BLOCK_SIZE,
273 .walksize = 4 * CHACHA_BLOCK_SIZE,
274 .setkey = chacha20_setkey,
275 .encrypt = xchacha_neon,
276 .decrypt = xchacha_neon,
277 }, {
278 .base.cra_name = "xchacha12",
279 .base.cra_driver_name = "xchacha12-neon",
280 .base.cra_priority = 300,
281 .base.cra_blocksize = 1,
282 .base.cra_ctxsize = sizeof(struct chacha_ctx),
283 .base.cra_module = THIS_MODULE,
284
285 .min_keysize = CHACHA_KEY_SIZE,
286 .max_keysize = CHACHA_KEY_SIZE,
287 .ivsize = XCHACHA_IV_SIZE,
288 .chunksize = CHACHA_BLOCK_SIZE,
289 .walksize = 4 * CHACHA_BLOCK_SIZE,
290 .setkey = chacha12_setkey,
291 .encrypt = xchacha_neon,
292 .decrypt = xchacha_neon,
293 }
294};
295
296static int __init chacha_simd_mod_init(void)
297{
298 int err = 0;
299
300 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
301 err = crypto_register_skciphers(algs: arm_algs, ARRAY_SIZE(arm_algs));
302 if (err)
303 return err;
304 }
305
306 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
307 int i;
308
309 switch (read_cpuid_part()) {
310 case ARM_CPU_PART_CORTEX_A7:
311 case ARM_CPU_PART_CORTEX_A5:
312 /*
313 * The Cortex-A7 and Cortex-A5 do not perform well with
314 * the NEON implementation but do incredibly with the
315 * scalar one and use less power.
316 */
317 for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
318 neon_algs[i].base.cra_priority = 0;
319 break;
320 default:
321 static_branch_enable(&use_neon);
322 }
323
324 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
325 err = crypto_register_skciphers(algs: neon_algs, ARRAY_SIZE(neon_algs));
326 if (err)
327 crypto_unregister_skciphers(algs: arm_algs, ARRAY_SIZE(arm_algs));
328 }
329 }
330 return err;
331}
332
333static void __exit chacha_simd_mod_fini(void)
334{
335 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
336 crypto_unregister_skciphers(algs: arm_algs, ARRAY_SIZE(arm_algs));
337 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
338 crypto_unregister_skciphers(algs: neon_algs, ARRAY_SIZE(neon_algs));
339 }
340}
341
342module_init(chacha_simd_mod_init);
343module_exit(chacha_simd_mod_fini);
344
345MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
346MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
347MODULE_LICENSE("GPL v2");
348MODULE_ALIAS_CRYPTO("chacha20");
349MODULE_ALIAS_CRYPTO("chacha20-arm");
350MODULE_ALIAS_CRYPTO("xchacha20");
351MODULE_ALIAS_CRYPTO("xchacha20-arm");
352MODULE_ALIAS_CRYPTO("xchacha12");
353MODULE_ALIAS_CRYPTO("xchacha12-arm");
354#ifdef CONFIG_KERNEL_MODE_NEON
355MODULE_ALIAS_CRYPTO("chacha20-neon");
356MODULE_ALIAS_CRYPTO("xchacha20-neon");
357MODULE_ALIAS_CRYPTO("xchacha12-neon");
358#endif
359

/* Source: linux/arch/arm/crypto/chacha-glue.c */