1 | /* |
2 | * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, |
3 | * including ChaCha20 (RFC7539) |
4 | * |
5 | * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. |
10 | * |
11 | * Based on: |
12 | * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code |
13 | * |
14 | * Copyright (C) 2015 Martin Willi |
15 | * |
16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by |
18 | * the Free Software Foundation; either version 2 of the License, or |
19 | * (at your option) any later version. |
20 | */ |
21 | |
22 | #include <crypto/algapi.h> |
23 | #include <crypto/internal/chacha.h> |
24 | #include <crypto/internal/simd.h> |
25 | #include <crypto/internal/skcipher.h> |
26 | #include <linux/jump_label.h> |
27 | #include <linux/kernel.h> |
28 | #include <linux/module.h> |
29 | |
30 | #include <asm/hwcap.h> |
31 | #include <asm/neon.h> |
32 | #include <asm/simd.h> |
33 | |
34 | asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src, |
35 | int nrounds); |
36 | asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, |
37 | int nrounds, int bytes); |
38 | asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
39 | |
40 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
41 | |
42 | static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
43 | int bytes, int nrounds) |
44 | { |
45 | while (bytes > 0) { |
46 | int l = min(bytes, CHACHA_BLOCK_SIZE * 5); |
47 | |
48 | if (l <= CHACHA_BLOCK_SIZE) { |
49 | u8 buf[CHACHA_BLOCK_SIZE]; |
50 | |
51 | memcpy(buf, src, l); |
52 | chacha_block_xor_neon(state, dst: buf, src: buf, nrounds); |
53 | memcpy(dst, buf, l); |
54 | state[12] += 1; |
55 | break; |
56 | } |
57 | chacha_4block_xor_neon(state, dst, src, nrounds, bytes: l); |
58 | bytes -= l; |
59 | src += l; |
60 | dst += l; |
61 | state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); |
62 | } |
63 | } |
64 | |
65 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
66 | { |
67 | if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { |
68 | hchacha_block_generic(state, out: stream, nrounds); |
69 | } else { |
70 | kernel_neon_begin(); |
71 | hchacha_block_neon(state, out: stream, nrounds); |
72 | kernel_neon_end(); |
73 | } |
74 | } |
75 | EXPORT_SYMBOL(hchacha_block_arch); |
76 | |
77 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
78 | { |
79 | chacha_init_generic(state, key, iv); |
80 | } |
81 | EXPORT_SYMBOL(chacha_init_arch); |
82 | |
83 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
84 | int nrounds) |
85 | { |
86 | if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || |
87 | !crypto_simd_usable()) |
88 | return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
89 | |
90 | do { |
91 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
92 | |
93 | kernel_neon_begin(); |
94 | chacha_doneon(state, dst, src, bytes: todo, nrounds); |
95 | kernel_neon_end(); |
96 | |
97 | bytes -= todo; |
98 | src += todo; |
99 | dst += todo; |
100 | } while (bytes); |
101 | } |
102 | EXPORT_SYMBOL(chacha_crypt_arch); |
103 | |
104 | static int chacha_neon_stream_xor(struct skcipher_request *req, |
105 | const struct chacha_ctx *ctx, const u8 *iv) |
106 | { |
107 | struct skcipher_walk walk; |
108 | u32 state[16]; |
109 | int err; |
110 | |
111 | err = skcipher_walk_virt(walk: &walk, req, atomic: false); |
112 | |
113 | chacha_init_generic(state, key: ctx->key, iv); |
114 | |
115 | while (walk.nbytes > 0) { |
116 | unsigned int nbytes = walk.nbytes; |
117 | |
118 | if (nbytes < walk.total) |
119 | nbytes = rounddown(nbytes, walk.stride); |
120 | |
121 | if (!static_branch_likely(&have_neon) || |
122 | !crypto_simd_usable()) { |
123 | chacha_crypt_generic(state, dst: walk.dst.virt.addr, |
124 | src: walk.src.virt.addr, bytes: nbytes, |
125 | nrounds: ctx->nrounds); |
126 | } else { |
127 | kernel_neon_begin(); |
128 | chacha_doneon(state, dst: walk.dst.virt.addr, |
129 | src: walk.src.virt.addr, bytes: nbytes, nrounds: ctx->nrounds); |
130 | kernel_neon_end(); |
131 | } |
132 | err = skcipher_walk_done(walk: &walk, err: walk.nbytes - nbytes); |
133 | } |
134 | |
135 | return err; |
136 | } |
137 | |
138 | static int chacha_neon(struct skcipher_request *req) |
139 | { |
140 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
141 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
142 | |
143 | return chacha_neon_stream_xor(req, ctx, iv: req->iv); |
144 | } |
145 | |
146 | static int xchacha_neon(struct skcipher_request *req) |
147 | { |
148 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
149 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
150 | struct chacha_ctx subctx; |
151 | u32 state[16]; |
152 | u8 real_iv[16]; |
153 | |
154 | chacha_init_generic(state, key: ctx->key, iv: req->iv); |
155 | hchacha_block_arch(state, subctx.key, ctx->nrounds); |
156 | subctx.nrounds = ctx->nrounds; |
157 | |
158 | memcpy(&real_iv[0], req->iv + 24, 8); |
159 | memcpy(&real_iv[8], req->iv + 16, 8); |
160 | return chacha_neon_stream_xor(req, ctx: &subctx, iv: real_iv); |
161 | } |
162 | |
163 | static struct skcipher_alg algs[] = { |
164 | { |
165 | .base.cra_name = "chacha20" , |
166 | .base.cra_driver_name = "chacha20-neon" , |
167 | .base.cra_priority = 300, |
168 | .base.cra_blocksize = 1, |
169 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
170 | .base.cra_module = THIS_MODULE, |
171 | |
172 | .min_keysize = CHACHA_KEY_SIZE, |
173 | .max_keysize = CHACHA_KEY_SIZE, |
174 | .ivsize = CHACHA_IV_SIZE, |
175 | .chunksize = CHACHA_BLOCK_SIZE, |
176 | .walksize = 5 * CHACHA_BLOCK_SIZE, |
177 | .setkey = chacha20_setkey, |
178 | .encrypt = chacha_neon, |
179 | .decrypt = chacha_neon, |
180 | }, { |
181 | .base.cra_name = "xchacha20" , |
182 | .base.cra_driver_name = "xchacha20-neon" , |
183 | .base.cra_priority = 300, |
184 | .base.cra_blocksize = 1, |
185 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
186 | .base.cra_module = THIS_MODULE, |
187 | |
188 | .min_keysize = CHACHA_KEY_SIZE, |
189 | .max_keysize = CHACHA_KEY_SIZE, |
190 | .ivsize = XCHACHA_IV_SIZE, |
191 | .chunksize = CHACHA_BLOCK_SIZE, |
192 | .walksize = 5 * CHACHA_BLOCK_SIZE, |
193 | .setkey = chacha20_setkey, |
194 | .encrypt = xchacha_neon, |
195 | .decrypt = xchacha_neon, |
196 | }, { |
197 | .base.cra_name = "xchacha12" , |
198 | .base.cra_driver_name = "xchacha12-neon" , |
199 | .base.cra_priority = 300, |
200 | .base.cra_blocksize = 1, |
201 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
202 | .base.cra_module = THIS_MODULE, |
203 | |
204 | .min_keysize = CHACHA_KEY_SIZE, |
205 | .max_keysize = CHACHA_KEY_SIZE, |
206 | .ivsize = XCHACHA_IV_SIZE, |
207 | .chunksize = CHACHA_BLOCK_SIZE, |
208 | .walksize = 5 * CHACHA_BLOCK_SIZE, |
209 | .setkey = chacha12_setkey, |
210 | .encrypt = xchacha_neon, |
211 | .decrypt = xchacha_neon, |
212 | } |
213 | }; |
214 | |
215 | static int __init chacha_simd_mod_init(void) |
216 | { |
217 | if (!cpu_have_named_feature(ASIMD)) |
218 | return 0; |
219 | |
220 | static_branch_enable(&have_neon); |
221 | |
222 | return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? |
223 | crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; |
224 | } |
225 | |
226 | static void __exit chacha_simd_mod_fini(void) |
227 | { |
228 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD)) |
229 | crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
230 | } |
231 | |
232 | module_init(chacha_simd_mod_init); |
233 | module_exit(chacha_simd_mod_fini); |
234 | |
235 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)" ); |
236 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>" ); |
237 | MODULE_LICENSE("GPL v2" ); |
238 | MODULE_ALIAS_CRYPTO("chacha20" ); |
239 | MODULE_ALIAS_CRYPTO("chacha20-neon" ); |
240 | MODULE_ALIAS_CRYPTO("xchacha20" ); |
241 | MODULE_ALIAS_CRYPTO("xchacha20-neon" ); |
242 | MODULE_ALIAS_CRYPTO("xchacha12" ); |
243 | MODULE_ALIAS_CRYPTO("xchacha12-neon" ); |
244 | |