1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM |
4 | * |
5 | * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> |
6 | */ |
7 | |
8 | #include <asm/hwcap.h> |
9 | #include <asm/neon.h> |
10 | #include <asm/simd.h> |
11 | #include <asm/unaligned.h> |
12 | #include <crypto/algapi.h> |
13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/internal/poly1305.h> |
15 | #include <crypto/internal/simd.h> |
16 | #include <linux/cpufeature.h> |
17 | #include <linux/crypto.h> |
18 | #include <linux/jump_label.h> |
19 | #include <linux/module.h> |
20 | |
21 | void poly1305_init_arm(void *state, const u8 *key); |
22 | void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); |
23 | void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); |
24 | void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); |
25 | |
26 | void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) |
27 | { |
28 | } |
29 | |
30 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); |
31 | |
32 | void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) |
33 | { |
34 | poly1305_init_arm(state: &dctx->h, key); |
35 | dctx->s[0] = get_unaligned_le32(p: key + 16); |
36 | dctx->s[1] = get_unaligned_le32(p: key + 20); |
37 | dctx->s[2] = get_unaligned_le32(p: key + 24); |
38 | dctx->s[3] = get_unaligned_le32(p: key + 28); |
39 | dctx->buflen = 0; |
40 | } |
41 | EXPORT_SYMBOL(poly1305_init_arch); |
42 | |
43 | static int arm_poly1305_init(struct shash_desc *desc) |
44 | { |
45 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
46 | |
47 | dctx->buflen = 0; |
48 | dctx->rset = 0; |
49 | dctx->sset = false; |
50 | |
51 | return 0; |
52 | } |
53 | |
54 | static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, |
55 | u32 len, u32 hibit, bool do_neon) |
56 | { |
57 | if (unlikely(!dctx->sset)) { |
58 | if (!dctx->rset) { |
59 | poly1305_init_arm(state: &dctx->h, key: src); |
60 | src += POLY1305_BLOCK_SIZE; |
61 | len -= POLY1305_BLOCK_SIZE; |
62 | dctx->rset = 1; |
63 | } |
64 | if (len >= POLY1305_BLOCK_SIZE) { |
65 | dctx->s[0] = get_unaligned_le32(p: src + 0); |
66 | dctx->s[1] = get_unaligned_le32(p: src + 4); |
67 | dctx->s[2] = get_unaligned_le32(p: src + 8); |
68 | dctx->s[3] = get_unaligned_le32(p: src + 12); |
69 | src += POLY1305_BLOCK_SIZE; |
70 | len -= POLY1305_BLOCK_SIZE; |
71 | dctx->sset = true; |
72 | } |
73 | if (len < POLY1305_BLOCK_SIZE) |
74 | return; |
75 | } |
76 | |
77 | len &= ~(POLY1305_BLOCK_SIZE - 1); |
78 | |
79 | if (static_branch_likely(&have_neon) && likely(do_neon)) |
80 | poly1305_blocks_neon(state: &dctx->h, src, len, hibit); |
81 | else |
82 | poly1305_blocks_arm(state: &dctx->h, src, len, hibit); |
83 | } |
84 | |
85 | static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, |
86 | const u8 *src, u32 len, bool do_neon) |
87 | { |
88 | if (unlikely(dctx->buflen)) { |
89 | u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); |
90 | |
91 | memcpy(dctx->buf + dctx->buflen, src, bytes); |
92 | src += bytes; |
93 | len -= bytes; |
94 | dctx->buflen += bytes; |
95 | |
96 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
97 | arm_poly1305_blocks(dctx, src: dctx->buf, |
98 | POLY1305_BLOCK_SIZE, hibit: 1, do_neon: false); |
99 | dctx->buflen = 0; |
100 | } |
101 | } |
102 | |
103 | if (likely(len >= POLY1305_BLOCK_SIZE)) { |
104 | arm_poly1305_blocks(dctx, src, len, hibit: 1, do_neon); |
105 | src += round_down(len, POLY1305_BLOCK_SIZE); |
106 | len %= POLY1305_BLOCK_SIZE; |
107 | } |
108 | |
109 | if (unlikely(len)) { |
110 | dctx->buflen = len; |
111 | memcpy(dctx->buf, src, len); |
112 | } |
113 | } |
114 | |
115 | static int arm_poly1305_update(struct shash_desc *desc, |
116 | const u8 *src, unsigned int srclen) |
117 | { |
118 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
119 | |
120 | arm_poly1305_do_update(dctx, src, len: srclen, do_neon: false); |
121 | return 0; |
122 | } |
123 | |
124 | static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, |
125 | const u8 *src, |
126 | unsigned int srclen) |
127 | { |
128 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
129 | bool do_neon = crypto_simd_usable() && srclen > 128; |
130 | |
131 | if (static_branch_likely(&have_neon) && do_neon) |
132 | kernel_neon_begin(); |
133 | arm_poly1305_do_update(dctx, src, len: srclen, do_neon); |
134 | if (static_branch_likely(&have_neon) && do_neon) |
135 | kernel_neon_end(); |
136 | return 0; |
137 | } |
138 | |
139 | void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, |
140 | unsigned int nbytes) |
141 | { |
142 | bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && |
143 | crypto_simd_usable(); |
144 | |
145 | if (unlikely(dctx->buflen)) { |
146 | u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); |
147 | |
148 | memcpy(dctx->buf + dctx->buflen, src, bytes); |
149 | src += bytes; |
150 | nbytes -= bytes; |
151 | dctx->buflen += bytes; |
152 | |
153 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { |
154 | poly1305_blocks_arm(state: &dctx->h, src: dctx->buf, |
155 | POLY1305_BLOCK_SIZE, hibit: 1); |
156 | dctx->buflen = 0; |
157 | } |
158 | } |
159 | |
160 | if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { |
161 | unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); |
162 | |
163 | if (static_branch_likely(&have_neon) && do_neon) { |
164 | do { |
165 | unsigned int todo = min_t(unsigned int, len, SZ_4K); |
166 | |
167 | kernel_neon_begin(); |
168 | poly1305_blocks_neon(state: &dctx->h, src, len: todo, hibit: 1); |
169 | kernel_neon_end(); |
170 | |
171 | len -= todo; |
172 | src += todo; |
173 | } while (len); |
174 | } else { |
175 | poly1305_blocks_arm(state: &dctx->h, src, len, hibit: 1); |
176 | src += len; |
177 | } |
178 | nbytes %= POLY1305_BLOCK_SIZE; |
179 | } |
180 | |
181 | if (unlikely(nbytes)) { |
182 | dctx->buflen = nbytes; |
183 | memcpy(dctx->buf, src, nbytes); |
184 | } |
185 | } |
186 | EXPORT_SYMBOL(poly1305_update_arch); |
187 | |
188 | void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) |
189 | { |
190 | if (unlikely(dctx->buflen)) { |
191 | dctx->buf[dctx->buflen++] = 1; |
192 | memset(dctx->buf + dctx->buflen, 0, |
193 | POLY1305_BLOCK_SIZE - dctx->buflen); |
194 | poly1305_blocks_arm(state: &dctx->h, src: dctx->buf, POLY1305_BLOCK_SIZE, hibit: 0); |
195 | } |
196 | |
197 | poly1305_emit_arm(state: &dctx->h, digest: dst, nonce: dctx->s); |
198 | *dctx = (struct poly1305_desc_ctx){}; |
199 | } |
200 | EXPORT_SYMBOL(poly1305_final_arch); |
201 | |
202 | static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) |
203 | { |
204 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); |
205 | |
206 | if (unlikely(!dctx->sset)) |
207 | return -ENOKEY; |
208 | |
209 | poly1305_final_arch(dctx, dst); |
210 | return 0; |
211 | } |
212 | |
213 | static struct shash_alg arm_poly1305_algs[] = {{ |
214 | .init = arm_poly1305_init, |
215 | .update = arm_poly1305_update, |
216 | .final = arm_poly1305_final, |
217 | .digestsize = POLY1305_DIGEST_SIZE, |
218 | .descsize = sizeof(struct poly1305_desc_ctx), |
219 | |
220 | .base.cra_name = "poly1305" , |
221 | .base.cra_driver_name = "poly1305-arm" , |
222 | .base.cra_priority = 150, |
223 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
224 | .base.cra_module = THIS_MODULE, |
225 | #ifdef CONFIG_KERNEL_MODE_NEON |
226 | }, { |
227 | .init = arm_poly1305_init, |
228 | .update = arm_poly1305_update_neon, |
229 | .final = arm_poly1305_final, |
230 | .digestsize = POLY1305_DIGEST_SIZE, |
231 | .descsize = sizeof(struct poly1305_desc_ctx), |
232 | |
233 | .base.cra_name = "poly1305" , |
234 | .base.cra_driver_name = "poly1305-neon" , |
235 | .base.cra_priority = 200, |
236 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, |
237 | .base.cra_module = THIS_MODULE, |
238 | #endif |
239 | }}; |
240 | |
241 | static int __init arm_poly1305_mod_init(void) |
242 | { |
243 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && |
244 | (elf_hwcap & HWCAP_NEON)) |
245 | static_branch_enable(&have_neon); |
246 | else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
247 | /* register only the first entry */ |
248 | return crypto_register_shash(alg: &arm_poly1305_algs[0]); |
249 | |
250 | return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
251 | crypto_register_shashes(algs: arm_poly1305_algs, |
252 | ARRAY_SIZE(arm_poly1305_algs)) : 0; |
253 | } |
254 | |
255 | static void __exit arm_poly1305_mod_exit(void) |
256 | { |
257 | if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
258 | return; |
259 | if (!static_branch_likely(&have_neon)) { |
260 | crypto_unregister_shash(alg: &arm_poly1305_algs[0]); |
261 | return; |
262 | } |
263 | crypto_unregister_shashes(algs: arm_poly1305_algs, |
264 | ARRAY_SIZE(arm_poly1305_algs)); |
265 | } |
266 | |
267 | module_init(arm_poly1305_mod_init); |
268 | module_exit(arm_poly1305_mod_exit); |
269 | |
270 | MODULE_LICENSE("GPL v2" ); |
271 | MODULE_ALIAS_CRYPTO("poly1305" ); |
272 | MODULE_ALIAS_CRYPTO("poly1305-arm" ); |
273 | MODULE_ALIAS_CRYPTO("poly1305-neon" ); |
274 | |