// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);

void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

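/*
 * The __weak definition of poly1305_blocks_neon() above exists only to
 * satisfy the reference when the NEON assembly object is not built in
 * (CONFIG_KERNEL_MODE_NEON=n); when it is, the strong definition in the
 * generated assembly takes precedence over this empty stub.
 */
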
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
	/* r = key[0..15] is clamped and stored by the assembly routine */
	poly1305_init_arm(&dctx->h, key);
	/* s = key[16..31] is kept raw; it is only added in at emit time */
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

static int arm_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

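/*
 * Note that the shash variant has no ->setkey(): following the kernel's
 * convention for the "poly1305" shash, the 32-byte one-time key is fed
 * in-band as the first two blocks of the message (r first, then s).
 * ->rset and ->sset track how much of the key has been consumed; see
 * arm_poly1305_blocks() below.
 */
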
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/* the first block carries r */
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			/* the second block carries s */
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}

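/*
 * The hibit argument passed to the block routines selects whether the
 * implicit 2^128 pad bit is set on each block: 1 for full message
 * blocks, 0 for the final, explicitly padded partial block (see
 * poly1305_final_arch() below).
 */
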
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		/* top up and flush a partial block left by a prior call */
		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* stash the tail for the next update or for finalization */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

static int arm_poly1305_update(struct shash_desc *desc,
			       const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	arm_poly1305_do_update(dctx, src, srclen, false);
	return 0;
}

static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
						   const u8 *src,
						   unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
	bool do_neon = crypto_simd_usable() && srclen > 128;

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	arm_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();
	return 0;
}

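/*
 * The srclen > 128 check above gates NEON use on the input being large
 * enough to amortize the overhead of kernel_neon_begin()/kernel_neon_end();
 * for shorter inputs the scalar code is expected to be at least as fast.
 */
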
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

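/*
 * poly1305_update_arch() feeds the NEON code in chunks of at most 4 KiB
 * because kernel_neon_begin() disables preemption on ARM; bounding the
 * chunk size bounds the scheduling latency a large update can add.
 */
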
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		/*
		 * Pad the trailing partial block: append 0x01, zero-fill the
		 * rest, and process it with the 2^128 bit clear (hibit == 0).
		 */
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);
	/* wipe the descriptor so no key material is left behind */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);

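/*
 * Illustrative sketch (not part of the original file): a typical caller
 * reaches the *_arch routines exported above through the generic inline
 * wrappers in <crypto/poly1305.h>, which dispatch to the arch code when
 * CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305 is enabled. poly1305_demo() is a
 * made-up name for the example.
 */
#if 0
#include <crypto/poly1305.h>

static void poly1305_demo(const u8 key[POLY1305_KEY_SIZE],
			  const u8 *msg, unsigned int len)
{
	struct poly1305_desc_ctx desc;
	u8 mac[POLY1305_DIGEST_SIZE];

	poly1305_init(&desc, key);	/* -> poly1305_init_arch() */
	poly1305_update(&desc, msg, len);
	poly1305_final(&desc, mac);	/* -> poly1305_final_arch() */
}
#endif
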
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};

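/*
 * Two shash registrations share this driver: the scalar "poly1305-arm"
 * entry at priority 150 and, when CONFIG_KERNEL_MODE_NEON is set, the
 * "poly1305-neon" entry at priority 200. The crypto API resolves
 * "poly1305" to the highest-priority usable implementation.
 */
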
static int __init arm_poly1305_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    (elf_hwcap & HWCAP_NEON))
		static_branch_enable(&have_neon);
	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		/* register only the first entry */
		return crypto_register_shash(&arm_poly1305_algs[0]);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(arm_poly1305_algs,
					ARRAY_SIZE(arm_poly1305_algs)) : 0;
}

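/*
 * If NEON turned out to be unavailable, only arm_poly1305_algs[0] was
 * registered above; the have_neon key records that choice so the exit
 * path below unregisters exactly what init registered. The library
 * interface (poly1305_*_arch) works either way.
 */
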
static void __exit arm_poly1305_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return;
	if (!static_branch_likely(&have_neon)) {
		crypto_unregister_shash(&arm_poly1305_algs[0]);
		return;
	}
	crypto_unregister_shashes(arm_poly1305_algs,
				  ARRAY_SIZE(arm_poly1305_algs));
}

module_init(arm_poly1305_mod_init);
module_exit(arm_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-arm");
MODULE_ALIAS_CRYPTO("poly1305-neon");

Source code of linux/arch/arm/crypto/poly1305-glue.c