1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
 * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
4 | * |
5 | * Copyright (C) 2015 - 2018 Linaro Ltd. |
6 | * Copyright (C) 2023 Google LLC. |
7 | */ |
8 | |
9 | #include <asm/hwcap.h> |
10 | #include <asm/neon.h> |
11 | #include <asm/simd.h> |
12 | #include <asm/unaligned.h> |
13 | #include <crypto/aes.h> |
14 | #include <crypto/gcm.h> |
15 | #include <crypto/b128ops.h> |
16 | #include <crypto/cryptd.h> |
17 | #include <crypto/internal/aead.h> |
18 | #include <crypto/internal/hash.h> |
19 | #include <crypto/internal/simd.h> |
20 | #include <crypto/internal/skcipher.h> |
21 | #include <crypto/gf128mul.h> |
22 | #include <crypto/scatterwalk.h> |
23 | #include <linux/cpufeature.h> |
24 | #include <linux/crypto.h> |
25 | #include <linux/jump_label.h> |
26 | #include <linux/module.h> |
27 | |
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
MODULE_ALIAS_CRYPTO("gcm(aes)");
MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");
34 | |
35 | #define GHASH_BLOCK_SIZE 16 |
36 | #define GHASH_DIGEST_SIZE 16 |
37 | |
38 | #define RFC4106_NONCE_SIZE 4 |
39 | |
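/*
 * The 'h' member is a flexible array of powers of the hash key H in the
 * pre-shifted format consumed by the PMULL assembly; 'k' keeps the raw
 * key around for the scalar gf128mul fallback. Only H itself is stored
 * unless vmull.p64 is available (see ghash_ce_mod_init()).
 */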
40 | struct ghash_key { |
41 | be128 k; |
42 | u64 h[][2]; |
43 | }; |
44 | |
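/*
 * AES-GCM key material: the first four powers of H (allowing several
 * blocks to be hashed per reduction), the expanded AES round keys, and
 * (for RFC4106 only) a trailing nonce that is prepended to the
 * per-request IV.
 */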
45 | struct gcm_key { |
46 | u64 h[4][2]; |
47 | u32 rk[AES_MAX_KEYLENGTH_U32]; |
48 | int rounds; |
49 | u8 nonce[]; // for RFC4106 nonce |
50 | }; |
51 | |
52 | struct ghash_desc_ctx { |
53 | u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; |
54 | u8 buf[GHASH_BLOCK_SIZE]; |
55 | u32 count; |
56 | }; |
57 | |
58 | struct ghash_async_ctx { |
59 | struct cryptd_ahash *cryptd_tfm; |
60 | }; |
61 | |
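/*
 * GHASH block update routines implemented in assembly: the p64 variant
 * uses the 64x64->128 polynomial multiply (vmull.p64), while the p8
 * variant synthesizes it from 8x8 polynomial multiplies on plain NEON.
 */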
62 | asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, |
63 | u64 const h[][2], const char *head); |
64 | |
65 | asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, |
66 | u64 const h[][2], const char *head); |
67 | |
68 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64); |
69 | |
70 | static int ghash_init(struct shash_desc *desc) |
71 | { |
72 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
73 | |
74 | *ctx = (struct ghash_desc_ctx){}; |
75 | return 0; |
76 | } |
77 | |
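/*
 * Process 'blocks' full GHASH blocks: use the NEON PMULL code when the
 * SIMD unit may be used, or fall back to the generic GF(2^128)
 * multiplication helpers otherwise. 'head' optionally points to a single
 * buffered block to be consumed before 'src'.
 */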
78 | static void ghash_do_update(int blocks, u64 dg[], const char *src, |
79 | struct ghash_key *key, const char *head) |
80 | { |
81 | if (likely(crypto_simd_usable())) { |
82 | kernel_neon_begin(); |
83 | if (static_branch_likely(&use_p64)) |
			pmull_ghash_update_p64(blocks, dg, src, key->h, head);
		else
			pmull_ghash_update_p8(blocks, dg, src, key->h, head);
87 | kernel_neon_end(); |
88 | } else { |
89 | be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) }; |
90 | |
91 | do { |
92 | const u8 *in = src; |
93 | |
94 | if (head) { |
95 | in = head; |
96 | blocks++; |
97 | head = NULL; |
98 | } else { |
99 | src += GHASH_BLOCK_SIZE; |
100 | } |
101 | |
			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
			gf128mul_lle(&dst, &key->k);
104 | } while (--blocks); |
105 | |
106 | dg[0] = be64_to_cpu(dst.b); |
107 | dg[1] = be64_to_cpu(dst.a); |
108 | } |
109 | } |
110 | |
111 | static int ghash_update(struct shash_desc *desc, const u8 *src, |
112 | unsigned int len) |
113 | { |
114 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
115 | unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; |
116 | |
117 | ctx->count += len; |
118 | |
119 | if ((partial + len) >= GHASH_BLOCK_SIZE) { |
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
121 | int blocks; |
122 | |
123 | if (partial) { |
124 | int p = GHASH_BLOCK_SIZE - partial; |
125 | |
126 | memcpy(ctx->buf + partial, src, p); |
127 | src += p; |
128 | len -= p; |
129 | } |
130 | |
131 | blocks = len / GHASH_BLOCK_SIZE; |
132 | len %= GHASH_BLOCK_SIZE; |
133 | |
		ghash_do_update(blocks, ctx->digest, src, key,
				partial ? ctx->buf : NULL);
136 | src += blocks * GHASH_BLOCK_SIZE; |
137 | partial = 0; |
138 | } |
139 | if (len) |
140 | memcpy(ctx->buf + partial, src, len); |
141 | return 0; |
142 | } |
143 | |
144 | static int ghash_final(struct shash_desc *desc, u8 *dst) |
145 | { |
146 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
147 | unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; |
148 | |
149 | if (partial) { |
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
151 | |
152 | memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); |
		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
154 | } |
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);
157 | |
158 | *ctx = (struct ghash_desc_ctx){}; |
159 | return 0; |
160 | } |
161 | |
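/*
 * Convert a hash key (or power thereof) into the shifted representation
 * used by the PMULL code: the 128-bit value is multiplied by x, i.e.,
 * shifted left by one bit, with any carry out of the top bit folded back
 * in using the 0xc2...00 term of the GHASH reduction polynomial.
 */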
162 | static void ghash_reflect(u64 h[], const be128 *k) |
163 | { |
164 | u64 carry = be64_to_cpu(k->a) >> 63; |
165 | |
166 | h[0] = (be64_to_cpu(k->b) << 1) | carry; |
167 | h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63); |
168 | |
169 | if (carry) |
170 | h[1] ^= 0xc200000000000000UL; |
171 | } |
172 | |
173 | static int ghash_setkey(struct crypto_shash *tfm, |
174 | const u8 *inkey, unsigned int keylen) |
175 | { |
176 | struct ghash_key *key = crypto_shash_ctx(tfm); |
177 | |
178 | if (keylen != GHASH_BLOCK_SIZE) |
179 | return -EINVAL; |
180 | |
181 | /* needed for the fallback */ |
182 | memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); |
	ghash_reflect(key->h[0], &key->k);
184 | |
185 | if (static_branch_likely(&use_p64)) { |
186 | be128 h = key->k; |
187 | |
		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[1], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[2], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[3], &h);
196 | } |
197 | return 0; |
198 | } |
199 | |
200 | static struct shash_alg ghash_alg = { |
201 | .digestsize = GHASH_DIGEST_SIZE, |
202 | .init = ghash_init, |
203 | .update = ghash_update, |
204 | .final = ghash_final, |
205 | .setkey = ghash_setkey, |
206 | .descsize = sizeof(struct ghash_desc_ctx), |
207 | |
	.base.cra_name = "ghash",
	.base.cra_driver_name = "ghash-ce-sync",
210 | .base.cra_priority = 300 - 1, |
211 | .base.cra_blocksize = GHASH_BLOCK_SIZE, |
212 | .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]), |
213 | .base.cra_module = THIS_MODULE, |
214 | }; |
215 | |
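/*
 * The async "ghash-ce" wrapper defers to the synchronous shash above via
 * cryptd, so that GHASH requests arriving in contexts where the NEON
 * unit cannot be used are queued to a kworker instead of failing.
 */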
216 | static int ghash_async_init(struct ahash_request *req) |
217 | { |
218 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
219 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
220 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
221 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
	struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
224 | |
225 | desc->tfm = child; |
226 | return crypto_shash_init(desc); |
227 | } |
228 | |
229 | static int ghash_async_update(struct ahash_request *req) |
230 | { |
231 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
232 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
233 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
234 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
235 | |
236 | if (!crypto_simd_usable() || |
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		memcpy(cryptd_req, req, sizeof(*req));
		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
		return crypto_ahash_update(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		return shash_ahash_update(req, desc);
244 | } |
245 | } |
246 | |
247 | static int ghash_async_final(struct ahash_request *req) |
248 | { |
249 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
250 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
251 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
252 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
253 | |
254 | if (!crypto_simd_usable() || |
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		memcpy(cryptd_req, req, sizeof(*req));
		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
		return crypto_ahash_final(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

		return crypto_shash_final(desc, req->result);
262 | } |
263 | } |
264 | |
265 | static int ghash_async_digest(struct ahash_request *req) |
266 | { |
267 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
268 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
269 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
270 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
271 | |
272 | if (!crypto_simd_usable() || |
	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
		memcpy(cryptd_req, req, sizeof(*req));
		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
		return crypto_ahash_digest(cryptd_req);
	} else {
		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);

		desc->tfm = child;
		return shash_ahash_digest(req, desc);
283 | } |
284 | } |
285 | |
286 | static int ghash_async_import(struct ahash_request *req, const void *in) |
287 | { |
288 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
289 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
290 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);

	desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
294 | |
295 | return crypto_shash_import(desc, in); |
296 | } |
297 | |
298 | static int ghash_async_export(struct ahash_request *req, void *out) |
299 | { |
300 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
302 | |
303 | return crypto_shash_export(desc, out); |
304 | } |
305 | |
306 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, |
307 | unsigned int keylen) |
308 | { |
309 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
310 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; |
311 | |
	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
			       & CRYPTO_TFM_REQ_MASK);
	return crypto_ahash_setkey(child, key, keylen);
316 | } |
317 | |
318 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) |
319 | { |
320 | struct cryptd_ahash *cryptd_tfm; |
321 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
322 | |
	cryptd_tfm = cryptd_alloc_ahash("ghash-ce-sync", 0, 0);
	if (IS_ERR(cryptd_tfm))
		return PTR_ERR(cryptd_tfm);
	ctx->cryptd_tfm = cryptd_tfm;
	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct ahash_request) +
				 crypto_ahash_reqsize(&cryptd_tfm->base));
330 | |
331 | return 0; |
332 | } |
333 | |
334 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) |
335 | { |
336 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
337 | |
	cryptd_free_ahash(ctx->cryptd_tfm);
339 | } |
340 | |
341 | static struct ahash_alg ghash_async_alg = { |
342 | .init = ghash_async_init, |
343 | .update = ghash_async_update, |
344 | .final = ghash_async_final, |
345 | .setkey = ghash_async_setkey, |
346 | .digest = ghash_async_digest, |
347 | .import = ghash_async_import, |
348 | .export = ghash_async_export, |
349 | .halg.digestsize = GHASH_DIGEST_SIZE, |
350 | .halg.statesize = sizeof(struct ghash_desc_ctx), |
351 | .halg.base = { |
		.cra_name = "ghash",
		.cra_driver_name = "ghash-ce",
354 | .cra_priority = 300, |
355 | .cra_flags = CRYPTO_ALG_ASYNC, |
356 | .cra_blocksize = GHASH_BLOCK_SIZE, |
357 | .cra_ctxsize = sizeof(struct ghash_async_ctx), |
358 | .cra_module = THIS_MODULE, |
359 | .cra_init = ghash_async_init_tfm, |
360 | .cra_exit = ghash_async_exit_tfm, |
361 | }, |
362 | }; |
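
/*
 * Fused AES-CTR/GHASH helpers for GCM, implemented in assembly and only
 * reachable when vmull.p64 is supported. pmull_gcm_dec_final() also
 * compares the tag calculated over the decrypted data with the one
 * transferred in the ciphertext ('otag'), and returns nonzero on
 * mismatch.
 */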
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
				    struct gcm_key const *k, char *head,
				    const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
				   struct gcm_key const *k, char *head,
				   const char *iv, int rounds, u32 counter,
				   const char *otag, int authsize);
381 | |
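/*
 * Expand the AES key, derive the hash key H = E_K(0^128), and precompute
 * H^1..H^4 in the reflected form used by the PMULL code.
 */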
382 | static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey, |
383 | unsigned int keylen) |
384 | { |
385 | struct gcm_key *ctx = crypto_aead_ctx(tfm); |
386 | struct crypto_aes_ctx aes_ctx; |
387 | be128 h, k; |
388 | int ret; |
389 | |
	ret = aes_expandkey(&aes_ctx, inkey, keylen);
391 | if (ret) |
392 | return -EINVAL; |
393 | |
	aes_encrypt(&aes_ctx, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});
395 | |
396 | memcpy(ctx->rk, aes_ctx.key_enc, sizeof(ctx->rk)); |
397 | ctx->rounds = 6 + keylen / 4; |
398 | |
	memzero_explicit(&aes_ctx, sizeof(aes_ctx));
400 | |
	ghash_reflect(ctx->h[0], &k);
402 | |
403 | h = k; |
	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[1], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[2], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[3], &h);
412 | |
413 | return 0; |
414 | } |
415 | |
416 | static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) |
417 | { |
418 | return crypto_gcm_check_authsize(authsize); |
419 | } |
420 | |
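/*
 * Fold 'count' bytes of associated data into the GHASH state, buffering
 * any trailing partial block in 'buf' across calls. Must be called with
 * the NEON unit enabled.
 */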
421 | static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], |
422 | int *buf_count, struct gcm_key *ctx) |
423 | { |
424 | if (*buf_count > 0) { |
425 | int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count); |
426 | |
427 | memcpy(&buf[*buf_count], src, buf_added); |
428 | |
429 | *buf_count += buf_added; |
430 | src += buf_added; |
431 | count -= buf_added; |
432 | } |
433 | |
434 | if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) { |
435 | int blocks = count / GHASH_BLOCK_SIZE; |
436 | |
		pmull_ghash_update_p64(blocks, dg, src, ctx->h,
				       *buf_count ? buf : NULL);
439 | |
440 | src += blocks * GHASH_BLOCK_SIZE; |
441 | count %= GHASH_BLOCK_SIZE; |
442 | *buf_count = 0; |
443 | } |
444 | |
445 | if (count > 0) { |
446 | memcpy(buf, src, count); |
447 | *buf_count = count; |
448 | } |
449 | } |
450 | |
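/*
 * Hash the associated data described by req->src into dg[]. Expects to
 * be called with the NEON unit enabled; it is briefly released and
 * reacquired every 4 KiB so preemption is not held off for too long on
 * large inputs.
 */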
451 | static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len) |
452 | { |
453 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
	struct gcm_key *ctx = crypto_aead_ctx(aead);
455 | u8 buf[GHASH_BLOCK_SIZE]; |
456 | struct scatter_walk walk; |
457 | int buf_count = 0; |
458 | |
	scatterwalk_start(&walk, req->src);
460 | |
461 | do { |
		u32 n = scatterwalk_clamp(&walk, len);
463 | u8 *p; |
464 | |
465 | if (!n) { |
			scatterwalk_start(&walk, sg_next(walk.sg));
			n = scatterwalk_clamp(&walk, len);
468 | } |
469 | |
		p = scatterwalk_map(&walk);
		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
		scatterwalk_unmap(p);
473 | |
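		/* yield the NEON unit every 4 KiB of AAD processed */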
474 | if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) { |
475 | kernel_neon_end(); |
476 | kernel_neon_begin(); |
477 | } |
478 | |
479 | len -= n; |
		scatterwalk_advance(&walk, n);
		scatterwalk_done(&walk, 0, len);
482 | } while (len); |
483 | |
484 | if (buf_count) { |
485 | memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count); |
		pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
487 | } |
488 | } |
489 | |
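/*
 * AES-GCM encryption: hash the associated data, encrypt the plaintext in
 * AES_BLOCK_SIZE chunks while folding the ciphertext into the GHASH
 * state, then append the authentication tag to the destination. The CTR
 * value starts at 2 because counter block 1 is reserved for encrypting
 * the tag.
 */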
490 | static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen) |
491 | { |
492 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
	struct gcm_key *ctx = crypto_aead_ctx(aead);
494 | struct skcipher_walk walk; |
495 | u8 buf[AES_BLOCK_SIZE]; |
496 | u32 counter = 2; |
497 | u64 dg[2] = {}; |
498 | be128 lengths; |
499 | const u8 *src; |
500 | u8 *tag, *dst; |
501 | int tail, err; |
502 | |
503 | if (WARN_ON_ONCE(!may_use_simd())) |
504 | return -EBUSY; |
505 | |
	err = skcipher_walk_aead_encrypt(&walk, req, false);
507 | |
508 | kernel_neon_begin(); |
509 | |
510 | if (assoclen) |
		gcm_calculate_auth_mac(req, dg, assoclen);
512 | |
513 | src = walk.src.virt.addr; |
514 | dst = walk.dst.virt.addr; |
515 | |
516 | while (walk.nbytes >= AES_BLOCK_SIZE) { |
517 | int nblocks = walk.nbytes / AES_BLOCK_SIZE; |
518 | |
		pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
521 | counter += nblocks; |
522 | |
523 | if (walk.nbytes == walk.total) { |
524 | src += nblocks * AES_BLOCK_SIZE; |
525 | dst += nblocks * AES_BLOCK_SIZE; |
526 | break; |
527 | } |
528 | |
529 | kernel_neon_end(); |
530 | |
		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
533 | if (err) |
534 | return err; |
535 | |
536 | src = walk.src.virt.addr; |
537 | dst = walk.dst.virt.addr; |
538 | |
539 | kernel_neon_begin(); |
540 | } |
541 | |
543 | lengths.a = cpu_to_be64(assoclen * 8); |
544 | lengths.b = cpu_to_be64(req->cryptlen * 8); |
545 | |
546 | tag = (u8 *)&lengths; |
547 | tail = walk.nbytes % AES_BLOCK_SIZE; |
548 | |
549 | /* |
550 | * Bounce via a buffer unless we are encrypting in place and src/dst |
551 | * are not pointing to the start of the walk buffer. In that case, we |
552 | * can do a NEON load/xor/store sequence in place as long as we move |
553 | * the plain/ciphertext and keystream to the start of the register. If |
554 | * not, do a memcpy() to the end of the buffer so we can reuse the same |
555 | * logic. |
556 | */ |
557 | if (unlikely(tail && (tail == walk.nbytes || src != dst))) |
558 | src = memcpy(buf + sizeof(buf) - tail, src, tail); |
559 | |
	pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
			    ctx->rounds, counter);
562 | kernel_neon_end(); |
563 | |
564 | if (unlikely(tail && src != dst)) |
565 | memcpy(dst, src, tail); |
566 | |
567 | if (walk.nbytes) { |
		err = skcipher_walk_done(&walk, 0);
569 | if (err) |
570 | return err; |
571 | } |
572 | |
573 | /* copy authtag to end of dst */ |
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);
576 | |
577 | return 0; |
578 | } |
579 | |
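/*
 * AES-GCM decryption. The expected tag is copied out of the source
 * scatterlist up front, and pmull_gcm_dec_final() compares it with the
 * tag calculated over the decrypted data, so a mismatch surfaces as
 * -EBADMSG without a separate memcmp.
 */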
580 | static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen) |
581 | { |
582 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	int authsize = crypto_aead_authsize(aead);
585 | struct skcipher_walk walk; |
586 | u8 otag[AES_BLOCK_SIZE]; |
587 | u8 buf[AES_BLOCK_SIZE]; |
588 | u32 counter = 2; |
589 | u64 dg[2] = {}; |
590 | be128 lengths; |
591 | const u8 *src; |
592 | u8 *tag, *dst; |
593 | int tail, err, ret; |
594 | |
595 | if (WARN_ON_ONCE(!may_use_simd())) |
596 | return -EBUSY; |
597 | |
	scatterwalk_map_and_copy(otag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);
601 | |
	err = skcipher_walk_aead_decrypt(&walk, req, false);
603 | |
604 | kernel_neon_begin(); |
605 | |
606 | if (assoclen) |
		gcm_calculate_auth_mac(req, dg, assoclen);
608 | |
609 | src = walk.src.virt.addr; |
610 | dst = walk.dst.virt.addr; |
611 | |
612 | while (walk.nbytes >= AES_BLOCK_SIZE) { |
613 | int nblocks = walk.nbytes / AES_BLOCK_SIZE; |
614 | |
		pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
617 | counter += nblocks; |
618 | |
619 | if (walk.nbytes == walk.total) { |
620 | src += nblocks * AES_BLOCK_SIZE; |
621 | dst += nblocks * AES_BLOCK_SIZE; |
622 | break; |
623 | } |
624 | |
625 | kernel_neon_end(); |
626 | |
		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
629 | if (err) |
630 | return err; |
631 | |
632 | src = walk.src.virt.addr; |
633 | dst = walk.dst.virt.addr; |
634 | |
635 | kernel_neon_begin(); |
636 | } |
637 | |
638 | lengths.a = cpu_to_be64(assoclen * 8); |
639 | lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); |
640 | |
641 | tag = (u8 *)&lengths; |
642 | tail = walk.nbytes % AES_BLOCK_SIZE; |
643 | |
644 | if (unlikely(tail && (tail == walk.nbytes || src != dst))) |
645 | src = memcpy(buf + sizeof(buf) - tail, src, tail); |
646 | |
	ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
				  ctx->rounds, counter, otag, authsize);
649 | kernel_neon_end(); |
650 | |
651 | if (unlikely(tail && src != dst)) |
652 | memcpy(dst, src, tail); |
653 | |
654 | if (walk.nbytes) { |
		err = skcipher_walk_done(&walk, 0);
656 | if (err) |
657 | return err; |
658 | } |
659 | |
660 | return ret ? -EBADMSG : 0; |
661 | } |
662 | |
663 | static int gcm_aes_encrypt(struct aead_request *req) |
664 | { |
	return gcm_encrypt(req, req->iv, req->assoclen);
666 | } |
667 | |
668 | static int gcm_aes_decrypt(struct aead_request *req) |
669 | { |
	return gcm_decrypt(req, req->iv, req->assoclen);
671 | } |
672 | |
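/*
 * RFC4106 (GCM for IPsec ESP): the last four bytes of the key are a
 * nonce/salt that forms the first part of the IV; the remaining eight
 * IV bytes arrive with each request via req->iv.
 */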
673 | static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, |
674 | unsigned int keylen) |
675 | { |
676 | struct gcm_key *ctx = crypto_aead_ctx(tfm); |
677 | int err; |
678 | |
679 | keylen -= RFC4106_NONCE_SIZE; |
680 | err = gcm_aes_setkey(tfm, inkey, keylen); |
681 | if (err) |
682 | return err; |
683 | |
684 | memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); |
685 | return 0; |
686 | } |
687 | |
688 | static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) |
689 | { |
690 | return crypto_rfc4106_check_authsize(authsize); |
691 | } |
692 | |
693 | static int rfc4106_encrypt(struct aead_request *req) |
694 | { |
695 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
	struct gcm_key *ctx = crypto_aead_ctx(aead);
697 | u8 iv[GCM_AES_IV_SIZE]; |
698 | |
699 | memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); |
700 | memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); |
701 | |
	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
704 | } |
705 | |
706 | static int rfc4106_decrypt(struct aead_request *req) |
707 | { |
708 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
	struct gcm_key *ctx = crypto_aead_ctx(aead);
710 | u8 iv[GCM_AES_IV_SIZE]; |
711 | |
712 | memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); |
713 | memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); |
714 | |
	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
717 | } |
718 | |
719 | static struct aead_alg gcm_aes_algs[] = {{ |
720 | .ivsize = GCM_AES_IV_SIZE, |
721 | .chunksize = AES_BLOCK_SIZE, |
722 | .maxauthsize = AES_BLOCK_SIZE, |
723 | .setkey = gcm_aes_setkey, |
724 | .setauthsize = gcm_aes_setauthsize, |
725 | .encrypt = gcm_aes_encrypt, |
726 | .decrypt = gcm_aes_decrypt, |
727 | |
	.base.cra_name = "gcm(aes)",
	.base.cra_driver_name = "gcm-aes-ce",
730 | .base.cra_priority = 400, |
731 | .base.cra_blocksize = 1, |
732 | .base.cra_ctxsize = sizeof(struct gcm_key), |
733 | .base.cra_module = THIS_MODULE, |
734 | }, { |
735 | .ivsize = GCM_RFC4106_IV_SIZE, |
736 | .chunksize = AES_BLOCK_SIZE, |
737 | .maxauthsize = AES_BLOCK_SIZE, |
738 | .setkey = rfc4106_setkey, |
739 | .setauthsize = rfc4106_setauthsize, |
740 | .encrypt = rfc4106_encrypt, |
741 | .decrypt = rfc4106_decrypt, |
742 | |
	.base.cra_name = "rfc4106(gcm(aes))",
	.base.cra_driver_name = "rfc4106-gcm-aes-ce",
745 | .base.cra_priority = 400, |
746 | .base.cra_blocksize = 1, |
747 | .base.cra_ctxsize = sizeof(struct gcm_key) + RFC4106_NONCE_SIZE, |
748 | .base.cra_module = THIS_MODULE, |
749 | }}; |
750 | |
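/*
 * NEON is required for the vmull.p8 fallback; the GCM AEADs and the
 * faster GHASH path additionally require the PMULL (vmull.p64)
 * extension. With PMULL present, setkey computes H^2..H^4 as well, so
 * the shash context is grown by three extra reflected key entries.
 */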
751 | static int __init ghash_ce_mod_init(void) |
752 | { |
753 | int err; |
754 | |
755 | if (!(elf_hwcap & HWCAP_NEON)) |
756 | return -ENODEV; |
757 | |
758 | if (elf_hwcap2 & HWCAP2_PMULL) { |
		err = crypto_register_aeads(gcm_aes_algs,
					    ARRAY_SIZE(gcm_aes_algs));
761 | if (err) |
762 | return err; |
763 | ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]); |
764 | static_branch_enable(&use_p64); |
765 | } |
766 | |
	err = crypto_register_shash(&ghash_alg);
768 | if (err) |
769 | goto err_aead; |
	err = crypto_register_ahash(&ghash_async_alg);
771 | if (err) |
772 | goto err_shash; |
773 | |
774 | return 0; |
775 | |
776 | err_shash: |
	crypto_unregister_shash(&ghash_alg);
778 | err_aead: |
779 | if (elf_hwcap2 & HWCAP2_PMULL) |
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
782 | return err; |
783 | } |
784 | |
785 | static void __exit ghash_ce_mod_exit(void) |
786 | { |
	crypto_unregister_ahash(&ghash_async_alg);
	crypto_unregister_shash(&ghash_alg);
789 | if (elf_hwcap2 & HWCAP2_PMULL) |
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
792 | } |
793 | |
794 | module_init(ghash_ce_mod_init); |
795 | module_exit(ghash_ce_mod_exit); |
796 | |