1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI |
4 | * instructions. This file contains glue code. |
5 | * |
6 | * Copyright (c) 2009 Intel Corp. |
7 | * Author: Huang Ying <ying.huang@intel.com> |
8 | */ |
9 | |
10 | #include <linux/err.h> |
11 | #include <linux/module.h> |
12 | #include <linux/init.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/crypto.h> |
15 | #include <crypto/algapi.h> |
16 | #include <crypto/cryptd.h> |
17 | #include <crypto/gf128mul.h> |
18 | #include <crypto/internal/hash.h> |
19 | #include <crypto/internal/simd.h> |
20 | #include <asm/cpu_device_id.h> |
21 | #include <asm/simd.h> |
22 | #include <asm/unaligned.h> |
23 | |
24 | #define GHASH_BLOCK_SIZE 16 |
25 | #define GHASH_DIGEST_SIZE 16 |
26 | |
/*
 * GHASH primitives defined outside this file. Every caller in this file
 * invokes them between kernel_fpu_begin() and kernel_fpu_end().
 *
 * @dst:    16-byte hash accumulator (struct ghash_desc_ctx::buffer here)
 * @src:    input data
 * @srclen: length of @src in bytes
 * @shash:  key-derived value prepared by ghash_setkey()
 *
 * NOTE(review): clmul_ghash_update() presumably consumes only whole 16-byte
 * blocks, since ghash_update() folds the (srclen & 0xf) tail into the
 * accumulator itself afterwards -- confirm against the implementation.
 */
void clmul_ghash_mul(char *dst, const le128 *shash);

void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
			const le128 *shash);
31 | |
/*
 * Transform context of the async "ghash" algorithm: just a handle to the
 * cryptd ahash allocated in ghash_async_init_tfm() and released in
 * ghash_async_exit_tfm().
 */
struct ghash_async_ctx {
	struct cryptd_ahash *cryptd_tfm;
};
35 | |
/*
 * Transform context of the internal shash: the key in the preprocessed form
 * written by ghash_setkey() and handed to the clmul_ghash_*() routines.
 */
struct ghash_ctx {
	le128 shash;
};
39 | |
/*
 * Per-request hash state. ghash_async_export()/ghash_async_import() copy
 * this structure verbatim.
 */
struct ghash_desc_ctx {
	/* Running accumulator; partial-block input is XORed into it. */
	u8 buffer[GHASH_BLOCK_SIZE];
	/* Bytes still missing to complete the pending block; 0 if none. */
	u32 bytes;
};
44 | |
45 | static int ghash_init(struct shash_desc *desc) |
46 | { |
47 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); |
48 | |
49 | memset(dctx, 0, sizeof(*dctx)); |
50 | |
51 | return 0; |
52 | } |
53 | |
54 | static int ghash_setkey(struct crypto_shash *tfm, |
55 | const u8 *key, unsigned int keylen) |
56 | { |
57 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm); |
58 | u64 a, b; |
59 | |
60 | if (keylen != GHASH_BLOCK_SIZE) |
61 | return -EINVAL; |
62 | |
63 | /* |
64 | * GHASH maps bits to polynomial coefficients backwards, which makes it |
65 | * hard to implement. But it can be shown that the GHASH multiplication |
66 | * |
67 | * D * K (mod x^128 + x^7 + x^2 + x + 1) |
68 | * |
69 | * (where D is a data block and K is the key) is equivalent to: |
70 | * |
71 | * bitreflect(D) * bitreflect(K) * x^(-127) |
72 | * (mod x^128 + x^127 + x^126 + x^121 + 1) |
73 | * |
74 | * So, the code below precomputes: |
75 | * |
76 | * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1) |
77 | * |
78 | * ... but in Montgomery form (so that Montgomery multiplication can be |
79 | * used), i.e. with an extra x^128 factor, which means actually: |
80 | * |
81 | * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1) |
82 | * |
83 | * The within-a-byte part of bitreflect() cancels out GHASH's built-in |
84 | * reflection, and thus bitreflect() is actually a byteswap. |
85 | */ |
86 | a = get_unaligned_be64(p: key); |
87 | b = get_unaligned_be64(p: key + 8); |
88 | ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63)); |
89 | ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63)); |
90 | if (a >> 63) |
91 | ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56); |
92 | return 0; |
93 | } |
94 | |
95 | static int ghash_update(struct shash_desc *desc, |
96 | const u8 *src, unsigned int srclen) |
97 | { |
98 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); |
99 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm: desc->tfm); |
100 | u8 *dst = dctx->buffer; |
101 | |
102 | kernel_fpu_begin(); |
103 | if (dctx->bytes) { |
104 | int n = min(srclen, dctx->bytes); |
105 | u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); |
106 | |
107 | dctx->bytes -= n; |
108 | srclen -= n; |
109 | |
110 | while (n--) |
111 | *pos++ ^= *src++; |
112 | |
113 | if (!dctx->bytes) |
114 | clmul_ghash_mul(dst, shash: &ctx->shash); |
115 | } |
116 | |
117 | clmul_ghash_update(dst, src, srclen, shash: &ctx->shash); |
118 | kernel_fpu_end(); |
119 | |
120 | if (srclen & 0xf) { |
121 | src += srclen - (srclen & 0xf); |
122 | srclen &= 0xf; |
123 | dctx->bytes = GHASH_BLOCK_SIZE - srclen; |
124 | while (srclen--) |
125 | *dst++ ^= *src++; |
126 | } |
127 | |
128 | return 0; |
129 | } |
130 | |
131 | static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) |
132 | { |
133 | u8 *dst = dctx->buffer; |
134 | |
135 | if (dctx->bytes) { |
136 | u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); |
137 | |
138 | while (dctx->bytes--) |
139 | *tmp++ ^= 0; |
140 | |
141 | kernel_fpu_begin(); |
142 | clmul_ghash_mul(dst, shash: &ctx->shash); |
143 | kernel_fpu_end(); |
144 | } |
145 | |
146 | dctx->bytes = 0; |
147 | } |
148 | |
149 | static int ghash_final(struct shash_desc *desc, u8 *dst) |
150 | { |
151 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); |
152 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm: desc->tfm); |
153 | u8 *buf = dctx->buffer; |
154 | |
155 | ghash_flush(ctx, dctx); |
156 | memcpy(dst, buf, GHASH_BLOCK_SIZE); |
157 | |
158 | return 0; |
159 | } |
160 | |
161 | static struct shash_alg ghash_alg = { |
162 | .digestsize = GHASH_DIGEST_SIZE, |
163 | .init = ghash_init, |
164 | .update = ghash_update, |
165 | .final = ghash_final, |
166 | .setkey = ghash_setkey, |
167 | .descsize = sizeof(struct ghash_desc_ctx), |
168 | .base = { |
169 | .cra_name = "__ghash" , |
170 | .cra_driver_name = "__ghash-pclmulqdqni" , |
171 | .cra_priority = 0, |
172 | .cra_flags = CRYPTO_ALG_INTERNAL, |
173 | .cra_blocksize = GHASH_BLOCK_SIZE, |
174 | .cra_ctxsize = sizeof(struct ghash_ctx), |
175 | .cra_module = THIS_MODULE, |
176 | }, |
177 | }; |
178 | |
179 | static int ghash_async_init(struct ahash_request *req) |
180 | { |
181 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
182 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
183 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
184 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
185 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
186 | struct crypto_shash *child = cryptd_ahash_child(tfm: cryptd_tfm); |
187 | |
188 | desc->tfm = child; |
189 | return crypto_shash_init(desc); |
190 | } |
191 | |
192 | static int ghash_async_update(struct ahash_request *req) |
193 | { |
194 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
195 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
196 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
197 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
198 | |
199 | if (!crypto_simd_usable() || |
200 | (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) { |
201 | memcpy(cryptd_req, req, sizeof(*req)); |
202 | ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base); |
203 | return crypto_ahash_update(req: cryptd_req); |
204 | } else { |
205 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
206 | return shash_ahash_update(req, desc); |
207 | } |
208 | } |
209 | |
210 | static int ghash_async_final(struct ahash_request *req) |
211 | { |
212 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
213 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
214 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
215 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
216 | |
217 | if (!crypto_simd_usable() || |
218 | (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) { |
219 | memcpy(cryptd_req, req, sizeof(*req)); |
220 | ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base); |
221 | return crypto_ahash_final(req: cryptd_req); |
222 | } else { |
223 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
224 | return crypto_shash_final(desc, out: req->result); |
225 | } |
226 | } |
227 | |
228 | static int ghash_async_import(struct ahash_request *req, const void *in) |
229 | { |
230 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
231 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
232 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); |
233 | |
234 | ghash_async_init(req); |
235 | memcpy(dctx, in, sizeof(*dctx)); |
236 | return 0; |
237 | |
238 | } |
239 | |
240 | static int ghash_async_export(struct ahash_request *req, void *out) |
241 | { |
242 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
243 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
244 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); |
245 | |
246 | memcpy(out, dctx, sizeof(*dctx)); |
247 | return 0; |
248 | |
249 | } |
250 | |
251 | static int ghash_async_digest(struct ahash_request *req) |
252 | { |
253 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); |
254 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
255 | struct ahash_request *cryptd_req = ahash_request_ctx(req); |
256 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; |
257 | |
258 | if (!crypto_simd_usable() || |
259 | (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) { |
260 | memcpy(cryptd_req, req, sizeof(*req)); |
261 | ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base); |
262 | return crypto_ahash_digest(req: cryptd_req); |
263 | } else { |
264 | struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req); |
265 | struct crypto_shash *child = cryptd_ahash_child(tfm: cryptd_tfm); |
266 | |
267 | desc->tfm = child; |
268 | return shash_ahash_digest(req, desc); |
269 | } |
270 | } |
271 | |
272 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, |
273 | unsigned int keylen) |
274 | { |
275 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); |
276 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; |
277 | |
278 | crypto_ahash_clear_flags(tfm: child, CRYPTO_TFM_REQ_MASK); |
279 | crypto_ahash_set_flags(tfm: child, flags: crypto_ahash_get_flags(tfm) |
280 | & CRYPTO_TFM_REQ_MASK); |
281 | return crypto_ahash_setkey(tfm: child, key, keylen); |
282 | } |
283 | |
284 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) |
285 | { |
286 | struct cryptd_ahash *cryptd_tfm; |
287 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
288 | |
289 | cryptd_tfm = cryptd_alloc_ahash(alg_name: "__ghash-pclmulqdqni" , |
290 | CRYPTO_ALG_INTERNAL, |
291 | CRYPTO_ALG_INTERNAL); |
292 | if (IS_ERR(ptr: cryptd_tfm)) |
293 | return PTR_ERR(ptr: cryptd_tfm); |
294 | ctx->cryptd_tfm = cryptd_tfm; |
295 | crypto_ahash_set_reqsize(tfm: __crypto_ahash_cast(tfm), |
296 | reqsize: sizeof(struct ahash_request) + |
297 | crypto_ahash_reqsize(tfm: &cryptd_tfm->base)); |
298 | |
299 | return 0; |
300 | } |
301 | |
302 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) |
303 | { |
304 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
305 | |
306 | cryptd_free_ahash(tfm: ctx->cryptd_tfm); |
307 | } |
308 | |
309 | static struct ahash_alg ghash_async_alg = { |
310 | .init = ghash_async_init, |
311 | .update = ghash_async_update, |
312 | .final = ghash_async_final, |
313 | .setkey = ghash_async_setkey, |
314 | .digest = ghash_async_digest, |
315 | .export = ghash_async_export, |
316 | .import = ghash_async_import, |
317 | .halg = { |
318 | .digestsize = GHASH_DIGEST_SIZE, |
319 | .statesize = sizeof(struct ghash_desc_ctx), |
320 | .base = { |
321 | .cra_name = "ghash" , |
322 | .cra_driver_name = "ghash-clmulni" , |
323 | .cra_priority = 400, |
324 | .cra_ctxsize = sizeof(struct ghash_async_ctx), |
325 | .cra_flags = CRYPTO_ALG_ASYNC, |
326 | .cra_blocksize = GHASH_BLOCK_SIZE, |
327 | .cra_module = THIS_MODULE, |
328 | .cra_init = ghash_async_init_tfm, |
329 | .cra_exit = ghash_async_exit_tfm, |
330 | }, |
331 | }, |
332 | }; |
333 | |
/*
 * CPU match table: a single entry keyed on X86_FEATURE_PCLMULQDQ, closed by
 * an empty terminator. It is checked with x86_match_cpu() at module init
 * and published through MODULE_DEVICE_TABLE.
 */
static const struct x86_cpu_id pcmul_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
339 | |
340 | static int __init ghash_pclmulqdqni_mod_init(void) |
341 | { |
342 | int err; |
343 | |
344 | if (!x86_match_cpu(match: pcmul_cpu_id)) |
345 | return -ENODEV; |
346 | |
347 | err = crypto_register_shash(alg: &ghash_alg); |
348 | if (err) |
349 | goto err_out; |
350 | err = crypto_register_ahash(alg: &ghash_async_alg); |
351 | if (err) |
352 | goto err_shash; |
353 | |
354 | return 0; |
355 | |
356 | err_shash: |
357 | crypto_unregister_shash(alg: &ghash_alg); |
358 | err_out: |
359 | return err; |
360 | } |
361 | |
362 | static void __exit ghash_pclmulqdqni_mod_exit(void) |
363 | { |
364 | crypto_unregister_ahash(alg: &ghash_async_alg); |
365 | crypto_unregister_shash(alg: &ghash_alg); |
366 | } |
367 | |
/* Hook up the module's load and unload entry points. */
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);

/* Module metadata. */
MODULE_LICENSE("GPL" );
MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI" );
/* NOTE(review): presumably allows loading by algorithm name "ghash" -- confirm. */
MODULE_ALIAS_CRYPTO("ghash" );
374 | |