1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
4 * instructions. This file contains glue code.
5 *
6 * Copyright (c) 2009 Intel Corp.
7 * Author: Huang Ying <ying.huang@intel.com>
8 */
9
10#include <linux/err.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/crypto.h>
15#include <crypto/algapi.h>
16#include <crypto/cryptd.h>
17#include <crypto/gf128mul.h>
18#include <crypto/internal/hash.h>
19#include <crypto/internal/simd.h>
20#include <asm/cpu_device_id.h>
21#include <asm/simd.h>
22#include <asm/unaligned.h>
23
24#define GHASH_BLOCK_SIZE 16
25#define GHASH_DIGEST_SIZE 16
26
/*
 * Low-level GHASH primitives. Only the prototypes live in this file; the
 * implementations are provided elsewhere (presumably the PCLMULQDQ assembly
 * that this glue code wraps -- confirm). Every caller in this file invokes
 * them between kernel_fpu_begin() and kernel_fpu_end().
 */

/*
 * Invoked on the 16-byte accumulator @dst with the prepared key @shash once a
 * complete block has been XORed into @dst.
 * NOTE(review): presumably an in-place GF(2^128) multiply -- confirm against
 * the implementation.
 */
void clmul_ghash_mul(char *dst, const le128 *shash);

/*
 * Invoked with the accumulator @dst, @srclen input bytes at @src and the
 * prepared key @shash.
 * NOTE(review): ghash_update() buffers the trailing (srclen & 0xf) bytes
 * itself after this call, so this presumably consumes only whole 16-byte
 * blocks -- confirm against the implementation.
 */
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
			const le128 *shash);
31
/*
 * Per-transform context of the asynchronous "ghash" algorithm.
 */
struct ghash_async_ctx {
	/* Allocated in ghash_async_init_tfm(), freed in ghash_async_exit_tfm(). */
	struct cryptd_ahash *cryptd_tfm;
};
35
/*
 * Per-transform context of the internal synchronous implementation.
 */
struct ghash_ctx {
	/* Hash key in the preprocessed form written by ghash_setkey(). */
	le128 shash;
};
39
/*
 * Per-request hashing state. This struct is also copied verbatim by
 * ghash_async_export() / ghash_async_import().
 */
struct ghash_desc_ctx {
	/* Running accumulator; input bytes are XORed into it block by block. */
	u8 buffer[GHASH_BLOCK_SIZE];
	/* Bytes still missing to complete the current block; 0 if none pending. */
	u32 bytes;
};
44
45static int ghash_init(struct shash_desc *desc)
46{
47 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
48
49 memset(dctx, 0, sizeof(*dctx));
50
51 return 0;
52}
53
54static int ghash_setkey(struct crypto_shash *tfm,
55 const u8 *key, unsigned int keylen)
56{
57 struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
58 u64 a, b;
59
60 if (keylen != GHASH_BLOCK_SIZE)
61 return -EINVAL;
62
63 /*
64 * GHASH maps bits to polynomial coefficients backwards, which makes it
65 * hard to implement. But it can be shown that the GHASH multiplication
66 *
67 * D * K (mod x^128 + x^7 + x^2 + x + 1)
68 *
69 * (where D is a data block and K is the key) is equivalent to:
70 *
71 * bitreflect(D) * bitreflect(K) * x^(-127)
72 * (mod x^128 + x^127 + x^126 + x^121 + 1)
73 *
74 * So, the code below precomputes:
75 *
76 * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
77 *
78 * ... but in Montgomery form (so that Montgomery multiplication can be
79 * used), i.e. with an extra x^128 factor, which means actually:
80 *
81 * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
82 *
83 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
84 * reflection, and thus bitreflect() is actually a byteswap.
85 */
86 a = get_unaligned_be64(p: key);
87 b = get_unaligned_be64(p: key + 8);
88 ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
89 ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
90 if (a >> 63)
91 ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
92 return 0;
93}
94
95static int ghash_update(struct shash_desc *desc,
96 const u8 *src, unsigned int srclen)
97{
98 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
99 struct ghash_ctx *ctx = crypto_shash_ctx(tfm: desc->tfm);
100 u8 *dst = dctx->buffer;
101
102 kernel_fpu_begin();
103 if (dctx->bytes) {
104 int n = min(srclen, dctx->bytes);
105 u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
106
107 dctx->bytes -= n;
108 srclen -= n;
109
110 while (n--)
111 *pos++ ^= *src++;
112
113 if (!dctx->bytes)
114 clmul_ghash_mul(dst, shash: &ctx->shash);
115 }
116
117 clmul_ghash_update(dst, src, srclen, shash: &ctx->shash);
118 kernel_fpu_end();
119
120 if (srclen & 0xf) {
121 src += srclen - (srclen & 0xf);
122 srclen &= 0xf;
123 dctx->bytes = GHASH_BLOCK_SIZE - srclen;
124 while (srclen--)
125 *dst++ ^= *src++;
126 }
127
128 return 0;
129}
130
131static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
132{
133 u8 *dst = dctx->buffer;
134
135 if (dctx->bytes) {
136 u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
137
138 while (dctx->bytes--)
139 *tmp++ ^= 0;
140
141 kernel_fpu_begin();
142 clmul_ghash_mul(dst, shash: &ctx->shash);
143 kernel_fpu_end();
144 }
145
146 dctx->bytes = 0;
147}
148
149static int ghash_final(struct shash_desc *desc, u8 *dst)
150{
151 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
152 struct ghash_ctx *ctx = crypto_shash_ctx(tfm: desc->tfm);
153 u8 *buf = dctx->buffer;
154
155 ghash_flush(ctx, dctx);
156 memcpy(dst, buf, GHASH_BLOCK_SIZE);
157
158 return 0;
159}
160
161static struct shash_alg ghash_alg = {
162 .digestsize = GHASH_DIGEST_SIZE,
163 .init = ghash_init,
164 .update = ghash_update,
165 .final = ghash_final,
166 .setkey = ghash_setkey,
167 .descsize = sizeof(struct ghash_desc_ctx),
168 .base = {
169 .cra_name = "__ghash",
170 .cra_driver_name = "__ghash-pclmulqdqni",
171 .cra_priority = 0,
172 .cra_flags = CRYPTO_ALG_INTERNAL,
173 .cra_blocksize = GHASH_BLOCK_SIZE,
174 .cra_ctxsize = sizeof(struct ghash_ctx),
175 .cra_module = THIS_MODULE,
176 },
177};
178
179static int ghash_async_init(struct ahash_request *req)
180{
181 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
182 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
183 struct ahash_request *cryptd_req = ahash_request_ctx(req);
184 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
185 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
186 struct crypto_shash *child = cryptd_ahash_child(tfm: cryptd_tfm);
187
188 desc->tfm = child;
189 return crypto_shash_init(desc);
190}
191
192static int ghash_async_update(struct ahash_request *req)
193{
194 struct ahash_request *cryptd_req = ahash_request_ctx(req);
195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199 if (!crypto_simd_usable() ||
200 (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) {
201 memcpy(cryptd_req, req, sizeof(*req));
202 ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base);
203 return crypto_ahash_update(req: cryptd_req);
204 } else {
205 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
206 return shash_ahash_update(req, desc);
207 }
208}
209
210static int ghash_async_final(struct ahash_request *req)
211{
212 struct ahash_request *cryptd_req = ahash_request_ctx(req);
213 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
214 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
215 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
216
217 if (!crypto_simd_usable() ||
218 (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) {
219 memcpy(cryptd_req, req, sizeof(*req));
220 ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base);
221 return crypto_ahash_final(req: cryptd_req);
222 } else {
223 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
224 return crypto_shash_final(desc, out: req->result);
225 }
226}
227
228static int ghash_async_import(struct ahash_request *req, const void *in)
229{
230 struct ahash_request *cryptd_req = ahash_request_ctx(req);
231 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
232 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
233
234 ghash_async_init(req);
235 memcpy(dctx, in, sizeof(*dctx));
236 return 0;
237
238}
239
240static int ghash_async_export(struct ahash_request *req, void *out)
241{
242 struct ahash_request *cryptd_req = ahash_request_ctx(req);
243 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
244 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
245
246 memcpy(out, dctx, sizeof(*dctx));
247 return 0;
248
249}
250
251static int ghash_async_digest(struct ahash_request *req)
252{
253 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
254 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
255 struct ahash_request *cryptd_req = ahash_request_ctx(req);
256 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
257
258 if (!crypto_simd_usable() ||
259 (in_atomic() && cryptd_ahash_queued(tfm: cryptd_tfm))) {
260 memcpy(cryptd_req, req, sizeof(*req));
261 ahash_request_set_tfm(req: cryptd_req, tfm: &cryptd_tfm->base);
262 return crypto_ahash_digest(req: cryptd_req);
263 } else {
264 struct shash_desc *desc = cryptd_shash_desc(req: cryptd_req);
265 struct crypto_shash *child = cryptd_ahash_child(tfm: cryptd_tfm);
266
267 desc->tfm = child;
268 return shash_ahash_digest(req, desc);
269 }
270}
271
272static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
273 unsigned int keylen)
274{
275 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
276 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
277
278 crypto_ahash_clear_flags(tfm: child, CRYPTO_TFM_REQ_MASK);
279 crypto_ahash_set_flags(tfm: child, flags: crypto_ahash_get_flags(tfm)
280 & CRYPTO_TFM_REQ_MASK);
281 return crypto_ahash_setkey(tfm: child, key, keylen);
282}
283
284static int ghash_async_init_tfm(struct crypto_tfm *tfm)
285{
286 struct cryptd_ahash *cryptd_tfm;
287 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
288
289 cryptd_tfm = cryptd_alloc_ahash(alg_name: "__ghash-pclmulqdqni",
290 CRYPTO_ALG_INTERNAL,
291 CRYPTO_ALG_INTERNAL);
292 if (IS_ERR(ptr: cryptd_tfm))
293 return PTR_ERR(ptr: cryptd_tfm);
294 ctx->cryptd_tfm = cryptd_tfm;
295 crypto_ahash_set_reqsize(tfm: __crypto_ahash_cast(tfm),
296 reqsize: sizeof(struct ahash_request) +
297 crypto_ahash_reqsize(tfm: &cryptd_tfm->base));
298
299 return 0;
300}
301
302static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
303{
304 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
305
306 cryptd_free_ahash(tfm: ctx->cryptd_tfm);
307}
308
309static struct ahash_alg ghash_async_alg = {
310 .init = ghash_async_init,
311 .update = ghash_async_update,
312 .final = ghash_async_final,
313 .setkey = ghash_async_setkey,
314 .digest = ghash_async_digest,
315 .export = ghash_async_export,
316 .import = ghash_async_import,
317 .halg = {
318 .digestsize = GHASH_DIGEST_SIZE,
319 .statesize = sizeof(struct ghash_desc_ctx),
320 .base = {
321 .cra_name = "ghash",
322 .cra_driver_name = "ghash-clmulni",
323 .cra_priority = 400,
324 .cra_ctxsize = sizeof(struct ghash_async_ctx),
325 .cra_flags = CRYPTO_ALG_ASYNC,
326 .cra_blocksize = GHASH_BLOCK_SIZE,
327 .cra_module = THIS_MODULE,
328 .cra_init = ghash_async_init_tfm,
329 .cra_exit = ghash_async_exit_tfm,
330 },
331 },
332};
333
/*
 * CPU match table: the module only initialises on CPUs that advertise
 * X86_FEATURE_PCLMULQDQ (checked in ghash_pclmulqdqni_mod_init()).  The empty
 * entry terminates the table.
 */
static const struct x86_cpu_id pcmul_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
339
340static int __init ghash_pclmulqdqni_mod_init(void)
341{
342 int err;
343
344 if (!x86_match_cpu(match: pcmul_cpu_id))
345 return -ENODEV;
346
347 err = crypto_register_shash(alg: &ghash_alg);
348 if (err)
349 goto err_out;
350 err = crypto_register_ahash(alg: &ghash_async_alg);
351 if (err)
352 goto err_shash;
353
354 return 0;
355
356err_shash:
357 crypto_unregister_shash(alg: &ghash_alg);
358err_out:
359 return err;
360}
361
362static void __exit ghash_pclmulqdqni_mod_exit(void)
363{
364 crypto_unregister_ahash(alg: &ghash_async_alg);
365 crypto_unregister_shash(alg: &ghash_alg);
366}
367
/* Module entry and exit points. */
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
/*
 * Alias under the generic algorithm name.
 * NOTE(review): presumably lets a request for "ghash" autoload this module --
 * confirm.
 */
MODULE_ALIAS_CRYPTO("ghash");
374

/* source code of linux/arch/x86/crypto/ghash-clmulni-intel_glue.c */