1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Cryptographic API for the NX-842 hardware compression. |
4 | * |
5 | * Copyright (C) IBM Corporation, 2011-2015 |
6 | * |
7 | * Designer of the Power data compression engine: |
8 | * Bulent Abali <abali@us.ibm.com> |
9 | * |
10 | * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> |
11 | * Seth Jennings <sjenning@linux.vnet.ibm.com> |
12 | * |
13 | * Rewrite: Dan Streetman <ddstreet@ieee.org> |
14 | * |
15 | * This is an interface to the NX-842 compression hardware in PowerPC |
16 | * processors. Most of the complexity of this driver is due to the fact that |
17 | * the NX-842 compression hardware requires the input and output data buffers |
18 | * to be specifically aligned, to be a specific multiple in length, and within |
19 | * specific minimum and maximum lengths. Those restrictions, provided by the |
20 | * nx-842 driver via nx842_constraints, mean this driver must use bounce |
21 | * buffers and headers to correct misaligned in or out buffers, and to split |
22 | * input buffers that are too large. |
23 | * |
24 | * This driver will fall back to software decompression if the hardware |
25 | * decompression fails, so this driver's decompression should never fail as |
26 | * long as the provided compressed buffer is valid. Any compressed buffer |
27 | * created by this driver will have a header (except ones where the input |
28 | * perfectly matches the constraints); so users of this driver cannot simply |
29 | * pass a compressed buffer created by this driver over to the 842 software |
30 | * decompression library. Instead, users must use this driver to decompress; |
31 | * if the hardware fails or is unavailable, the compressed buffer will be |
32 | * parsed and the header removed, and the raw 842 buffer(s) passed to the 842 |
33 | * software decompression library. |
34 | * |
35 | * This does not fall back to software compression, however, since the caller |
36 | * of this function is specifically requesting hardware compression; if the |
37 | * hardware compression fails, the caller can fall back to software |
38 | * compression, and the raw 842 compressed buffer that the software compressor |
39 | * creates can be passed to this driver for hardware decompression; any |
40 | * buffer without our specific header magic is assumed to be a raw 842 buffer |
41 | * and passed directly to the hardware. Note that the software compression |
42 | * library will produce a compressed buffer that is incompatible with the |
43 | * hardware decompressor if the original input buffer length is not a multiple |
44 | * of 8; if such a compressed buffer is passed to this driver for |
45 | * decompression, the hardware will reject it and this driver will then pass |
46 | * it over to the software library for decompression. |
47 | */ |
48 | |
49 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
50 | |
51 | #include <linux/vmalloc.h> |
52 | #include <linux/sw842.h> |
53 | #include <linux/spinlock.h> |
54 | |
55 | #include "nx-842.h" |
56 | |
/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
 * template (see lib/842/842.h), so this magic number will never appear at
 * the start of a raw 842 compressed buffer.  That is important, as any buffer
 * passed to us without this magic is assumed to be a raw 842 compressed
 * buffer, and passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)

/* size in bytes of a header that describes g groups */
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))

/* size in bytes of the largest possible header */
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)

/* bounce buffer size */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/* Keep retrying a busy engine for longer on compression than on
 * decompression: decompression can fall back to the software 842 library,
 * compression cannot.
 */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
78 | |
79 | struct nx842_crypto_param { |
80 | u8 *in; |
81 | unsigned int iremain; |
82 | u8 *out; |
83 | unsigned int oremain; |
84 | unsigned int ototal; |
85 | }; |
86 | |
87 | static int update_param(struct nx842_crypto_param *p, |
88 | unsigned int slen, unsigned int dlen) |
89 | { |
90 | if (p->iremain < slen) |
91 | return -EOVERFLOW; |
92 | if (p->oremain < dlen) |
93 | return -ENOSPC; |
94 | |
95 | p->in += slen; |
96 | p->iremain -= slen; |
97 | p->out += dlen; |
98 | p->oremain -= dlen; |
99 | p->ototal += dlen; |
100 | |
101 | return 0; |
102 | } |
103 | |
104 | int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver) |
105 | { |
106 | struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); |
107 | |
108 | spin_lock_init(&ctx->lock); |
109 | ctx->driver = driver; |
110 | ctx->wmem = kmalloc(size: driver->workmem_size, GFP_KERNEL); |
111 | ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); |
112 | ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); |
113 | if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { |
114 | kfree(objp: ctx->wmem); |
115 | free_page((unsigned long)ctx->sbounce); |
116 | free_page((unsigned long)ctx->dbounce); |
117 | return -ENOMEM; |
118 | } |
119 | |
120 | return 0; |
121 | } |
122 | EXPORT_SYMBOL_GPL(nx842_crypto_init); |
123 | |
124 | void nx842_crypto_exit(struct crypto_tfm *tfm) |
125 | { |
126 | struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); |
127 | |
128 | kfree(objp: ctx->wmem); |
129 | free_page((unsigned long)ctx->sbounce); |
130 | free_page((unsigned long)ctx->dbounce); |
131 | } |
132 | EXPORT_SYMBOL_GPL(nx842_crypto_exit); |
133 | |
134 | static void check_constraints(struct nx842_constraints *c) |
135 | { |
136 | /* limit maximum, to always have enough bounce buffer to decompress */ |
137 | if (c->maximum > BOUNCE_BUFFER_SIZE) |
138 | c->maximum = BOUNCE_BUFFER_SIZE; |
139 | } |
140 | |
141 | static int (struct nx842_crypto_header *hdr, u8 *buf) |
142 | { |
143 | int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups); |
144 | |
145 | /* compress should have added space for header */ |
146 | if (s > be16_to_cpu(hdr->group[0].padding)) { |
147 | pr_err("Internal error: no space for header\n" ); |
148 | return -EINVAL; |
149 | } |
150 | |
151 | memcpy(buf, hdr, s); |
152 | |
153 | print_hex_dump_debug("header " , DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0); |
154 | |
155 | return 0; |
156 | } |
157 | |
158 | static int compress(struct nx842_crypto_ctx *ctx, |
159 | struct nx842_crypto_param *p, |
160 | struct nx842_crypto_header_group *g, |
161 | struct nx842_constraints *c, |
162 | u16 *ignore, |
163 | unsigned int hdrsize) |
164 | { |
165 | unsigned int slen = p->iremain, dlen = p->oremain, tmplen; |
166 | unsigned int adj_slen = slen; |
167 | u8 *src = p->in, *dst = p->out; |
168 | int ret, dskip = 0; |
169 | ktime_t timeout; |
170 | |
171 | if (p->iremain == 0) |
172 | return -EOVERFLOW; |
173 | |
174 | if (p->oremain == 0 || hdrsize + c->minimum > dlen) |
175 | return -ENOSPC; |
176 | |
177 | if (slen % c->multiple) |
178 | adj_slen = round_up(slen, c->multiple); |
179 | if (slen < c->minimum) |
180 | adj_slen = c->minimum; |
181 | if (slen > c->maximum) |
182 | adj_slen = slen = c->maximum; |
183 | if (adj_slen > slen || (u64)src % c->alignment) { |
184 | adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE); |
185 | slen = min(slen, BOUNCE_BUFFER_SIZE); |
186 | if (adj_slen > slen) |
187 | memset(ctx->sbounce + slen, 0, adj_slen - slen); |
188 | memcpy(ctx->sbounce, src, slen); |
189 | src = ctx->sbounce; |
190 | slen = adj_slen; |
191 | pr_debug("using comp sbounce buffer, len %x\n" , slen); |
192 | } |
193 | |
194 | dst += hdrsize; |
195 | dlen -= hdrsize; |
196 | |
197 | if ((u64)dst % c->alignment) { |
198 | dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst); |
199 | dst += dskip; |
200 | dlen -= dskip; |
201 | } |
202 | if (dlen % c->multiple) |
203 | dlen = round_down(dlen, c->multiple); |
204 | if (dlen < c->minimum) { |
205 | nospc: |
206 | dst = ctx->dbounce; |
207 | dlen = min(p->oremain, BOUNCE_BUFFER_SIZE); |
208 | dlen = round_down(dlen, c->multiple); |
209 | dskip = 0; |
210 | pr_debug("using comp dbounce buffer, len %x\n" , dlen); |
211 | } |
212 | if (dlen > c->maximum) |
213 | dlen = c->maximum; |
214 | |
215 | tmplen = dlen; |
216 | timeout = ktime_add_ms(kt: ktime_get(), COMP_BUSY_TIMEOUT); |
217 | do { |
218 | dlen = tmplen; /* reset dlen, if we're retrying */ |
219 | ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem); |
220 | /* possibly we should reduce the slen here, instead of |
221 | * retrying with the dbounce buffer? |
222 | */ |
223 | if (ret == -ENOSPC && dst != ctx->dbounce) |
224 | goto nospc; |
225 | } while (ret == -EBUSY && ktime_before(cmp1: ktime_get(), cmp2: timeout)); |
226 | if (ret) |
227 | return ret; |
228 | |
229 | dskip += hdrsize; |
230 | |
231 | if (dst == ctx->dbounce) |
232 | memcpy(p->out + dskip, dst, dlen); |
233 | |
234 | g->padding = cpu_to_be16(dskip); |
235 | g->compressed_length = cpu_to_be32(dlen); |
236 | g->uncompressed_length = cpu_to_be32(slen); |
237 | |
238 | if (p->iremain < slen) { |
239 | *ignore = slen - p->iremain; |
240 | slen = p->iremain; |
241 | } |
242 | |
243 | pr_debug("compress slen %x ignore %x dlen %x padding %x\n" , |
244 | slen, *ignore, dlen, dskip); |
245 | |
246 | return update_param(p, slen, dlen: dskip + dlen); |
247 | } |
248 | |
249 | int nx842_crypto_compress(struct crypto_tfm *tfm, |
250 | const u8 *src, unsigned int slen, |
251 | u8 *dst, unsigned int *dlen) |
252 | { |
253 | struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); |
254 | struct nx842_crypto_header *hdr = &ctx->header; |
255 | struct nx842_crypto_param p; |
256 | struct nx842_constraints c = *ctx->driver->constraints; |
257 | unsigned int groups, hdrsize, h; |
258 | int ret, n; |
259 | bool ; |
260 | u16 ignore = 0; |
261 | |
262 | check_constraints(c: &c); |
263 | |
264 | p.in = (u8 *)src; |
265 | p.iremain = slen; |
266 | p.out = dst; |
267 | p.oremain = *dlen; |
268 | p.ototal = 0; |
269 | |
270 | *dlen = 0; |
271 | |
272 | groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX, |
273 | DIV_ROUND_UP(p.iremain, c.maximum)); |
274 | hdrsize = NX842_CRYPTO_HEADER_SIZE(groups); |
275 | |
276 | spin_lock_bh(lock: &ctx->lock); |
277 | |
278 | /* skip adding header if the buffers meet all constraints */ |
279 | add_header = (p.iremain % c.multiple || |
280 | p.iremain < c.minimum || |
281 | p.iremain > c.maximum || |
282 | (u64)p.in % c.alignment || |
283 | p.oremain % c.multiple || |
284 | p.oremain < c.minimum || |
285 | p.oremain > c.maximum || |
286 | (u64)p.out % c.alignment); |
287 | |
288 | hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC); |
289 | hdr->groups = 0; |
290 | hdr->ignore = 0; |
291 | |
292 | while (p.iremain > 0) { |
293 | n = hdr->groups++; |
294 | ret = -ENOSPC; |
295 | if (hdr->groups > NX842_CRYPTO_GROUP_MAX) |
296 | goto unlock; |
297 | |
298 | /* header goes before first group */ |
299 | h = !n && add_header ? hdrsize : 0; |
300 | |
301 | if (ignore) |
302 | pr_warn("internal error, ignore is set %x\n" , ignore); |
303 | |
304 | ret = compress(ctx, p: &p, g: &hdr->group[n], c: &c, ignore: &ignore, hdrsize: h); |
305 | if (ret) |
306 | goto unlock; |
307 | } |
308 | |
309 | if (!add_header && hdr->groups > 1) { |
310 | pr_err("Internal error: No header but multiple groups\n" ); |
311 | ret = -EINVAL; |
312 | goto unlock; |
313 | } |
314 | |
315 | /* ignore indicates the input stream needed to be padded */ |
316 | hdr->ignore = cpu_to_be16(ignore); |
317 | if (ignore) |
318 | pr_debug("marked %d bytes as ignore\n" , ignore); |
319 | |
320 | if (add_header) |
321 | ret = nx842_crypto_add_header(hdr, buf: dst); |
322 | if (ret) |
323 | goto unlock; |
324 | |
325 | *dlen = p.ototal; |
326 | |
327 | pr_debug("compress total slen %x dlen %x\n" , slen, *dlen); |
328 | |
329 | unlock: |
330 | spin_unlock_bh(lock: &ctx->lock); |
331 | return ret; |
332 | } |
333 | EXPORT_SYMBOL_GPL(nx842_crypto_compress); |
334 | |
335 | static int decompress(struct nx842_crypto_ctx *ctx, |
336 | struct nx842_crypto_param *p, |
337 | struct nx842_crypto_header_group *g, |
338 | struct nx842_constraints *c, |
339 | u16 ignore) |
340 | { |
341 | unsigned int slen = be32_to_cpu(g->compressed_length); |
342 | unsigned int required_len = be32_to_cpu(g->uncompressed_length); |
343 | unsigned int dlen = p->oremain, tmplen; |
344 | unsigned int adj_slen = slen; |
345 | u8 *src = p->in, *dst = p->out; |
346 | u16 padding = be16_to_cpu(g->padding); |
347 | int ret, spadding = 0; |
348 | ktime_t timeout; |
349 | |
350 | if (!slen || !required_len) |
351 | return -EINVAL; |
352 | |
353 | if (p->iremain <= 0 || padding + slen > p->iremain) |
354 | return -EOVERFLOW; |
355 | |
356 | if (p->oremain <= 0 || required_len - ignore > p->oremain) |
357 | return -ENOSPC; |
358 | |
359 | src += padding; |
360 | |
361 | if (slen % c->multiple) |
362 | adj_slen = round_up(slen, c->multiple); |
363 | if (slen < c->minimum) |
364 | adj_slen = c->minimum; |
365 | if (slen > c->maximum) |
366 | goto usesw; |
367 | if (slen < adj_slen || (u64)src % c->alignment) { |
368 | /* we can append padding bytes because the 842 format defines |
369 | * an "end" template (see lib/842/842_decompress.c) and will |
370 | * ignore any bytes following it. |
371 | */ |
372 | if (slen < adj_slen) |
373 | memset(ctx->sbounce + slen, 0, adj_slen - slen); |
374 | memcpy(ctx->sbounce, src, slen); |
375 | src = ctx->sbounce; |
376 | spadding = adj_slen - slen; |
377 | slen = adj_slen; |
378 | pr_debug("using decomp sbounce buffer, len %x\n" , slen); |
379 | } |
380 | |
381 | if (dlen % c->multiple) |
382 | dlen = round_down(dlen, c->multiple); |
383 | if (dlen < required_len || (u64)dst % c->alignment) { |
384 | dst = ctx->dbounce; |
385 | dlen = min(required_len, BOUNCE_BUFFER_SIZE); |
386 | pr_debug("using decomp dbounce buffer, len %x\n" , dlen); |
387 | } |
388 | if (dlen < c->minimum) |
389 | goto usesw; |
390 | if (dlen > c->maximum) |
391 | dlen = c->maximum; |
392 | |
393 | tmplen = dlen; |
394 | timeout = ktime_add_ms(kt: ktime_get(), DECOMP_BUSY_TIMEOUT); |
395 | do { |
396 | dlen = tmplen; /* reset dlen, if we're retrying */ |
397 | ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem); |
398 | } while (ret == -EBUSY && ktime_before(cmp1: ktime_get(), cmp2: timeout)); |
399 | if (ret) { |
400 | usesw: |
401 | /* reset everything, sw doesn't have constraints */ |
402 | src = p->in + padding; |
403 | slen = be32_to_cpu(g->compressed_length); |
404 | spadding = 0; |
405 | dst = p->out; |
406 | dlen = p->oremain; |
407 | if (dlen < required_len) { /* have ignore bytes */ |
408 | dst = ctx->dbounce; |
409 | dlen = BOUNCE_BUFFER_SIZE; |
410 | } |
411 | pr_info_ratelimited("using software 842 decompression\n" ); |
412 | ret = sw842_decompress(src, srclen: slen, dst, destlen: &dlen); |
413 | } |
414 | if (ret) |
415 | return ret; |
416 | |
417 | slen -= spadding; |
418 | |
419 | dlen -= ignore; |
420 | if (ignore) |
421 | pr_debug("ignoring last %x bytes\n" , ignore); |
422 | |
423 | if (dst == ctx->dbounce) |
424 | memcpy(p->out, dst, dlen); |
425 | |
426 | pr_debug("decompress slen %x padding %x dlen %x ignore %x\n" , |
427 | slen, padding, dlen, ignore); |
428 | |
429 | return update_param(p, slen: slen + padding, dlen); |
430 | } |
431 | |
432 | int nx842_crypto_decompress(struct crypto_tfm *tfm, |
433 | const u8 *src, unsigned int slen, |
434 | u8 *dst, unsigned int *dlen) |
435 | { |
436 | struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); |
437 | struct nx842_crypto_header *hdr; |
438 | struct nx842_crypto_param p; |
439 | struct nx842_constraints c = *ctx->driver->constraints; |
440 | int n, ret, hdr_len; |
441 | u16 ignore = 0; |
442 | |
443 | check_constraints(c: &c); |
444 | |
445 | p.in = (u8 *)src; |
446 | p.iremain = slen; |
447 | p.out = dst; |
448 | p.oremain = *dlen; |
449 | p.ototal = 0; |
450 | |
451 | *dlen = 0; |
452 | |
453 | hdr = (struct nx842_crypto_header *)src; |
454 | |
455 | spin_lock_bh(lock: &ctx->lock); |
456 | |
457 | /* If it doesn't start with our header magic number, assume it's a raw |
458 | * 842 compressed buffer and pass it directly to the hardware driver |
459 | */ |
460 | if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) { |
461 | struct nx842_crypto_header_group g = { |
462 | .padding = 0, |
463 | .compressed_length = cpu_to_be32(p.iremain), |
464 | .uncompressed_length = cpu_to_be32(p.oremain), |
465 | }; |
466 | |
467 | ret = decompress(ctx, p: &p, g: &g, c: &c, ignore: 0); |
468 | if (ret) |
469 | goto unlock; |
470 | |
471 | goto success; |
472 | } |
473 | |
474 | if (!hdr->groups) { |
475 | pr_err("header has no groups\n" ); |
476 | ret = -EINVAL; |
477 | goto unlock; |
478 | } |
479 | if (hdr->groups > NX842_CRYPTO_GROUP_MAX) { |
480 | pr_err("header has too many groups %x, max %x\n" , |
481 | hdr->groups, NX842_CRYPTO_GROUP_MAX); |
482 | ret = -EINVAL; |
483 | goto unlock; |
484 | } |
485 | |
486 | hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups); |
487 | if (hdr_len > slen) { |
488 | ret = -EOVERFLOW; |
489 | goto unlock; |
490 | } |
491 | |
492 | memcpy(&ctx->header, src, hdr_len); |
493 | hdr = &ctx->header; |
494 | |
495 | for (n = 0; n < hdr->groups; n++) { |
496 | /* ignore applies to last group */ |
497 | if (n + 1 == hdr->groups) |
498 | ignore = be16_to_cpu(hdr->ignore); |
499 | |
500 | ret = decompress(ctx, p: &p, g: &hdr->group[n], c: &c, ignore); |
501 | if (ret) |
502 | goto unlock; |
503 | } |
504 | |
505 | success: |
506 | *dlen = p.ototal; |
507 | |
508 | pr_debug("decompress total slen %x dlen %x\n" , slen, *dlen); |
509 | |
510 | ret = 0; |
511 | |
512 | unlock: |
513 | spin_unlock_bh(lock: &ctx->lock); |
514 | |
515 | return ret; |
516 | } |
517 | EXPORT_SYMBOL_GPL(nx842_crypto_decompress); |
518 | |
519 | MODULE_LICENSE("GPL" ); |
520 | MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver" ); |
521 | MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>" ); |
522 | |