1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Driver for IBM PowerNV compression accelerator |
4 | * |
5 | * Copyright (C) 2015 Dan Streetman, IBM Corp |
6 | */ |
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
9 | |
10 | #include "nx-842.h" |
11 | |
12 | #include <linux/timer.h> |
13 | |
14 | #include <asm/prom.h> |
15 | #include <asm/icswx.h> |
16 | #include <asm/vas.h> |
17 | #include <asm/reg.h> |
18 | #include <asm/opal-api.h> |
19 | #include <asm/opal.h> |
20 | |
/* Module metadata: license, author, description and the crypto aliases. */
MODULE_LICENSE("GPL" );
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>" );
MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors" );
MODULE_ALIAS_CRYPTO("842" );
MODULE_ALIAS_CRYPTO("842-nx" );
26 | |
/* Alignment applied to the caller's work memory; same as the CRB alignment. */
#define WORKMEM_ALIGN (CRB_ALIGN)
/* Upper bound on how long wait_for_csb() polls for a valid CSB. */
#define CSB_WAIT_MAX (5000) /* ms */
/* Number of times nx842_exec_vas() retries a failed copy/paste. */
#define VAS_RETRIES (10)
30 | |
/*
 * Per-request scratch area handed in by the caller as "workmem".
 *
 * The exec functions align the caller's pointer up to WORKMEM_ALIGN before
 * using it as this structure, so the trailing padding provides the slack
 * that the alignment step may consume.
 */
struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	/* time the request was issued; wait_for_csb() times out relative to it */
	ktime_t start;

	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);
42 | |
/* One NX coprocessor (or NX RxFIFO) discovered from the device tree. */
struct nx_coproc {
	unsigned int chip_id; /* chip the coprocessor belongs to */
	unsigned int ct; /* Can be 842 or GZIP high/normal*/
	unsigned int ci; /* Coprocessor instance, used with icswx */
	struct {
		struct vas_window *rxwin; /* VAS receive window, if one was opened */
		int id; /* VAS id used when opening windows */
	} vas;
	struct list_head list; /* entry in the nx_coprocs list */
};
53 | |
/*
 * Per-CPU VAS send window. A request is sent to the NX engine on the chip
 * of the CPU where the process is executing. Used with the VAS function
 * only.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx_coprocs);
/* coprocessor type of the first 842 engine probed; 0 until one is found */
static unsigned int nx842_ct; /* used in icswx function */
63 | |
/*
 * Coprocessor type values. These use the same values as skiboot, i.e. the
 * coprocessor type representation in the NX workbook.
 */
#define NX_CT_GZIP (2) /* on P9 and later */
#define NX_CT_842 (3)
70 | |
/*
 * Backend used by nx842_powernv_compress()/nx842_powernv_decompress().
 * The signature matches nx842_exec_icswx() and nx842_exec_vas(); the
 * assignment is not visible in this part of the file (presumably done at
 * module init -- confirm).
 */
static int (*nx842_powernv_exec)(const unsigned char *in,
unsigned int inlen, unsigned char *out,
unsigned int *outlenp, void *workmem, int fc);
74 | |
75 | /* |
76 | * setup_indirect_dde - Setup an indirect DDE |
77 | * |
78 | * The DDE is setup with the DDE count, byte count, and address of |
79 | * first direct DDE in the list. |
80 | */ |
81 | static void setup_indirect_dde(struct data_descriptor_entry *dde, |
82 | struct data_descriptor_entry *ddl, |
83 | unsigned int dde_count, unsigned int byte_count) |
84 | { |
85 | dde->flags = 0; |
86 | dde->count = dde_count; |
87 | dde->index = 0; |
88 | dde->length = cpu_to_be32(byte_count); |
89 | dde->address = cpu_to_be64(nx842_get_pa(ddl)); |
90 | } |
91 | |
92 | /* |
93 | * setup_direct_dde - Setup single DDE from buffer |
94 | * |
95 | * The DDE is setup with the buffer and length. The buffer must be properly |
96 | * aligned. The used length is returned. |
97 | * Returns: |
98 | * N Successfully set up DDE with N bytes |
99 | */ |
100 | static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, |
101 | unsigned long pa, unsigned int len) |
102 | { |
103 | unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); |
104 | |
105 | dde->flags = 0; |
106 | dde->count = 0; |
107 | dde->index = 0; |
108 | dde->length = cpu_to_be32(l); |
109 | dde->address = cpu_to_be64(pa); |
110 | |
111 | return l; |
112 | } |
113 | |
114 | /* |
115 | * setup_ddl - Setup DDL from buffer |
116 | * |
117 | * Returns: |
118 | * 0 Successfully set up DDL |
119 | */ |
120 | static int setup_ddl(struct data_descriptor_entry *dde, |
121 | struct data_descriptor_entry *ddl, |
122 | unsigned char *buf, unsigned int len, |
123 | bool in) |
124 | { |
125 | unsigned long pa = nx842_get_pa(addr: buf); |
126 | int i, ret, total_len = len; |
127 | |
128 | if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { |
129 | pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n" , |
130 | in ? "input" : "output" , pa, DDE_BUFFER_ALIGN); |
131 | return -EINVAL; |
132 | } |
133 | |
134 | /* only need to check last mult; since buffer must be |
135 | * DDE_BUFFER_ALIGN aligned, and that is a multiple of |
136 | * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers |
137 | * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. |
138 | */ |
139 | if (len % DDE_BUFFER_LAST_MULT) { |
140 | pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n" , |
141 | in ? "input" : "output" , len, DDE_BUFFER_LAST_MULT); |
142 | if (in) |
143 | return -EINVAL; |
144 | len = round_down(len, DDE_BUFFER_LAST_MULT); |
145 | } |
146 | |
147 | /* use a single direct DDE */ |
148 | if (len <= LEN_ON_PAGE(pa)) { |
149 | ret = setup_direct_dde(dde, pa, len); |
150 | WARN_ON(ret < len); |
151 | return 0; |
152 | } |
153 | |
154 | /* use the DDL */ |
155 | for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { |
156 | ret = setup_direct_dde(&ddl[i], pa, len); |
157 | buf += ret; |
158 | len -= ret; |
159 | pa = nx842_get_pa(addr: buf); |
160 | } |
161 | |
162 | if (len > 0) { |
163 | pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n" , |
164 | total_len, in ? "input" : "output" , len); |
165 | if (in) |
166 | return -EMSGSIZE; |
167 | total_len -= len; |
168 | } |
169 | setup_indirect_dde(dde, ddl, dde_count: i, byte_count: total_len); |
170 | |
171 | return 0; |
172 | } |
173 | |
/*
 * CSB_ERR - log @msg (a printf-style format literal plus optional
 * arguments) followed by the CSB's flags, cs, cc and ce bytes and its
 * count field converted from big-endian.
 */
#define CSB_ERR(csb, msg, ...) \
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \
	##__VA_ARGS__, (csb)->flags, \
	(csb)->cs, (csb)->cc, (csb)->ce, \
	be32_to_cpu((csb)->count))

/* CSB_ERR_ADDR - like CSB_ERR, but also prints the CSB address field. */
#define CSB_ERR_ADDR(csb, msg, ...) \
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \
	(unsigned long)be64_to_cpu((csb)->address))
183 | |
184 | static int wait_for_csb(struct nx842_workmem *wmem, |
185 | struct coprocessor_status_block *csb) |
186 | { |
187 | ktime_t start = wmem->start, now = ktime_get(); |
188 | ktime_t timeout = ktime_add_ms(kt: start, CSB_WAIT_MAX); |
189 | |
190 | while (!(READ_ONCE(csb->flags) & CSB_V)) { |
191 | cpu_relax(); |
192 | now = ktime_get(); |
193 | if (ktime_after(cmp1: now, cmp2: timeout)) |
194 | break; |
195 | } |
196 | |
197 | /* hw has updated csb and output buffer */ |
198 | barrier(); |
199 | |
200 | /* check CSB flags */ |
201 | if (!(csb->flags & CSB_V)) { |
202 | CSB_ERR(csb, "CSB still not valid after %ld us, giving up" , |
203 | (long)ktime_us_delta(now, start)); |
204 | return -ETIMEDOUT; |
205 | } |
206 | if (csb->flags & CSB_F) { |
207 | CSB_ERR(csb, "Invalid CSB format" ); |
208 | return -EPROTO; |
209 | } |
210 | if (csb->flags & CSB_CH) { |
211 | CSB_ERR(csb, "Invalid CSB chaining state" ); |
212 | return -EPROTO; |
213 | } |
214 | |
215 | /* verify CSB completion sequence is 0 */ |
216 | if (csb->cs) { |
217 | CSB_ERR(csb, "Invalid CSB completion sequence" ); |
218 | return -EPROTO; |
219 | } |
220 | |
221 | /* check CSB Completion Code */ |
222 | switch (csb->cc) { |
223 | /* no error */ |
224 | case CSB_CC_SUCCESS: |
225 | break; |
226 | case CSB_CC_TPBC_GT_SPBC: |
227 | /* not an error, but the compressed data is |
228 | * larger than the uncompressed data :( |
229 | */ |
230 | break; |
231 | |
232 | /* input data errors */ |
233 | case CSB_CC_OPERAND_OVERLAP: |
234 | /* input and output buffers overlap */ |
235 | CSB_ERR(csb, "Operand Overlap error" ); |
236 | return -EINVAL; |
237 | case CSB_CC_INVALID_OPERAND: |
238 | CSB_ERR(csb, "Invalid operand" ); |
239 | return -EINVAL; |
240 | case CSB_CC_NOSPC: |
241 | /* output buffer too small */ |
242 | return -ENOSPC; |
243 | case CSB_CC_ABORT: |
244 | CSB_ERR(csb, "Function aborted" ); |
245 | return -EINTR; |
246 | case CSB_CC_CRC_MISMATCH: |
247 | CSB_ERR(csb, "CRC mismatch" ); |
248 | return -EINVAL; |
249 | case CSB_CC_TEMPL_INVALID: |
250 | CSB_ERR(csb, "Compressed data template invalid" ); |
251 | return -EINVAL; |
252 | case CSB_CC_TEMPL_OVERFLOW: |
253 | CSB_ERR(csb, "Compressed data template shows data past end" ); |
254 | return -EINVAL; |
255 | case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ |
256 | /* |
257 | * DDE byte count exceeds the limit specified in Maximum |
258 | * byte count register. |
259 | */ |
260 | CSB_ERR(csb, "DDE byte count exceeds the limit" ); |
261 | return -EINVAL; |
262 | |
263 | /* these should not happen */ |
264 | case CSB_CC_INVALID_ALIGN: |
265 | /* setup_ddl should have detected this */ |
266 | CSB_ERR_ADDR(csb, "Invalid alignment" ); |
267 | return -EINVAL; |
268 | case CSB_CC_DATA_LENGTH: |
269 | /* setup_ddl should have detected this */ |
270 | CSB_ERR(csb, "Invalid data length" ); |
271 | return -EINVAL; |
272 | case CSB_CC_WR_TRANSLATION: |
273 | case CSB_CC_TRANSLATION: |
274 | case CSB_CC_TRANSLATION_DUP1: |
275 | case CSB_CC_TRANSLATION_DUP2: |
276 | case CSB_CC_TRANSLATION_DUP3: |
277 | case CSB_CC_TRANSLATION_DUP4: |
278 | case CSB_CC_TRANSLATION_DUP5: |
279 | case CSB_CC_TRANSLATION_DUP6: |
280 | /* should not happen, we use physical addrs */ |
281 | CSB_ERR_ADDR(csb, "Translation error" ); |
282 | return -EPROTO; |
283 | case CSB_CC_WR_PROTECTION: |
284 | case CSB_CC_PROTECTION: |
285 | case CSB_CC_PROTECTION_DUP1: |
286 | case CSB_CC_PROTECTION_DUP2: |
287 | case CSB_CC_PROTECTION_DUP3: |
288 | case CSB_CC_PROTECTION_DUP4: |
289 | case CSB_CC_PROTECTION_DUP5: |
290 | case CSB_CC_PROTECTION_DUP6: |
291 | /* should not happen, we use physical addrs */ |
292 | CSB_ERR_ADDR(csb, "Protection error" ); |
293 | return -EPROTO; |
294 | case CSB_CC_PRIVILEGE: |
295 | /* shouldn't happen, we're in HYP mode */ |
296 | CSB_ERR(csb, "Insufficient Privilege error" ); |
297 | return -EPROTO; |
298 | case CSB_CC_EXCESSIVE_DDE: |
299 | /* shouldn't happen, setup_ddl doesn't use many dde's */ |
300 | CSB_ERR(csb, "Too many DDEs in DDL" ); |
301 | return -EINVAL; |
302 | case CSB_CC_TRANSPORT: |
303 | case CSB_CC_INVALID_CRB: /* P9 or later */ |
304 | /* shouldn't happen, we setup CRB correctly */ |
305 | CSB_ERR(csb, "Invalid CRB" ); |
306 | return -EINVAL; |
307 | case CSB_CC_INVALID_DDE: /* P9 or later */ |
308 | /* |
309 | * shouldn't happen, setup_direct/indirect_dde creates |
310 | * DDE right |
311 | */ |
312 | CSB_ERR(csb, "Invalid DDE" ); |
313 | return -EINVAL; |
314 | case CSB_CC_SEGMENTED_DDL: |
315 | /* shouldn't happen, setup_ddl creates DDL right */ |
316 | CSB_ERR(csb, "Segmented DDL error" ); |
317 | return -EINVAL; |
318 | case CSB_CC_DDE_OVERFLOW: |
319 | /* shouldn't happen, setup_ddl creates DDL right */ |
320 | CSB_ERR(csb, "DDE overflow error" ); |
321 | return -EINVAL; |
322 | case CSB_CC_SESSION: |
323 | /* should not happen with ICSWX */ |
324 | CSB_ERR(csb, "Session violation error" ); |
325 | return -EPROTO; |
326 | case CSB_CC_CHAIN: |
327 | /* should not happen, we don't use chained CRBs */ |
328 | CSB_ERR(csb, "Chained CRB error" ); |
329 | return -EPROTO; |
330 | case CSB_CC_SEQUENCE: |
331 | /* should not happen, we don't use chained CRBs */ |
332 | CSB_ERR(csb, "CRB sequence number error" ); |
333 | return -EPROTO; |
334 | case CSB_CC_UNKNOWN_CODE: |
335 | CSB_ERR(csb, "Unknown subfunction code" ); |
336 | return -EPROTO; |
337 | |
338 | /* hardware errors */ |
339 | case CSB_CC_RD_EXTERNAL: |
340 | case CSB_CC_RD_EXTERNAL_DUP1: |
341 | case CSB_CC_RD_EXTERNAL_DUP2: |
342 | case CSB_CC_RD_EXTERNAL_DUP3: |
343 | CSB_ERR_ADDR(csb, "Read error outside coprocessor" ); |
344 | return -EPROTO; |
345 | case CSB_CC_WR_EXTERNAL: |
346 | CSB_ERR_ADDR(csb, "Write error outside coprocessor" ); |
347 | return -EPROTO; |
348 | case CSB_CC_INTERNAL: |
349 | CSB_ERR(csb, "Internal error in coprocessor" ); |
350 | return -EPROTO; |
351 | case CSB_CC_PROVISION: |
352 | CSB_ERR(csb, "Storage provision error" ); |
353 | return -EPROTO; |
354 | case CSB_CC_HW: |
355 | CSB_ERR(csb, "Correctable hardware error" ); |
356 | return -EPROTO; |
357 | case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ |
358 | CSB_ERR(csb, "Job did not finish within allowed time" ); |
359 | return -EPROTO; |
360 | |
361 | default: |
362 | CSB_ERR(csb, "Invalid CC %d" , csb->cc); |
363 | return -EPROTO; |
364 | } |
365 | |
366 | /* check Completion Extension state */ |
367 | if (csb->ce & CSB_CE_TERMINATION) { |
368 | CSB_ERR(csb, "CSB request was terminated" ); |
369 | return -EPROTO; |
370 | } |
371 | if (csb->ce & CSB_CE_INCOMPLETE) { |
372 | CSB_ERR(csb, "CSB request not complete" ); |
373 | return -EPROTO; |
374 | } |
375 | if (!(csb->ce & CSB_CE_TPBC)) { |
376 | CSB_ERR(csb, "TPBC not provided, unknown target length" ); |
377 | return -EPROTO; |
378 | } |
379 | |
380 | /* successful completion */ |
381 | pr_debug_ratelimited("Processed %u bytes in %lu us\n" , |
382 | be32_to_cpu(csb->count), |
383 | (unsigned long)ktime_us_delta(now, start)); |
384 | |
385 | return 0; |
386 | } |
387 | |
388 | static int nx842_config_crb(const unsigned char *in, unsigned int inlen, |
389 | unsigned char *out, unsigned int outlen, |
390 | struct nx842_workmem *wmem) |
391 | { |
392 | struct coprocessor_request_block *crb; |
393 | struct coprocessor_status_block *csb; |
394 | u64 csb_addr; |
395 | int ret; |
396 | |
397 | crb = &wmem->crb; |
398 | csb = &crb->csb; |
399 | |
400 | /* Clear any previous values */ |
401 | memset(crb, 0, sizeof(*crb)); |
402 | |
403 | /* set up DDLs */ |
404 | ret = setup_ddl(dde: &crb->source, ddl: wmem->ddl_in, |
405 | buf: (unsigned char *)in, len: inlen, in: true); |
406 | if (ret) |
407 | return ret; |
408 | |
409 | ret = setup_ddl(dde: &crb->target, ddl: wmem->ddl_out, |
410 | buf: out, len: outlen, in: false); |
411 | if (ret) |
412 | return ret; |
413 | |
414 | /* set up CRB's CSB addr */ |
415 | csb_addr = nx842_get_pa(addr: csb) & CRB_CSB_ADDRESS; |
416 | csb_addr |= CRB_CSB_AT; /* Addrs are phys */ |
417 | crb->csb_addr = cpu_to_be64(csb_addr); |
418 | |
419 | return 0; |
420 | } |
421 | |
422 | /** |
423 | * nx842_exec_icswx - compress/decompress data using the 842 algorithm |
424 | * |
425 | * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. |
426 | * This compresses or decompresses the provided input buffer into the provided |
427 | * output buffer. |
428 | * |
429 | * Upon return from this function @outlen contains the length of the |
430 | * output data. If there is an error then @outlen will be 0 and an |
431 | * error will be specified by the return code from this function. |
432 | * |
433 | * The @workmem buffer should only be used by one function call at a time. |
434 | * |
435 | * @in: input buffer pointer |
436 | * @inlen: input buffer size |
437 | * @out: output buffer pointer |
438 | * @outlenp: output buffer size pointer |
439 | * @workmem: working memory buffer pointer, size determined by |
440 | * nx842_powernv_driver.workmem_size |
441 | * @fc: function code, see CCW Function Codes in nx-842.h |
442 | * |
443 | * Returns: |
444 | * 0 Success, output of length @outlenp stored in the buffer at @out |
445 | * -ENODEV Hardware unavailable |
446 | * -ENOSPC Output buffer is to small |
447 | * -EMSGSIZE Input buffer too large |
448 | * -EINVAL buffer constraints do not fix nx842_constraints |
449 | * -EPROTO hardware error during operation |
450 | * -ETIMEDOUT hardware did not complete operation in reasonable time |
451 | * -EINTR operation was aborted |
452 | */ |
453 | static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen, |
454 | unsigned char *out, unsigned int *outlenp, |
455 | void *workmem, int fc) |
456 | { |
457 | struct coprocessor_request_block *crb; |
458 | struct coprocessor_status_block *csb; |
459 | struct nx842_workmem *wmem; |
460 | int ret; |
461 | u32 ccw; |
462 | unsigned int outlen = *outlenp; |
463 | |
464 | wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); |
465 | |
466 | *outlenp = 0; |
467 | |
468 | /* shoudn't happen, we don't load without a coproc */ |
469 | if (!nx842_ct) { |
470 | pr_err_ratelimited("coprocessor CT is 0" ); |
471 | return -ENODEV; |
472 | } |
473 | |
474 | ret = nx842_config_crb(in, inlen, out, outlen, wmem); |
475 | if (ret) |
476 | return ret; |
477 | |
478 | crb = &wmem->crb; |
479 | csb = &crb->csb; |
480 | |
481 | /* set up CCW */ |
482 | ccw = 0; |
483 | ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); |
484 | ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ |
485 | ccw = SET_FIELD(CCW_FC_842, ccw, fc); |
486 | |
487 | wmem->start = ktime_get(); |
488 | |
489 | /* do ICSWX */ |
490 | ret = icswx(cpu_to_be32(ccw), crb); |
491 | |
492 | pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n" , ret, |
493 | (unsigned int)ccw, |
494 | (unsigned int)be32_to_cpu(crb->ccw)); |
495 | |
496 | /* |
497 | * NX842 coprocessor sets 3rd bit in CR register with XER[S0]. |
498 | * XER[S0] is the integer summary overflow bit which is nothing |
499 | * to do NX. Since this bit can be set with other return values, |
500 | * mask this bit. |
501 | */ |
502 | ret &= ~ICSWX_XERS0; |
503 | |
504 | switch (ret) { |
505 | case ICSWX_INITIATED: |
506 | ret = wait_for_csb(wmem, csb); |
507 | break; |
508 | case ICSWX_BUSY: |
509 | pr_debug_ratelimited("842 Coprocessor busy\n" ); |
510 | ret = -EBUSY; |
511 | break; |
512 | case ICSWX_REJECTED: |
513 | pr_err_ratelimited("ICSWX rejected\n" ); |
514 | ret = -EPROTO; |
515 | break; |
516 | } |
517 | |
518 | if (!ret) |
519 | *outlenp = be32_to_cpu(csb->count); |
520 | |
521 | return ret; |
522 | } |
523 | |
524 | /** |
525 | * nx842_exec_vas - compress/decompress data using the 842 algorithm |
526 | * |
527 | * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. |
528 | * This compresses or decompresses the provided input buffer into the provided |
529 | * output buffer. |
530 | * |
531 | * Upon return from this function @outlen contains the length of the |
532 | * output data. If there is an error then @outlen will be 0 and an |
533 | * error will be specified by the return code from this function. |
534 | * |
535 | * The @workmem buffer should only be used by one function call at a time. |
536 | * |
537 | * @in: input buffer pointer |
538 | * @inlen: input buffer size |
539 | * @out: output buffer pointer |
540 | * @outlenp: output buffer size pointer |
541 | * @workmem: working memory buffer pointer, size determined by |
542 | * nx842_powernv_driver.workmem_size |
543 | * @fc: function code, see CCW Function Codes in nx-842.h |
544 | * |
545 | * Returns: |
546 | * 0 Success, output of length @outlenp stored in the buffer |
547 | * at @out |
548 | * -ENODEV Hardware unavailable |
549 | * -ENOSPC Output buffer is to small |
550 | * -EMSGSIZE Input buffer too large |
551 | * -EINVAL buffer constraints do not fix nx842_constraints |
552 | * -EPROTO hardware error during operation |
553 | * -ETIMEDOUT hardware did not complete operation in reasonable time |
554 | * -EINTR operation was aborted |
555 | */ |
556 | static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, |
557 | unsigned char *out, unsigned int *outlenp, |
558 | void *workmem, int fc) |
559 | { |
560 | struct coprocessor_request_block *crb; |
561 | struct coprocessor_status_block *csb; |
562 | struct nx842_workmem *wmem; |
563 | struct vas_window *txwin; |
564 | int ret, i = 0; |
565 | u32 ccw; |
566 | unsigned int outlen = *outlenp; |
567 | |
568 | wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); |
569 | |
570 | *outlenp = 0; |
571 | |
572 | crb = &wmem->crb; |
573 | csb = &crb->csb; |
574 | |
575 | ret = nx842_config_crb(in, inlen, out, outlen, wmem); |
576 | if (ret) |
577 | return ret; |
578 | |
579 | ccw = 0; |
580 | ccw = SET_FIELD(CCW_FC_842, ccw, fc); |
581 | crb->ccw = cpu_to_be32(ccw); |
582 | |
583 | do { |
584 | wmem->start = ktime_get(); |
585 | preempt_disable(); |
586 | txwin = this_cpu_read(cpu_txwin); |
587 | |
588 | /* |
589 | * VAS copy CRB into L2 cache. Refer <asm/vas.h>. |
590 | * @crb and @offset. |
591 | */ |
592 | vas_copy_crb(crb, 0); |
593 | |
594 | /* |
595 | * VAS paste previously copied CRB to NX. |
596 | * @txwin, @offset and @last (must be true). |
597 | */ |
598 | ret = vas_paste_crb(txwin, 0, 1); |
599 | preempt_enable(); |
600 | /* |
601 | * Retry copy/paste function for VAS failures. |
602 | */ |
603 | } while (ret && (i++ < VAS_RETRIES)); |
604 | |
605 | if (ret) { |
606 | pr_err_ratelimited("VAS copy/paste failed\n" ); |
607 | return ret; |
608 | } |
609 | |
610 | ret = wait_for_csb(wmem, csb); |
611 | if (!ret) |
612 | *outlenp = be32_to_cpu(csb->count); |
613 | |
614 | return ret; |
615 | } |
616 | |
617 | /** |
618 | * nx842_powernv_compress - Compress data using the 842 algorithm |
619 | * |
620 | * Compression provided by the NX842 coprocessor on IBM PowerNV systems. |
621 | * The input buffer is compressed and the result is stored in the |
622 | * provided output buffer. |
623 | * |
624 | * Upon return from this function @outlen contains the length of the |
625 | * compressed data. If there is an error then @outlen will be 0 and an |
626 | * error will be specified by the return code from this function. |
627 | * |
628 | * @in: input buffer pointer |
629 | * @inlen: input buffer size |
630 | * @out: output buffer pointer |
631 | * @outlenp: output buffer size pointer |
632 | * @wmem: working memory buffer pointer, size determined by |
633 | * nx842_powernv_driver.workmem_size |
634 | * |
635 | * Returns: see @nx842_powernv_exec() |
636 | */ |
637 | static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, |
638 | unsigned char *out, unsigned int *outlenp, |
639 | void *wmem) |
640 | { |
641 | return nx842_powernv_exec(in, inlen, out, outlenp, |
642 | wmem, CCW_FC_842_COMP_CRC); |
643 | } |
644 | |
645 | /** |
646 | * nx842_powernv_decompress - Decompress data using the 842 algorithm |
647 | * |
648 | * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. |
649 | * The input buffer is decompressed and the result is stored in the |
650 | * provided output buffer. |
651 | * |
652 | * Upon return from this function @outlen contains the length of the |
653 | * decompressed data. If there is an error then @outlen will be 0 and an |
654 | * error will be specified by the return code from this function. |
655 | * |
656 | * @in: input buffer pointer |
657 | * @inlen: input buffer size |
658 | * @out: output buffer pointer |
659 | * @outlenp: output buffer size pointer |
660 | * @wmem: working memory buffer pointer, size determined by |
661 | * nx842_powernv_driver.workmem_size |
662 | * |
663 | * Returns: see @nx842_powernv_exec() |
664 | */ |
665 | static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, |
666 | unsigned char *out, unsigned int *outlenp, |
667 | void *wmem) |
668 | { |
669 | return nx842_powernv_exec(in, inlen, out, outlenp, |
670 | wmem, CCW_FC_842_DECOMP_CRC); |
671 | } |
672 | |
673 | static inline void nx_add_coprocs_list(struct nx_coproc *coproc, |
674 | int chipid) |
675 | { |
676 | coproc->chip_id = chipid; |
677 | INIT_LIST_HEAD(list: &coproc->list); |
678 | list_add(new: &coproc->list, head: &nx_coprocs); |
679 | } |
680 | |
681 | static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc) |
682 | { |
683 | struct vas_window *txwin = NULL; |
684 | struct vas_tx_win_attr txattr; |
685 | |
686 | /* |
687 | * Kernel requests will be high priority. So open send |
688 | * windows only for high priority RxFIFO entries. |
689 | */ |
690 | vas_init_tx_win_attr(&txattr, coproc->ct); |
691 | txattr.lpid = 0; /* lpid is 0 for kernel requests */ |
692 | |
693 | /* |
694 | * Open a VAS send window which is used to send request to NX. |
695 | */ |
696 | txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); |
697 | if (IS_ERR(ptr: txwin)) |
698 | pr_err("ibm,nx-842: Can not open TX window: %ld\n" , |
699 | PTR_ERR(txwin)); |
700 | |
701 | return txwin; |
702 | } |
703 | |
704 | /* |
705 | * Identify chip ID for each CPU, open send wndow for the corresponding NX |
706 | * engine and save txwin in percpu cpu_txwin. |
707 | * cpu_txwin is used in copy/paste operation for each compression / |
708 | * decompression request. |
709 | */ |
710 | static int nx_open_percpu_txwins(void) |
711 | { |
712 | struct nx_coproc *coproc, *n; |
713 | unsigned int i, chip_id; |
714 | |
715 | for_each_possible_cpu(i) { |
716 | struct vas_window *txwin = NULL; |
717 | |
718 | chip_id = cpu_to_chip_id(i); |
719 | |
720 | list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { |
721 | /* |
722 | * Kernel requests use only high priority FIFOs. So |
723 | * open send windows for these FIFOs. |
724 | * GZIP is not supported in kernel right now. |
725 | */ |
726 | |
727 | if (coproc->ct != VAS_COP_TYPE_842_HIPRI) |
728 | continue; |
729 | |
730 | if (coproc->chip_id == chip_id) { |
731 | txwin = nx_alloc_txwin(coproc); |
732 | if (IS_ERR(ptr: txwin)) |
733 | return PTR_ERR(ptr: txwin); |
734 | |
735 | per_cpu(cpu_txwin, i) = txwin; |
736 | break; |
737 | } |
738 | } |
739 | |
740 | if (!per_cpu(cpu_txwin, i)) { |
741 | /* shouldn't happen, Each chip will have NX engine */ |
742 | pr_err("NX engine is not available for CPU %d\n" , i); |
743 | return -EINVAL; |
744 | } |
745 | } |
746 | |
747 | return 0; |
748 | } |
749 | |
750 | static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority, |
751 | int high, int normal) |
752 | { |
753 | if (!strcmp(priority, "High" )) |
754 | coproc->ct = high; |
755 | else if (!strcmp(priority, "Normal" )) |
756 | coproc->ct = normal; |
757 | else { |
758 | pr_err("Invalid RxFIFO priority value\n" ); |
759 | return -EINVAL; |
760 | } |
761 | |
762 | return 0; |
763 | } |
764 | |
765 | static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, |
766 | int vasid, int type, int *ct) |
767 | { |
768 | struct vas_window *rxwin = NULL; |
769 | struct vas_rx_win_attr rxattr; |
770 | u32 lpid, pid, tid, fifo_size; |
771 | struct nx_coproc *coproc; |
772 | u64 rx_fifo; |
773 | const char *priority; |
774 | int ret; |
775 | |
776 | ret = of_property_read_u64(np: dn, propname: "rx-fifo-address" , out_value: &rx_fifo); |
777 | if (ret) { |
778 | pr_err("Missing rx-fifo-address property\n" ); |
779 | return ret; |
780 | } |
781 | |
782 | ret = of_property_read_u32(np: dn, propname: "rx-fifo-size" , out_value: &fifo_size); |
783 | if (ret) { |
784 | pr_err("Missing rx-fifo-size property\n" ); |
785 | return ret; |
786 | } |
787 | |
788 | ret = of_property_read_u32(np: dn, propname: "lpid" , out_value: &lpid); |
789 | if (ret) { |
790 | pr_err("Missing lpid property\n" ); |
791 | return ret; |
792 | } |
793 | |
794 | ret = of_property_read_u32(np: dn, propname: "pid" , out_value: &pid); |
795 | if (ret) { |
796 | pr_err("Missing pid property\n" ); |
797 | return ret; |
798 | } |
799 | |
800 | ret = of_property_read_u32(np: dn, propname: "tid" , out_value: &tid); |
801 | if (ret) { |
802 | pr_err("Missing tid property\n" ); |
803 | return ret; |
804 | } |
805 | |
806 | ret = of_property_read_string(np: dn, propname: "priority" , out_string: &priority); |
807 | if (ret) { |
808 | pr_err("Missing priority property\n" ); |
809 | return ret; |
810 | } |
811 | |
812 | coproc = kzalloc(size: sizeof(*coproc), GFP_KERNEL); |
813 | if (!coproc) |
814 | return -ENOMEM; |
815 | |
816 | if (type == NX_CT_842) |
817 | ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI, |
818 | VAS_COP_TYPE_842); |
819 | else if (type == NX_CT_GZIP) |
820 | ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI, |
821 | VAS_COP_TYPE_GZIP); |
822 | |
823 | if (ret) |
824 | goto err_out; |
825 | |
826 | vas_init_rx_win_attr(&rxattr, coproc->ct); |
827 | rxattr.rx_fifo = rx_fifo; |
828 | rxattr.rx_fifo_size = fifo_size; |
829 | rxattr.lnotify_lpid = lpid; |
830 | rxattr.lnotify_pid = pid; |
831 | rxattr.lnotify_tid = tid; |
832 | /* |
833 | * Maximum RX window credits can not be more than #CRBs in |
834 | * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns. |
835 | */ |
836 | rxattr.wcreds_max = fifo_size / CRB_SIZE; |
837 | |
838 | /* |
839 | * Open a VAS receice window which is used to configure RxFIFO |
840 | * for NX. |
841 | */ |
842 | rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr); |
843 | if (IS_ERR(ptr: rxwin)) { |
844 | ret = PTR_ERR(ptr: rxwin); |
845 | pr_err("setting RxFIFO with VAS failed: %d\n" , |
846 | ret); |
847 | goto err_out; |
848 | } |
849 | |
850 | coproc->vas.rxwin = rxwin; |
851 | coproc->vas.id = vasid; |
852 | nx_add_coprocs_list(coproc, chipid: chip_id); |
853 | |
854 | /* |
855 | * (lpid, pid, tid) combination has to be unique for each |
856 | * coprocessor instance in the system. So to make it |
857 | * unique, skiboot uses coprocessor type such as 842 or |
858 | * GZIP for pid and provides this value to kernel in pid |
859 | * device-tree property. |
860 | */ |
861 | *ct = pid; |
862 | |
863 | return 0; |
864 | |
865 | err_out: |
866 | kfree(objp: coproc); |
867 | return ret; |
868 | } |
869 | |
870 | static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip) |
871 | { |
872 | int ret = 0; |
873 | |
874 | if (opal_check_token(OPAL_NX_COPROC_INIT)) { |
875 | ret = opal_nx_coproc_init(chip_id, ct_842); |
876 | |
877 | if (!ret) |
878 | ret = opal_nx_coproc_init(chip_id, ct_gzip); |
879 | |
880 | if (ret) { |
881 | ret = opal_error_code(ret); |
882 | pr_err("Failed to initialize NX for chip(%d): %d\n" , |
883 | chip_id, ret); |
884 | } |
885 | } else |
886 | pr_warn("Firmware doesn't support NX initialization\n" ); |
887 | |
888 | return ret; |
889 | } |
890 | |
891 | static int __init find_nx_device_tree(struct device_node *dn, int chip_id, |
892 | int vasid, int type, char *devname, |
893 | int *ct) |
894 | { |
895 | int ret = 0; |
896 | |
897 | if (of_device_is_compatible(device: dn, devname)) { |
898 | ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct); |
899 | if (ret) |
900 | of_node_put(node: dn); |
901 | } |
902 | |
903 | return ret; |
904 | } |
905 | |
906 | static int __init nx_powernv_probe_vas(struct device_node *pn) |
907 | { |
908 | int chip_id, vasid, ret = 0; |
909 | int ct_842 = 0, ct_gzip = 0; |
910 | struct device_node *dn; |
911 | |
912 | chip_id = of_get_ibm_chip_id(pn); |
913 | if (chip_id < 0) { |
914 | pr_err("ibm,chip-id missing\n" ); |
915 | return -EINVAL; |
916 | } |
917 | |
918 | vasid = chip_to_vas_id(chip_id); |
919 | if (vasid < 0) { |
920 | pr_err("Unable to map chip_id %d to vasid\n" , chip_id); |
921 | return -EINVAL; |
922 | } |
923 | |
924 | for_each_child_of_node(pn, dn) { |
925 | ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842, |
926 | devname: "ibm,p9-nx-842" , ct: &ct_842); |
927 | |
928 | if (!ret) |
929 | ret = find_nx_device_tree(dn, chip_id, vasid, |
930 | NX_CT_GZIP, devname: "ibm,p9-nx-gzip" , ct: &ct_gzip); |
931 | |
932 | if (ret) { |
933 | of_node_put(node: dn); |
934 | return ret; |
935 | } |
936 | } |
937 | |
938 | if (!ct_842 || !ct_gzip) { |
939 | pr_err("NX FIFO nodes are missing\n" ); |
940 | return -EINVAL; |
941 | } |
942 | |
943 | /* |
944 | * Initialize NX instance for both high and normal priority FIFOs. |
945 | */ |
946 | ret = nx_coproc_init(chip_id, ct_842, ct_gzip); |
947 | |
948 | return ret; |
949 | } |
950 | |
951 | static int __init nx842_powernv_probe(struct device_node *dn) |
952 | { |
953 | struct nx_coproc *coproc; |
954 | unsigned int ct, ci; |
955 | int chip_id; |
956 | |
957 | chip_id = of_get_ibm_chip_id(dn); |
958 | if (chip_id < 0) { |
959 | pr_err("ibm,chip-id missing\n" ); |
960 | return -EINVAL; |
961 | } |
962 | |
963 | if (of_property_read_u32(np: dn, propname: "ibm,842-coprocessor-type" , out_value: &ct)) { |
964 | pr_err("ibm,842-coprocessor-type missing\n" ); |
965 | return -EINVAL; |
966 | } |
967 | |
968 | if (of_property_read_u32(np: dn, propname: "ibm,842-coprocessor-instance" , out_value: &ci)) { |
969 | pr_err("ibm,842-coprocessor-instance missing\n" ); |
970 | return -EINVAL; |
971 | } |
972 | |
973 | coproc = kzalloc(size: sizeof(*coproc), GFP_KERNEL); |
974 | if (!coproc) |
975 | return -ENOMEM; |
976 | |
977 | coproc->ct = ct; |
978 | coproc->ci = ci; |
979 | nx_add_coprocs_list(coproc, chipid: chip_id); |
980 | |
981 | pr_info("coprocessor found on chip %d, CT %d CI %d\n" , chip_id, ct, ci); |
982 | |
983 | if (!nx842_ct) |
984 | nx842_ct = ct; |
985 | else if (nx842_ct != ct) |
986 | pr_err("NX842 chip %d, CT %d != first found CT %d\n" , |
987 | chip_id, ct, nx842_ct); |
988 | |
989 | return 0; |
990 | } |
991 | |
992 | static void nx_delete_coprocs(void) |
993 | { |
994 | struct nx_coproc *coproc, *n; |
995 | struct vas_window *txwin; |
996 | int i; |
997 | |
998 | /* |
999 | * close percpu txwins that are opened for the corresponding coproc. |
1000 | */ |
1001 | for_each_possible_cpu(i) { |
1002 | txwin = per_cpu(cpu_txwin, i); |
1003 | if (txwin) |
1004 | vas_win_close(txwin); |
1005 | |
1006 | per_cpu(cpu_txwin, i) = NULL; |
1007 | } |
1008 | |
1009 | list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { |
1010 | if (coproc->vas.rxwin) |
1011 | vas_win_close(coproc->vas.rxwin); |
1012 | |
1013 | list_del(entry: &coproc->list); |
1014 | kfree(objp: coproc); |
1015 | } |
1016 | } |
1017 | |
1018 | static struct nx842_constraints nx842_powernv_constraints = { |
1019 | .alignment = DDE_BUFFER_ALIGN, |
1020 | .multiple = DDE_BUFFER_LAST_MULT, |
1021 | .minimum = DDE_BUFFER_LAST_MULT, |
1022 | .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, |
1023 | }; |
1024 | |
1025 | static struct nx842_driver nx842_powernv_driver = { |
1026 | .name = KBUILD_MODNAME, |
1027 | .owner = THIS_MODULE, |
1028 | .workmem_size = sizeof(struct nx842_workmem), |
1029 | .constraints = &nx842_powernv_constraints, |
1030 | .compress = nx842_powernv_compress, |
1031 | .decompress = nx842_powernv_decompress, |
1032 | }; |
1033 | |
1034 | static int nx842_powernv_crypto_init(struct crypto_tfm *tfm) |
1035 | { |
1036 | return nx842_crypto_init(tfm, driver: &nx842_powernv_driver); |
1037 | } |
1038 | |
1039 | static struct crypto_alg nx842_powernv_alg = { |
1040 | .cra_name = "842" , |
1041 | .cra_driver_name = "842-nx" , |
1042 | .cra_priority = 300, |
1043 | .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, |
1044 | .cra_ctxsize = sizeof(struct nx842_crypto_ctx), |
1045 | .cra_module = THIS_MODULE, |
1046 | .cra_init = nx842_powernv_crypto_init, |
1047 | .cra_exit = nx842_crypto_exit, |
1048 | .cra_u = { .compress = { |
1049 | .coa_compress = nx842_crypto_compress, |
1050 | .coa_decompress = nx842_crypto_decompress } } |
1051 | }; |
1052 | |
1053 | static __init int nx_compress_powernv_init(void) |
1054 | { |
1055 | struct device_node *dn; |
1056 | int ret; |
1057 | |
1058 | /* verify workmem size/align restrictions */ |
1059 | BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); |
1060 | BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); |
1061 | BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); |
1062 | /* verify buffer size/align restrictions */ |
1063 | BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); |
1064 | BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); |
1065 | BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); |
1066 | |
1067 | for_each_compatible_node(dn, NULL, "ibm,power9-nx" ) { |
1068 | ret = nx_powernv_probe_vas(pn: dn); |
1069 | if (ret) { |
1070 | nx_delete_coprocs(); |
1071 | of_node_put(node: dn); |
1072 | return ret; |
1073 | } |
1074 | } |
1075 | |
1076 | if (list_empty(head: &nx_coprocs)) { |
1077 | for_each_compatible_node(dn, NULL, "ibm,power-nx" ) |
1078 | nx842_powernv_probe(dn); |
1079 | |
1080 | if (!nx842_ct) |
1081 | return -ENODEV; |
1082 | |
1083 | nx842_powernv_exec = nx842_exec_icswx; |
1084 | } else { |
1085 | /* |
1086 | * Register VAS user space API for NX GZIP so |
1087 | * that user space can use GZIP engine. |
1088 | * Using high FIFO priority for kernel requests and |
1089 | * normal FIFO priority is assigned for userspace. |
1090 | * 842 compression is supported only in kernel. |
1091 | */ |
1092 | ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, |
1093 | "nx-gzip" ); |
1094 | |
1095 | /* |
1096 | * GZIP is not supported in kernel right now. |
1097 | * So open tx windows only for 842. |
1098 | */ |
1099 | if (!ret) |
1100 | ret = nx_open_percpu_txwins(); |
1101 | |
1102 | if (ret) { |
1103 | nx_delete_coprocs(); |
1104 | return ret; |
1105 | } |
1106 | |
1107 | nx842_powernv_exec = nx842_exec_vas; |
1108 | } |
1109 | |
1110 | ret = crypto_register_alg(alg: &nx842_powernv_alg); |
1111 | if (ret) { |
1112 | nx_delete_coprocs(); |
1113 | return ret; |
1114 | } |
1115 | |
1116 | return 0; |
1117 | } |
1118 | module_init(nx_compress_powernv_init); |
1119 | |
1120 | static void __exit nx_compress_powernv_exit(void) |
1121 | { |
1122 | /* |
1123 | * GZIP engine is supported only in power9 or later and nx842_ct |
1124 | * is used on power8 (icswx). |
1125 | * VAS API for NX GZIP is registered during init for user space |
1126 | * use. So delete this API use for GZIP engine. |
1127 | */ |
1128 | if (!nx842_ct) |
1129 | vas_unregister_api_powernv(); |
1130 | |
1131 | crypto_unregister_alg(alg: &nx842_powernv_alg); |
1132 | |
1133 | nx_delete_coprocs(); |
1134 | } |
1135 | module_exit(nx_compress_powernv_exit); |
1136 | |