1 | /* |
2 | * This file is provided under a dual BSD/GPLv2 license. When using or |
3 | * redistributing this file, you may do so under either license. |
4 | * |
5 | * GPL LICENSE SUMMARY |
6 | * |
7 | * Copyright(c) 2015 Intel Corporation. All rights reserved. |
8 | * Copyright(c) 2017 T-Platforms. All Rights Reserved. |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of version 2 of the GNU General Public License as |
12 | * published by the Free Software Foundation. |
13 | * |
14 | * BSD LICENSE |
15 | * |
16 | * Copyright(c) 2015 Intel Corporation. All rights reserved. |
17 | * Copyright(c) 2017 T-Platforms. All Rights Reserved. |
18 | * |
19 | * Redistribution and use in source and binary forms, with or without |
20 | * modification, are permitted provided that the following conditions |
21 | * are met: |
22 | * |
23 | * * Redistributions of source code must retain the above copyright |
24 | * notice, this list of conditions and the following disclaimer. |
25 | * * Redistributions in binary form must reproduce the above copy |
26 | * notice, this list of conditions and the following disclaimer in |
27 | * the documentation and/or other materials provided with the |
28 | * distribution. |
29 | * * Neither the name of Intel Corporation nor the names of its |
30 | * contributors may be used to endorse or promote products derived |
31 | * from this software without specific prior written permission. |
32 | * |
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
34 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
35 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
36 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
37 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
38 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
39 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
40 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
41 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
42 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
43 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
44 | * |
45 | * PCIe NTB Perf Linux driver |
46 | */ |
47 | |
48 | /* |
49 | * How to use this tool, by example. |
50 | * |
51 | * Assuming $DBG_DIR is something like: |
52 | * '/sys/kernel/debug/ntb_perf/0000:00:03.0' |
 * Suppose that, aside from the local device, there is at least one remote
 * device connected to the NTB, with peer index 0.
55 | *----------------------------------------------------------------------------- |
56 | * Eg: install driver with specified chunk/total orders and dma-enabled flag |
57 | * |
58 | * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma |
59 | *----------------------------------------------------------------------------- |
60 | * Eg: check NTB ports (index) and MW mapping information |
61 | * |
62 | * root@self# cat $DBG_DIR/info |
63 | *----------------------------------------------------------------------------- |
64 | * Eg: start performance test with peer (index 0) and get the test metrics |
65 | * |
66 | * root@self# echo 0 > $DBG_DIR/run |
67 | * root@self# cat $DBG_DIR/run |
68 | */ |
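
/*
 * Eg: optionally adjust the number of measuring threads before the run
 * (the "threads_count" node is created by perf_setup_dbgfs() below)
 *
 * root@self# echo 4 > $DBG_DIR/threads_count
 * root@self# cat $DBG_DIR/threads_count
 */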
69 | |
70 | #include <linux/init.h> |
71 | #include <linux/kernel.h> |
72 | #include <linux/module.h> |
73 | #include <linux/sched.h> |
74 | #include <linux/wait.h> |
75 | #include <linux/dma-mapping.h> |
76 | #include <linux/dmaengine.h> |
77 | #include <linux/pci.h> |
78 | #include <linux/ktime.h> |
79 | #include <linux/slab.h> |
80 | #include <linux/delay.h> |
81 | #include <linux/sizes.h> |
82 | #include <linux/workqueue.h> |
83 | #include <linux/debugfs.h> |
84 | #include <linux/random.h> |
85 | #include <linux/ntb.h> |
86 | |
87 | #define DRIVER_NAME "ntb_perf" |
88 | #define DRIVER_VERSION "2.0" |
89 | |
90 | MODULE_LICENSE("Dual BSD/GPL"); |
91 | MODULE_VERSION(DRIVER_VERSION); |
92 | MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>"); |
93 | MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); |
94 | |
95 | #define MAX_THREADS_CNT 32 |
96 | #define DEF_THREADS_CNT 1 |
97 | #define MAX_CHUNK_SIZE SZ_1M |
98 | #define MAX_CHUNK_ORDER 20 /* no larger than 1M */ |
99 | |
100 | #define DMA_TRIES 100 |
101 | #define DMA_MDELAY 10 |
102 | |
103 | #define MSG_TRIES 1000 |
104 | #define MSG_UDELAY_LOW 1000000 |
105 | #define MSG_UDELAY_HIGH 2000000 |
106 | |
107 | #define PERF_BUF_LEN 1024 |
108 | |
109 | static unsigned long max_mw_size; |
110 | module_param(max_mw_size, ulong, 0644); |
111 | MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size"); |
112 | |
113 | static unsigned char chunk_order = 19; /* 512K */ |
114 | module_param(chunk_order, byte, 0644); |
115 | MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer"); |
116 | |
117 | static unsigned char total_order = 30; /* 1G */ |
118 | module_param(total_order, byte, 0644); |
119 | MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer"); |
120 | |
121 | static bool use_dma; /* default to 0 */ |
122 | module_param(use_dma, bool, 0644); |
123 | MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance"); |
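
/*
 * Illustrative arithmetic only: with the default chunk_order=19 and
 * total_order=30 each measuring thread copies 2^30 = 1 GiB of data in
 * 2^19 = 512 KiB chunks, i.e. 2^(30-19) = 2048 chunk transfers per thread
 * (assuming the chunk isn't capped by the outbuf size, see perf_run_test()).
 */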
124 | |
125 | /*============================================================================== |
126 | * Perf driver data definition |
127 | *============================================================================== |
128 | */ |
129 | |
130 | enum perf_cmd { |
131 | PERF_CMD_INVAL = -1,/* invalid spad command */ |
132 | PERF_CMD_SSIZE = 0, /* send out buffer size */ |
133 | PERF_CMD_RSIZE = 1, /* recv in buffer size */ |
134 | PERF_CMD_SXLAT = 2, /* send in buffer xlat */ |
135 | PERF_CMD_RXLAT = 3, /* recv out buffer xlat */ |
136 | PERF_CMD_CLEAR = 4, /* clear allocated memory */ |
137 | PERF_STS_DONE = 5, /* init is done */ |
138 | PERF_STS_LNKUP = 6, /* link up state flag */ |
139 | }; |
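
/*
 * The commands above implement a simple initialization handshake with each
 * peer (see perf_cmd_recv() and perf_service_work()): a side sends SSIZE
 * with its outbuf size, the receiver allocates a matching inbuf (RSIZE) and
 * replies with SXLAT carrying the inbuf translation address, which the
 * sender then programs into its outbound MW (RXLAT), sets PERF_STS_DONE and
 * completes init_comp. Both sides run the same sequence symmetrically.
 */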
140 | |
141 | struct perf_ctx; |
142 | |
143 | struct perf_peer { |
144 | struct perf_ctx *perf; |
145 | int pidx; |
146 | int gidx; |
147 | |
148 | /* Outbound MW params */ |
149 | u64 outbuf_xlat; |
150 | resource_size_t outbuf_size; |
151 | void __iomem *outbuf; |
152 | phys_addr_t out_phys_addr; |
153 | dma_addr_t dma_dst_addr; |
154 | /* Inbound MW params */ |
155 | dma_addr_t inbuf_xlat; |
156 | resource_size_t inbuf_size; |
157 | void *inbuf; |
158 | |
159 | /* NTB connection setup service */ |
160 | struct work_struct service; |
161 | unsigned long sts; |
162 | |
163 | struct completion init_comp; |
164 | }; |
165 | #define to_peer_service(__work) \ |
166 | container_of(__work, struct perf_peer, service) |
167 | |
168 | struct perf_thread { |
169 | struct perf_ctx *perf; |
170 | int tidx; |
171 | |
172 | /* DMA-based test sync parameters */ |
173 | atomic_t dma_sync; |
174 | wait_queue_head_t dma_wait; |
175 | struct dma_chan *dma_chan; |
176 | |
177 | /* Data source and measured statistics */ |
178 | void *src; |
179 | u64 copied; |
180 | ktime_t duration; |
181 | int status; |
182 | struct work_struct work; |
183 | }; |
184 | #define to_thread_work(__work) \ |
185 | container_of(__work, struct perf_thread, work) |
186 | |
187 | struct perf_ctx { |
188 | struct ntb_dev *ntb; |
189 | |
190 | /* Global device index and peers descriptors */ |
191 | int gidx; |
192 | int pcnt; |
193 | struct perf_peer *peers; |
194 | |
195 | /* Performance measuring work-threads interface */ |
196 | unsigned long busy_flag; |
197 | wait_queue_head_t twait; |
198 | atomic_t tsync; |
199 | u8 tcnt; |
200 | struct perf_peer *test_peer; |
201 | struct perf_thread threads[MAX_THREADS_CNT]; |
202 | |
203 | /* Scratchpad/Message IO operations */ |
204 | int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data); |
205 | int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd, |
206 | u64 *data); |
207 | |
208 | struct dentry *dbgfs_dir; |
209 | }; |
210 | |
211 | /* |
 * Scratchpad-based commands interface
213 | */ |
214 | #define PERF_SPAD_CNT(_pcnt) \ |
215 | (3*((_pcnt) + 1)) |
216 | #define PERF_SPAD_CMD(_gidx) \ |
217 | (3*(_gidx)) |
218 | #define PERF_SPAD_LDATA(_gidx) \ |
219 | (3*(_gidx) + 1) |
220 | #define PERF_SPAD_HDATA(_gidx) \ |
221 | (3*(_gidx) + 2) |
222 | #define PERF_SPAD_NOTIFY(_gidx) \ |
223 | (BIT_ULL(_gidx)) |
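
/*
 * For example, per the macros above: with two peers (_pcnt == 2) a total of
 * PERF_SPAD_CNT(2) == 9 scratchpads is required, and the device with global
 * index 1 uses scratchpads 3/4/5 as its CMD/LDATA/HDATA slots respectively.
 */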
224 | |
225 | /* |
226 | * Messages-base commands interface |
227 | */ |
228 | #define PERF_MSG_CNT 3 |
229 | #define PERF_MSG_CMD 0 |
230 | #define PERF_MSG_LDATA 1 |
231 | #define PERF_MSG_HDATA 2 |
232 | |
233 | /*============================================================================== |
234 | * Static data declarations |
235 | *============================================================================== |
236 | */ |
237 | |
238 | static struct dentry *perf_dbgfs_topdir; |
239 | |
240 | static struct workqueue_struct *perf_wq __read_mostly; |
241 | |
242 | /*============================================================================== |
243 | * NTB cross-link commands execution service |
244 | *============================================================================== |
245 | */ |
246 | |
247 | static void perf_terminate_test(struct perf_ctx *perf); |
248 | |
249 | static inline bool perf_link_is_up(struct perf_peer *peer) |
250 | { |
251 | u64 link; |
252 | |
253 | link = ntb_link_is_up(ntb: peer->perf->ntb, NULL, NULL); |
254 | return !!(link & BIT_ULL_MASK(peer->pidx)); |
255 | } |
256 | |
257 | static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, |
258 | u64 data) |
259 | { |
260 | struct perf_ctx *perf = peer->perf; |
261 | int try; |
262 | u32 sts; |
263 | |
264 | dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); |
265 | |
	/*
	 * Perform a predefined number of attempts before giving up. The
	 * data is sent to the port-specific scratchpads in order to
	 * prevent multi-port access race conditions. Additionally there
	 * is no need for local locking, since only the thread-safe
	 * service work uses this method.
	 */
273 | for (try = 0; try < MSG_TRIES; try++) { |
274 | if (!perf_link_is_up(peer)) |
275 | return -ENOLINK; |
276 | |
277 | sts = ntb_peer_spad_read(ntb: perf->ntb, pidx: peer->pidx, |
278 | PERF_SPAD_CMD(perf->gidx)); |
279 | if (sts != PERF_CMD_INVAL) { |
280 | usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); |
281 | continue; |
282 | } |
283 | |
284 | ntb_peer_spad_write(ntb: perf->ntb, pidx: peer->pidx, |
285 | PERF_SPAD_LDATA(perf->gidx), |
286 | lower_32_bits(data)); |
287 | ntb_peer_spad_write(ntb: perf->ntb, pidx: peer->pidx, |
288 | PERF_SPAD_HDATA(perf->gidx), |
289 | upper_32_bits(data)); |
290 | ntb_peer_spad_write(ntb: perf->ntb, pidx: peer->pidx, |
291 | PERF_SPAD_CMD(perf->gidx), |
292 | val: cmd); |
293 | ntb_peer_db_set(ntb: perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); |
294 | |
295 | dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", |
296 | PERF_SPAD_NOTIFY(peer->gidx)); |
297 | |
298 | break; |
299 | } |
300 | |
301 | return try < MSG_TRIES ? 0 : -EAGAIN; |
302 | } |
303 | |
304 | static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, |
305 | enum perf_cmd *cmd, u64 *data) |
306 | { |
307 | struct perf_peer *peer; |
308 | u32 val; |
309 | |
310 | ntb_db_clear(ntb: perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); |
311 | |
	/*
	 * We start scanning all over again, since the cleared DB bit may
	 * have been set by any peer. Yes, this gives peers with smaller
	 * indexes higher service priority, but it keeps the spad and
	 * message code unified and simple.
	 */
318 | for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) { |
319 | peer = &perf->peers[*pidx]; |
320 | |
321 | if (!perf_link_is_up(peer)) |
322 | continue; |
323 | |
324 | val = ntb_spad_read(ntb: perf->ntb, PERF_SPAD_CMD(peer->gidx)); |
325 | if (val == PERF_CMD_INVAL) |
326 | continue; |
327 | |
328 | *cmd = val; |
329 | |
330 | val = ntb_spad_read(ntb: perf->ntb, PERF_SPAD_LDATA(peer->gidx)); |
331 | *data = val; |
332 | |
333 | val = ntb_spad_read(ntb: perf->ntb, PERF_SPAD_HDATA(peer->gidx)); |
334 | *data |= (u64)val << 32; |
335 | |
	/* The next command can be retrieved from now on */
337 | ntb_spad_write(ntb: perf->ntb, PERF_SPAD_CMD(peer->gidx), |
338 | val: PERF_CMD_INVAL); |
339 | |
340 | dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); |
341 | |
342 | return 0; |
343 | } |
344 | |
345 | return -ENODATA; |
346 | } |
347 | |
348 | static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, |
349 | u64 data) |
350 | { |
351 | struct perf_ctx *perf = peer->perf; |
352 | int try, ret; |
353 | u64 outbits; |
354 | |
355 | dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); |
356 | |
	/*
	 * Perform a predefined number of attempts before giving up. Message
	 * registers are free of race-condition problems when accessed from
	 * different ports, so we don't need to split the registers by global
	 * device index. We also don't need local locking, since the method
	 * is used from the service work only.
	 */
364 | outbits = ntb_msg_outbits(ntb: perf->ntb); |
365 | for (try = 0; try < MSG_TRIES; try++) { |
366 | if (!perf_link_is_up(peer)) |
367 | return -ENOLINK; |
368 | |
369 | ret = ntb_msg_clear_sts(ntb: perf->ntb, sts_bits: outbits); |
370 | if (ret) |
371 | return ret; |
372 | |
373 | ntb_peer_msg_write(ntb: perf->ntb, pidx: peer->pidx, PERF_MSG_LDATA, |
374 | lower_32_bits(data)); |
375 | |
376 | if (ntb_msg_read_sts(ntb: perf->ntb) & outbits) { |
377 | usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); |
378 | continue; |
379 | } |
380 | |
381 | ntb_peer_msg_write(ntb: perf->ntb, pidx: peer->pidx, PERF_MSG_HDATA, |
382 | upper_32_bits(data)); |
383 | |
384 | /* This call shall trigger peer message event */ |
385 | ntb_peer_msg_write(ntb: perf->ntb, pidx: peer->pidx, PERF_MSG_CMD, msg: cmd); |
386 | |
387 | break; |
388 | } |
389 | |
390 | return try < MSG_TRIES ? 0 : -EAGAIN; |
391 | } |
392 | |
393 | static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, |
394 | enum perf_cmd *cmd, u64 *data) |
395 | { |
396 | u64 inbits; |
397 | u32 val; |
398 | |
399 | inbits = ntb_msg_inbits(ntb: perf->ntb); |
400 | |
401 | if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3) |
402 | return -ENODATA; |
403 | |
404 | val = ntb_msg_read(ntb: perf->ntb, pidx, PERF_MSG_CMD); |
405 | *cmd = val; |
406 | |
407 | val = ntb_msg_read(ntb: perf->ntb, pidx, PERF_MSG_LDATA); |
408 | *data = val; |
409 | |
410 | val = ntb_msg_read(ntb: perf->ntb, pidx, PERF_MSG_HDATA); |
411 | *data |= (u64)val << 32; |
412 | |
	/* The next command can be retrieved from now on */
414 | ntb_msg_clear_sts(ntb: perf->ntb, sts_bits: inbits); |
415 | |
416 | dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); |
417 | |
418 | return 0; |
419 | } |
420 | |
421 | static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data) |
422 | { |
423 | struct perf_ctx *perf = peer->perf; |
424 | |
425 | if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT) |
426 | return perf->cmd_send(peer, cmd, data); |
427 | |
428 | dev_err(&perf->ntb->dev, "Send invalid command\n"); |
429 | return -EINVAL; |
430 | } |
431 | |
432 | static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) |
433 | { |
434 | switch (cmd) { |
435 | case PERF_CMD_SSIZE: |
436 | case PERF_CMD_RSIZE: |
437 | case PERF_CMD_SXLAT: |
438 | case PERF_CMD_RXLAT: |
439 | case PERF_CMD_CLEAR: |
440 | break; |
441 | default: |
442 | dev_err(&peer->perf->ntb->dev, "Exec invalid command\n"); |
443 | return -EINVAL; |
444 | } |
445 | |
	/* No need for a memory barrier, since bit ops have an internal lock */
447 | set_bit(nr: cmd, addr: &peer->sts); |
448 | |
449 | dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd); |
450 | |
451 | (void)queue_work(wq: system_highpri_wq, work: &peer->service); |
452 | |
453 | return 0; |
454 | } |
455 | |
456 | static int perf_cmd_recv(struct perf_ctx *perf) |
457 | { |
458 | struct perf_peer *peer; |
459 | int ret, pidx, cmd; |
460 | u64 data; |
461 | |
462 | while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) { |
463 | peer = &perf->peers[pidx]; |
464 | |
465 | switch (cmd) { |
466 | case PERF_CMD_SSIZE: |
467 | peer->inbuf_size = data; |
468 | return perf_cmd_exec(peer, cmd: PERF_CMD_RSIZE); |
469 | case PERF_CMD_SXLAT: |
470 | peer->outbuf_xlat = data; |
471 | return perf_cmd_exec(peer, cmd: PERF_CMD_RXLAT); |
472 | default: |
473 | dev_err(&perf->ntb->dev, "Recv invalid command\n"); |
474 | return -EINVAL; |
475 | } |
476 | } |
477 | |
478 | /* Return 0 if no data left to process, otherwise an error */ |
479 | return ret == -ENODATA ? 0 : ret; |
480 | } |
481 | |
482 | static void perf_link_event(void *ctx) |
483 | { |
484 | struct perf_ctx *perf = ctx; |
485 | struct perf_peer *peer; |
486 | bool lnk_up; |
487 | int pidx; |
488 | |
489 | for (pidx = 0; pidx < perf->pcnt; pidx++) { |
490 | peer = &perf->peers[pidx]; |
491 | |
492 | lnk_up = perf_link_is_up(peer); |
493 | |
494 | if (lnk_up && |
495 | !test_and_set_bit(nr: PERF_STS_LNKUP, addr: &peer->sts)) { |
496 | perf_cmd_exec(peer, cmd: PERF_CMD_SSIZE); |
497 | } else if (!lnk_up && |
498 | test_and_clear_bit(nr: PERF_STS_LNKUP, addr: &peer->sts)) { |
499 | perf_cmd_exec(peer, cmd: PERF_CMD_CLEAR); |
500 | } |
501 | } |
502 | } |
503 | |
504 | static void perf_db_event(void *ctx, int vec) |
505 | { |
506 | struct perf_ctx *perf = ctx; |
507 | |
508 | dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec, |
509 | ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb)); |
510 | |
511 | /* Just receive all available commands */ |
512 | (void)perf_cmd_recv(perf); |
513 | } |
514 | |
515 | static void perf_msg_event(void *ctx) |
516 | { |
517 | struct perf_ctx *perf = ctx; |
518 | |
519 | dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n", |
520 | ntb_msg_read_sts(perf->ntb)); |
521 | |
522 | /* Messages are only sent one-by-one */ |
523 | (void)perf_cmd_recv(perf); |
524 | } |
525 | |
526 | static const struct ntb_ctx_ops perf_ops = { |
527 | .link_event = perf_link_event, |
528 | .db_event = perf_db_event, |
529 | .msg_event = perf_msg_event |
530 | }; |
531 | |
532 | static void perf_free_outbuf(struct perf_peer *peer) |
533 | { |
534 | (void)ntb_peer_mw_clear_trans(ntb: peer->perf->ntb, pidx: peer->pidx, widx: peer->gidx); |
535 | } |
536 | |
537 | static int perf_setup_outbuf(struct perf_peer *peer) |
538 | { |
539 | struct perf_ctx *perf = peer->perf; |
540 | int ret; |
541 | |
542 | /* Outbuf size can be unaligned due to custom max_mw_size */ |
543 | ret = ntb_peer_mw_set_trans(ntb: perf->ntb, pidx: peer->pidx, widx: peer->gidx, |
544 | addr: peer->outbuf_xlat, size: peer->outbuf_size); |
545 | if (ret) { |
546 | dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n"); |
547 | return ret; |
548 | } |
549 | |
550 | /* Initialization is finally done */ |
551 | set_bit(nr: PERF_STS_DONE, addr: &peer->sts); |
552 | complete_all(&peer->init_comp); |
553 | |
554 | return 0; |
555 | } |
556 | |
557 | static void perf_free_inbuf(struct perf_peer *peer) |
558 | { |
559 | if (!peer->inbuf) |
560 | return; |
561 | |
562 | (void)ntb_mw_clear_trans(ntb: peer->perf->ntb, pidx: peer->pidx, widx: peer->gidx); |
563 | dma_free_coherent(dev: &peer->perf->ntb->pdev->dev, size: peer->inbuf_size, |
564 | cpu_addr: peer->inbuf, dma_handle: peer->inbuf_xlat); |
565 | peer->inbuf = NULL; |
566 | } |
567 | |
568 | static int perf_setup_inbuf(struct perf_peer *peer) |
569 | { |
570 | resource_size_t xlat_align, size_align, size_max; |
571 | struct perf_ctx *perf = peer->perf; |
572 | int ret; |
573 | |
574 | /* Get inbound MW parameters */ |
575 | ret = ntb_mw_get_align(ntb: perf->ntb, pidx: peer->pidx, widx: perf->gidx, |
576 | addr_align: &xlat_align, size_align: &size_align, size_max: &size_max); |
577 | if (ret) { |
578 | dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n"); |
579 | return ret; |
580 | } |
581 | |
582 | if (peer->inbuf_size > size_max) { |
583 | dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n", |
584 | &peer->inbuf_size, &size_max); |
585 | return -EINVAL; |
586 | } |
587 | |
588 | peer->inbuf_size = round_up(peer->inbuf_size, size_align); |
589 | |
590 | perf_free_inbuf(peer); |
591 | |
592 | peer->inbuf = dma_alloc_coherent(dev: &perf->ntb->pdev->dev, |
593 | size: peer->inbuf_size, dma_handle: &peer->inbuf_xlat, |
594 | GFP_KERNEL); |
595 | if (!peer->inbuf) { |
596 | dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", |
597 | &peer->inbuf_size); |
598 | return -ENOMEM; |
599 | } |
600 | if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { |
601 | ret = -EINVAL; |
602 | dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); |
603 | goto err_free_inbuf; |
604 | } |
605 | |
606 | ret = ntb_mw_set_trans(ntb: perf->ntb, pidx: peer->pidx, widx: peer->gidx, |
607 | addr: peer->inbuf_xlat, size: peer->inbuf_size); |
608 | if (ret) { |
609 | dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n"); |
610 | goto err_free_inbuf; |
611 | } |
612 | |
	/*
	 * We submit the inbuf xlat transmission command for execution here
	 * to follow the code architecture, even though this method is called
	 * from the service work itself, so the command will be executed
	 * right after it returns.
	 */
618 | (void)perf_cmd_exec(peer, cmd: PERF_CMD_SXLAT); |
619 | |
620 | return 0; |
621 | |
622 | err_free_inbuf: |
623 | perf_free_inbuf(peer); |
624 | |
625 | return ret; |
626 | } |
627 | |
628 | static void perf_service_work(struct work_struct *work) |
629 | { |
630 | struct perf_peer *peer = to_peer_service(work); |
631 | |
632 | if (test_and_clear_bit(nr: PERF_CMD_SSIZE, addr: &peer->sts)) |
633 | perf_cmd_send(peer, cmd: PERF_CMD_SSIZE, data: peer->outbuf_size); |
634 | |
635 | if (test_and_clear_bit(nr: PERF_CMD_RSIZE, addr: &peer->sts)) |
636 | perf_setup_inbuf(peer); |
637 | |
638 | if (test_and_clear_bit(nr: PERF_CMD_SXLAT, addr: &peer->sts)) |
639 | perf_cmd_send(peer, cmd: PERF_CMD_SXLAT, data: peer->inbuf_xlat); |
640 | |
641 | if (test_and_clear_bit(nr: PERF_CMD_RXLAT, addr: &peer->sts)) |
642 | perf_setup_outbuf(peer); |
643 | |
644 | if (test_and_clear_bit(nr: PERF_CMD_CLEAR, addr: &peer->sts)) { |
645 | init_completion(x: &peer->init_comp); |
646 | clear_bit(nr: PERF_STS_DONE, addr: &peer->sts); |
647 | if (test_bit(0, &peer->perf->busy_flag) && |
648 | peer == peer->perf->test_peer) { |
649 | dev_warn(&peer->perf->ntb->dev, |
650 | "Freeing while test on-fly\n"); |
651 | perf_terminate_test(perf: peer->perf); |
652 | } |
653 | perf_free_outbuf(peer); |
654 | perf_free_inbuf(peer); |
655 | } |
656 | } |
657 | |
658 | static int perf_init_service(struct perf_ctx *perf) |
659 | { |
660 | u64 mask; |
661 | |
662 | if (ntb_peer_mw_count(ntb: perf->ntb) < perf->pcnt) { |
663 | dev_err(&perf->ntb->dev, "Not enough memory windows\n"); |
664 | return -EINVAL; |
665 | } |
666 | |
667 | if (ntb_msg_count(ntb: perf->ntb) >= PERF_MSG_CNT) { |
668 | perf->cmd_send = perf_msg_cmd_send; |
669 | perf->cmd_recv = perf_msg_cmd_recv; |
670 | |
671 | dev_dbg(&perf->ntb->dev, "Message service initialized\n"); |
672 | |
673 | return 0; |
674 | } |
675 | |
676 | dev_dbg(&perf->ntb->dev, "Message service unsupported\n"); |
677 | |
678 | mask = GENMASK_ULL(perf->pcnt, 0); |
679 | if (ntb_spad_count(ntb: perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) && |
680 | (ntb_db_valid_mask(ntb: perf->ntb) & mask) == mask) { |
681 | perf->cmd_send = perf_spad_cmd_send; |
682 | perf->cmd_recv = perf_spad_cmd_recv; |
683 | |
684 | dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n"); |
685 | |
686 | return 0; |
687 | } |
688 | |
689 | dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n"); |
690 | |
691 | dev_err(&perf->ntb->dev, "Command services unsupported\n"); |
692 | |
693 | return -EINVAL; |
694 | } |
695 | |
696 | static int perf_enable_service(struct perf_ctx *perf) |
697 | { |
698 | u64 mask, incmd_bit; |
699 | int ret, sidx, scnt; |
700 | |
701 | mask = ntb_db_valid_mask(ntb: perf->ntb); |
702 | (void)ntb_db_set_mask(ntb: perf->ntb, db_bits: mask); |
703 | |
704 | ret = ntb_set_ctx(ntb: perf->ntb, ctx: perf, ctx_ops: &perf_ops); |
705 | if (ret) |
706 | return ret; |
707 | |
708 | if (perf->cmd_send == perf_msg_cmd_send) { |
709 | u64 inbits, outbits; |
710 | |
711 | inbits = ntb_msg_inbits(ntb: perf->ntb); |
712 | outbits = ntb_msg_outbits(ntb: perf->ntb); |
713 | (void)ntb_msg_set_mask(ntb: perf->ntb, mask_bits: inbits | outbits); |
714 | |
715 | incmd_bit = BIT_ULL(__ffs64(inbits)); |
716 | ret = ntb_msg_clear_mask(ntb: perf->ntb, mask_bits: incmd_bit); |
717 | |
718 | dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit); |
719 | } else { |
720 | scnt = ntb_spad_count(ntb: perf->ntb); |
721 | for (sidx = 0; sidx < scnt; sidx++) |
722 | ntb_spad_write(ntb: perf->ntb, sidx, val: PERF_CMD_INVAL); |
723 | incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); |
724 | ret = ntb_db_clear_mask(ntb: perf->ntb, db_bits: incmd_bit); |
725 | |
726 | dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit); |
727 | } |
728 | if (ret) { |
729 | ntb_clear_ctx(ntb: perf->ntb); |
730 | return ret; |
731 | } |
732 | |
733 | ntb_link_enable(ntb: perf->ntb, max_speed: NTB_SPEED_AUTO, max_width: NTB_WIDTH_AUTO); |
	/* Might not be necessary */
735 | ntb_link_event(ntb: perf->ntb); |
736 | |
737 | return 0; |
738 | } |
739 | |
740 | static void perf_disable_service(struct perf_ctx *perf) |
741 | { |
742 | int pidx; |
743 | |
744 | if (perf->cmd_send == perf_msg_cmd_send) { |
745 | u64 inbits; |
746 | |
747 | inbits = ntb_msg_inbits(ntb: perf->ntb); |
748 | (void)ntb_msg_set_mask(ntb: perf->ntb, mask_bits: inbits); |
749 | } else { |
750 | (void)ntb_db_set_mask(ntb: perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); |
751 | } |
752 | |
753 | ntb_clear_ctx(ntb: perf->ntb); |
754 | |
755 | for (pidx = 0; pidx < perf->pcnt; pidx++) |
756 | perf_cmd_exec(peer: &perf->peers[pidx], cmd: PERF_CMD_CLEAR); |
757 | |
758 | for (pidx = 0; pidx < perf->pcnt; pidx++) |
759 | flush_work(work: &perf->peers[pidx].service); |
760 | |
761 | for (pidx = 0; pidx < perf->pcnt; pidx++) { |
762 | struct perf_peer *peer = &perf->peers[pidx]; |
763 | |
764 | ntb_spad_write(ntb: perf->ntb, PERF_SPAD_CMD(peer->gidx), val: 0); |
765 | } |
766 | |
767 | ntb_db_clear(ntb: perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); |
768 | |
769 | ntb_link_disable(ntb: perf->ntb); |
770 | } |
771 | |
772 | /*============================================================================== |
773 | * Performance measuring work-thread |
774 | *============================================================================== |
775 | */ |
776 | |
777 | static void perf_dma_copy_callback(void *data) |
778 | { |
779 | struct perf_thread *pthr = data; |
780 | |
781 | atomic_dec(v: &pthr->dma_sync); |
782 | wake_up(&pthr->dma_wait); |
783 | } |
784 | |
785 | static int perf_copy_chunk(struct perf_thread *pthr, |
786 | void __iomem *dst, void *src, size_t len) |
787 | { |
788 | struct dma_async_tx_descriptor *tx; |
789 | struct dmaengine_unmap_data *unmap; |
790 | struct device *dma_dev; |
791 | int try = 0, ret = 0; |
792 | struct perf_peer *peer = pthr->perf->test_peer; |
793 | void __iomem *vbase; |
794 | void __iomem *dst_vaddr; |
795 | dma_addr_t dst_dma_addr; |
796 | |
797 | if (!use_dma) { |
798 | memcpy_toio(dst, src, len); |
799 | goto ret_check_tsync; |
800 | } |
801 | |
802 | dma_dev = pthr->dma_chan->device->dev; |
803 | |
804 | if (!is_dma_copy_aligned(dev: pthr->dma_chan->device, offset_in_page(src), |
805 | offset_in_page(dst), len)) |
806 | return -EIO; |
807 | |
808 | vbase = peer->outbuf; |
809 | dst_vaddr = dst; |
810 | dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase); |
811 | |
812 | unmap = dmaengine_get_unmap_data(dev: dma_dev, nr: 1, GFP_NOWAIT); |
813 | if (!unmap) |
814 | return -ENOMEM; |
815 | |
816 | unmap->len = len; |
817 | unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src), |
818 | offset_in_page(src), len, DMA_TO_DEVICE); |
819 | if (dma_mapping_error(dev: dma_dev, dma_addr: unmap->addr[0])) { |
820 | ret = -EIO; |
821 | goto err_free_resource; |
822 | } |
823 | unmap->to_cnt = 1; |
824 | |
825 | do { |
826 | tx = dmaengine_prep_dma_memcpy(chan: pthr->dma_chan, dest: dst_dma_addr, |
827 | src: unmap->addr[0], len, flags: DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
828 | if (!tx) |
829 | msleep(DMA_MDELAY); |
830 | } while (!tx && (try++ < DMA_TRIES)); |
831 | |
832 | if (!tx) { |
833 | ret = -EIO; |
834 | goto err_free_resource; |
835 | } |
836 | |
837 | tx->callback = perf_dma_copy_callback; |
838 | tx->callback_param = pthr; |
839 | dma_set_unmap(tx, unmap); |
840 | |
841 | ret = dma_submit_error(cookie: dmaengine_submit(desc: tx)); |
842 | if (ret) |
843 | goto err_free_resource; |
844 | |
845 | dmaengine_unmap_put(unmap); |
846 | |
847 | atomic_inc(v: &pthr->dma_sync); |
848 | dma_async_issue_pending(chan: pthr->dma_chan); |
849 | |
850 | ret_check_tsync: |
851 | return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR; |
852 | |
853 | err_free_resource: |
854 | dmaengine_unmap_put(unmap); |
855 | |
856 | return ret; |
857 | } |
858 | |
859 | static bool perf_dma_filter(struct dma_chan *chan, void *data) |
860 | { |
861 | struct perf_ctx *perf = data; |
862 | int node; |
863 | |
864 | node = dev_to_node(dev: &perf->ntb->dev); |
865 | |
866 | return node == NUMA_NO_NODE || node == dev_to_node(dev: chan->device->dev); |
867 | } |
868 | |
869 | static int perf_init_test(struct perf_thread *pthr) |
870 | { |
871 | struct perf_ctx *perf = pthr->perf; |
872 | dma_cap_mask_t dma_mask; |
873 | struct perf_peer *peer = pthr->perf->test_peer; |
874 | |
875 | pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL, |
876 | dev_to_node(&perf->ntb->dev)); |
877 | if (!pthr->src) |
878 | return -ENOMEM; |
879 | |
880 | get_random_bytes(buf: pthr->src, len: perf->test_peer->outbuf_size); |
881 | |
882 | if (!use_dma) |
883 | return 0; |
884 | |
885 | dma_cap_zero(dma_mask); |
886 | dma_cap_set(DMA_MEMCPY, dma_mask); |
887 | pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf); |
888 | if (!pthr->dma_chan) { |
889 | dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n", |
890 | pthr->tidx); |
891 | goto err_free; |
892 | } |
893 | peer->dma_dst_addr = |
894 | dma_map_resource(dev: pthr->dma_chan->device->dev, |
895 | phys_addr: peer->out_phys_addr, size: peer->outbuf_size, |
896 | dir: DMA_FROM_DEVICE, attrs: 0); |
897 | if (dma_mapping_error(dev: pthr->dma_chan->device->dev, |
898 | dma_addr: peer->dma_dst_addr)) { |
899 | dev_err(pthr->dma_chan->device->dev, "%d: Failed to map DMA addr\n", |
900 | pthr->tidx); |
901 | peer->dma_dst_addr = 0; |
902 | dma_release_channel(chan: pthr->dma_chan); |
903 | goto err_free; |
904 | } |
905 | dev_dbg(pthr->dma_chan->device->dev, "%d: Map MMIO %pa to DMA addr %pad\n", |
906 | pthr->tidx, |
907 | &peer->out_phys_addr, |
908 | &peer->dma_dst_addr); |
909 | |
910 | atomic_set(v: &pthr->dma_sync, i: 0); |
911 | return 0; |
912 | |
913 | err_free: |
914 | atomic_dec(v: &perf->tsync); |
915 | wake_up(&perf->twait); |
916 | kfree(objp: pthr->src); |
917 | return -ENODEV; |
918 | } |
919 | |
920 | static int perf_run_test(struct perf_thread *pthr) |
921 | { |
922 | struct perf_peer *peer = pthr->perf->test_peer; |
923 | struct perf_ctx *perf = pthr->perf; |
924 | void __iomem *flt_dst, *bnd_dst; |
925 | u64 total_size, chunk_size; |
926 | void *flt_src; |
927 | int ret = 0; |
928 | |
929 | total_size = 1ULL << total_order; |
930 | chunk_size = 1ULL << chunk_order; |
931 | chunk_size = min_t(u64, peer->outbuf_size, chunk_size); |
932 | |
933 | flt_src = pthr->src; |
934 | bnd_dst = peer->outbuf + peer->outbuf_size; |
935 | flt_dst = peer->outbuf; |
936 | |
937 | pthr->duration = ktime_get(); |
938 | |
	/* The copied field is cleared at the test launch stage */
940 | while (pthr->copied < total_size) { |
941 | ret = perf_copy_chunk(pthr, dst: flt_dst, src: flt_src, len: chunk_size); |
942 | if (ret) { |
943 | dev_err(&perf->ntb->dev, "%d: Got error %d on test\n", |
944 | pthr->tidx, ret); |
945 | return ret; |
946 | } |
947 | |
948 | pthr->copied += chunk_size; |
949 | |
950 | flt_dst += chunk_size; |
951 | flt_src += chunk_size; |
952 | if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) { |
953 | flt_dst = peer->outbuf; |
954 | flt_src = pthr->src; |
955 | } |
956 | |
		/* Yield the CPU to give other threads a chance to use it */
958 | schedule(); |
959 | } |
960 | |
961 | return 0; |
962 | } |
963 | |
964 | static int perf_sync_test(struct perf_thread *pthr) |
965 | { |
966 | struct perf_ctx *perf = pthr->perf; |
967 | |
968 | if (!use_dma) |
969 | goto no_dma_ret; |
970 | |
971 | wait_event(pthr->dma_wait, |
972 | (atomic_read(&pthr->dma_sync) == 0 || |
973 | atomic_read(&perf->tsync) < 0)); |
974 | |
975 | if (atomic_read(v: &perf->tsync) < 0) |
976 | return -EINTR; |
977 | |
978 | no_dma_ret: |
979 | pthr->duration = ktime_sub(ktime_get(), pthr->duration); |
980 | |
981 | dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n", |
982 | pthr->tidx, pthr->copied); |
983 | |
984 | dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n", |
985 | pthr->tidx, ktime_to_us(pthr->duration)); |
986 | |
987 | dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx, |
988 | div64_u64(pthr->copied, ktime_to_us(pthr->duration))); |
989 | |
990 | return 0; |
991 | } |
992 | |
993 | static void perf_clear_test(struct perf_thread *pthr) |
994 | { |
995 | struct perf_ctx *perf = pthr->perf; |
996 | |
997 | if (!use_dma) |
998 | goto no_dma_notify; |
999 | |
	/*
	 * If the test finished without errors, termination isn't needed.
	 * We call it anyway just to be sure the transfers have completed.
	 */
1004 | (void)dmaengine_terminate_sync(chan: pthr->dma_chan); |
1005 | if (pthr->perf->test_peer->dma_dst_addr) |
1006 | dma_unmap_resource(dev: pthr->dma_chan->device->dev, |
1007 | addr: pthr->perf->test_peer->dma_dst_addr, |
1008 | size: pthr->perf->test_peer->outbuf_size, |
1009 | dir: DMA_FROM_DEVICE, attrs: 0); |
1010 | |
1011 | dma_release_channel(chan: pthr->dma_chan); |
1012 | |
1013 | no_dma_notify: |
1014 | atomic_dec(v: &perf->tsync); |
1015 | wake_up(&perf->twait); |
1016 | kfree(objp: pthr->src); |
1017 | } |
1018 | |
1019 | static void perf_thread_work(struct work_struct *work) |
1020 | { |
1021 | struct perf_thread *pthr = to_thread_work(work); |
1022 | int ret; |
1023 | |
	/*
	 * Perform the stages in compliance with the use_dma flag value.
	 * The test status is changed only if an error happened; otherwise
	 * the -ENODATA status is kept while the test is in flight. Results
	 * synchronization is performed only if the test finished without
	 * an error or interruption.
	 */
1031 | ret = perf_init_test(pthr); |
1032 | if (ret) { |
1033 | pthr->status = ret; |
1034 | return; |
1035 | } |
1036 | |
1037 | ret = perf_run_test(pthr); |
1038 | if (ret) { |
1039 | pthr->status = ret; |
1040 | goto err_clear_test; |
1041 | } |
1042 | |
1043 | pthr->status = perf_sync_test(pthr); |
1044 | |
1045 | err_clear_test: |
1046 | perf_clear_test(pthr); |
1047 | } |
1048 | |
1049 | static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt) |
1050 | { |
1051 | if (tcnt == 0 || tcnt > MAX_THREADS_CNT) |
1052 | return -EINVAL; |
1053 | |
1054 | if (test_and_set_bit_lock(nr: 0, addr: &perf->busy_flag)) |
1055 | return -EBUSY; |
1056 | |
1057 | perf->tcnt = tcnt; |
1058 | |
1059 | clear_bit_unlock(nr: 0, addr: &perf->busy_flag); |
1060 | |
1061 | return 0; |
1062 | } |
1063 | |
1064 | static void perf_terminate_test(struct perf_ctx *perf) |
1065 | { |
1066 | int tidx; |
1067 | |
1068 | atomic_set(v: &perf->tsync, i: -1); |
1069 | wake_up(&perf->twait); |
1070 | |
1071 | for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { |
1072 | wake_up(&perf->threads[tidx].dma_wait); |
1073 | cancel_work_sync(work: &perf->threads[tidx].work); |
1074 | } |
1075 | } |
1076 | |
1077 | static int perf_submit_test(struct perf_peer *peer) |
1078 | { |
1079 | struct perf_ctx *perf = peer->perf; |
1080 | struct perf_thread *pthr; |
1081 | int tidx, ret; |
1082 | |
1083 | ret = wait_for_completion_interruptible(x: &peer->init_comp); |
1084 | if (ret < 0) |
1085 | return ret; |
1086 | |
1087 | if (test_and_set_bit_lock(nr: 0, addr: &perf->busy_flag)) |
1088 | return -EBUSY; |
1089 | |
1090 | perf->test_peer = peer; |
1091 | atomic_set(v: &perf->tsync, i: perf->tcnt); |
1092 | |
1093 | for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { |
1094 | pthr = &perf->threads[tidx]; |
1095 | |
1096 | pthr->status = -ENODATA; |
1097 | pthr->copied = 0; |
1098 | pthr->duration = ktime_set(secs: 0, nsecs: 0); |
1099 | if (tidx < perf->tcnt) |
1100 | (void)queue_work(wq: perf_wq, work: &pthr->work); |
1101 | } |
1102 | |
1103 | ret = wait_event_interruptible(perf->twait, |
1104 | atomic_read(&perf->tsync) <= 0); |
1105 | if (ret == -ERESTARTSYS) { |
1106 | perf_terminate_test(perf); |
1107 | ret = -EINTR; |
1108 | } |
1109 | |
1110 | clear_bit_unlock(nr: 0, addr: &perf->busy_flag); |
1111 | |
1112 | return ret; |
1113 | } |
1114 | |
1115 | static int perf_read_stats(struct perf_ctx *perf, char *buf, |
1116 | size_t size, ssize_t *pos) |
1117 | { |
1118 | struct perf_thread *pthr; |
1119 | int tidx; |
1120 | |
1121 | if (test_and_set_bit_lock(nr: 0, addr: &perf->busy_flag)) |
1122 | return -EBUSY; |
1123 | |
1124 | (*pos) += scnprintf(buf: buf + *pos, size: size - *pos, |
1125 | fmt: " Peer %d test statistics:\n", perf->test_peer->pidx); |
1126 | |
1127 | for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { |
1128 | pthr = &perf->threads[tidx]; |
1129 | |
1130 | if (pthr->status == -ENODATA) |
1131 | continue; |
1132 | |
1133 | if (pthr->status) { |
1134 | (*pos) += scnprintf(buf: buf + *pos, size: size - *pos, |
1135 | fmt: "%d: error status %d\n", tidx, pthr->status); |
1136 | continue; |
1137 | } |
1138 | |
1139 | (*pos) += scnprintf(buf: buf + *pos, size: size - *pos, |
1140 | fmt: "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", |
1141 | tidx, pthr->copied, ktime_to_us(kt: pthr->duration), |
1142 | div64_u64(dividend: pthr->copied, divisor: ktime_to_us(kt: pthr->duration))); |
1143 | } |
1144 | |
1145 | clear_bit_unlock(nr: 0, addr: &perf->busy_flag); |
1146 | |
1147 | return 0; |
1148 | } |
1149 | |
1150 | static void perf_init_threads(struct perf_ctx *perf) |
1151 | { |
1152 | struct perf_thread *pthr; |
1153 | int tidx; |
1154 | |
1155 | perf->tcnt = DEF_THREADS_CNT; |
1156 | perf->test_peer = &perf->peers[0]; |
1157 | init_waitqueue_head(&perf->twait); |
1158 | |
1159 | for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { |
1160 | pthr = &perf->threads[tidx]; |
1161 | |
1162 | pthr->perf = perf; |
1163 | pthr->tidx = tidx; |
1164 | pthr->status = -ENODATA; |
1165 | init_waitqueue_head(&pthr->dma_wait); |
1166 | INIT_WORK(&pthr->work, perf_thread_work); |
1167 | } |
1168 | } |
1169 | |
1170 | static void perf_clear_threads(struct perf_ctx *perf) |
1171 | { |
1172 | perf_terminate_test(perf); |
1173 | } |
1174 | |
1175 | /*============================================================================== |
1176 | * DebugFS nodes |
1177 | *============================================================================== |
1178 | */ |
1179 | |
1180 | static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, |
1181 | size_t size, loff_t *offp) |
1182 | { |
1183 | struct perf_ctx *perf = filep->private_data; |
1184 | struct perf_peer *peer; |
1185 | size_t buf_size; |
1186 | ssize_t pos = 0; |
1187 | int ret, pidx; |
1188 | char *buf; |
1189 | |
1190 | buf_size = min_t(size_t, size, 0x1000U); |
1191 | |
1192 | buf = kmalloc(buf_size, GFP_KERNEL); |
1193 | if (!buf) |
1194 | return -ENOMEM; |
1195 | |
1196 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1197 | fmt: " Performance measuring tool info:\n\n"); |
1198 | |
1199 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1200 | fmt: "Local port %d, Global index %d\n", ntb_port_number(ntb: perf->ntb), |
1201 | perf->gidx); |
1202 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, fmt: "Test status: "); |
1203 | if (test_bit(0, &perf->busy_flag)) { |
1204 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1205 | fmt: "on-fly with port %d (%d)\n", |
1206 | ntb_peer_port_number(ntb: perf->ntb, pidx: perf->test_peer->pidx), |
1207 | perf->test_peer->pidx); |
1208 | } else { |
1209 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, fmt: "idle\n"); |
1210 | } |
1211 | |
1212 | for (pidx = 0; pidx < perf->pcnt; pidx++) { |
1213 | peer = &perf->peers[pidx]; |
1214 | |
1215 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1216 | fmt: "Port %d (%d), Global index %d:\n", |
1217 | ntb_peer_port_number(ntb: perf->ntb, pidx: peer->pidx), peer->pidx, |
1218 | peer->gidx); |
1219 | |
1220 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1221 | fmt: "\tLink status: %s\n", |
1222 | test_bit(PERF_STS_LNKUP, &peer->sts) ? "up": "down"); |
1223 | |
1224 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1225 | fmt: "\tOut buffer addr 0x%pK\n", peer->outbuf); |
1226 | |
1227 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1228 | fmt: "\tOut buff phys addr %pap\n", &peer->out_phys_addr); |
1229 | |
1230 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1231 | fmt: "\tOut buffer size %pa\n", &peer->outbuf_size); |
1232 | |
1233 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1234 | fmt: "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat); |
1235 | |
1236 | if (!peer->inbuf) { |
1237 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1238 | fmt: "\tIn buffer addr: unallocated\n"); |
1239 | continue; |
1240 | } |
1241 | |
1242 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1243 | fmt: "\tIn buffer addr 0x%pK\n", peer->inbuf); |
1244 | |
1245 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1246 | fmt: "\tIn buffer size %pa\n", &peer->inbuf_size); |
1247 | |
1248 | pos += scnprintf(buf: buf + pos, size: buf_size - pos, |
1249 | fmt: "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat); |
1250 | } |
1251 | |
1252 | ret = simple_read_from_buffer(to: ubuf, count: size, ppos: offp, from: buf, available: pos); |
1253 | kfree(objp: buf); |
1254 | |
1255 | return ret; |
1256 | } |
1257 | |
1258 | static const struct file_operations perf_dbgfs_info = { |
1259 | .open = simple_open, |
1260 | .read = perf_dbgfs_read_info |
1261 | }; |
1262 | |
1263 | static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf, |
1264 | size_t size, loff_t *offp) |
1265 | { |
1266 | struct perf_ctx *perf = filep->private_data; |
1267 | ssize_t ret, pos = 0; |
1268 | char *buf; |
1269 | |
1270 | buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL); |
1271 | if (!buf) |
1272 | return -ENOMEM; |
1273 | |
1274 | ret = perf_read_stats(perf, buf, PERF_BUF_LEN, pos: &pos); |
1275 | if (ret) |
1276 | goto err_free; |
1277 | |
1278 | ret = simple_read_from_buffer(to: ubuf, count: size, ppos: offp, from: buf, available: pos); |
1279 | err_free: |
1280 | kfree(objp: buf); |
1281 | |
1282 | return ret; |
1283 | } |
1284 | |
1285 | static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf, |
1286 | size_t size, loff_t *offp) |
1287 | { |
1288 | struct perf_ctx *perf = filep->private_data; |
1289 | struct perf_peer *peer; |
1290 | int pidx, ret; |
1291 | |
1292 | ret = kstrtoint_from_user(s: ubuf, count: size, base: 0, res: &pidx); |
1293 | if (ret) |
1294 | return ret; |
1295 | |
1296 | if (pidx < 0 || pidx >= perf->pcnt) |
1297 | return -EINVAL; |
1298 | |
1299 | peer = &perf->peers[pidx]; |
1300 | |
1301 | ret = perf_submit_test(peer); |
1302 | if (ret) |
1303 | return ret; |
1304 | |
1305 | return size; |
1306 | } |
1307 | |
1308 | static const struct file_operations perf_dbgfs_run = { |
1309 | .open = simple_open, |
1310 | .read = perf_dbgfs_read_run, |
1311 | .write = perf_dbgfs_write_run |
1312 | }; |
1313 | |
1314 | static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf, |
1315 | size_t size, loff_t *offp) |
1316 | { |
1317 | struct perf_ctx *perf = filep->private_data; |
1318 | char buf[8]; |
1319 | ssize_t pos; |
1320 | |
1321 | pos = scnprintf(buf, size: sizeof(buf), fmt: "%hhu\n", perf->tcnt); |
1322 | |
1323 | return simple_read_from_buffer(to: ubuf, count: size, ppos: offp, from: buf, available: pos); |
1324 | } |
1325 | |
1326 | static ssize_t perf_dbgfs_write_tcnt(struct file *filep, |
1327 | const char __user *ubuf, |
1328 | size_t size, loff_t *offp) |
1329 | { |
1330 | struct perf_ctx *perf = filep->private_data; |
1331 | int ret; |
1332 | u8 val; |
1333 | |
1334 | ret = kstrtou8_from_user(s: ubuf, count: size, base: 0, res: &val); |
1335 | if (ret) |
1336 | return ret; |
1337 | |
1338 | ret = perf_set_tcnt(perf, tcnt: val); |
1339 | if (ret) |
1340 | return ret; |
1341 | |
1342 | return size; |
1343 | } |
1344 | |
1345 | static const struct file_operations perf_dbgfs_tcnt = { |
1346 | .open = simple_open, |
1347 | .read = perf_dbgfs_read_tcnt, |
1348 | .write = perf_dbgfs_write_tcnt |
1349 | }; |
1350 | |
1351 | static void perf_setup_dbgfs(struct perf_ctx *perf) |
1352 | { |
1353 | struct pci_dev *pdev = perf->ntb->pdev; |
1354 | |
1355 | perf->dbgfs_dir = debugfs_create_dir(name: pci_name(pdev), parent: perf_dbgfs_topdir); |
1356 | if (IS_ERR(ptr: perf->dbgfs_dir)) { |
1357 | dev_warn(&perf->ntb->dev, "DebugFS unsupported\n"); |
1358 | return; |
1359 | } |
1360 | |
1361 | debugfs_create_file("info", 0600, perf->dbgfs_dir, perf, |
1362 | &perf_dbgfs_info); |
1363 | |
1364 | debugfs_create_file("run", 0600, perf->dbgfs_dir, perf, |
1365 | &perf_dbgfs_run); |
1366 | |
1367 | debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf, |
1368 | &perf_dbgfs_tcnt); |
1369 | |
	/* These are made read-only for test execution safety and integrity */
1371 | debugfs_create_u8(name: "chunk_order", mode: 0500, parent: perf->dbgfs_dir, value: &chunk_order); |
1372 | |
1373 | debugfs_create_u8(name: "total_order", mode: 0500, parent: perf->dbgfs_dir, value: &total_order); |
1374 | |
1375 | debugfs_create_bool(name: "use_dma", mode: 0500, parent: perf->dbgfs_dir, value: &use_dma); |
1376 | } |
1377 | |
1378 | static void perf_clear_dbgfs(struct perf_ctx *perf) |
1379 | { |
1380 | debugfs_remove_recursive(dentry: perf->dbgfs_dir); |
1381 | } |
1382 | |
1383 | /*============================================================================== |
1384 | * Basic driver initialization |
1385 | *============================================================================== |
1386 | */ |
1387 | |
1388 | static struct perf_ctx *perf_create_data(struct ntb_dev *ntb) |
1389 | { |
1390 | struct perf_ctx *perf; |
1391 | |
1392 | perf = devm_kzalloc(dev: &ntb->dev, size: sizeof(*perf), GFP_KERNEL); |
1393 | if (!perf) |
1394 | return ERR_PTR(error: -ENOMEM); |
1395 | |
1396 | perf->pcnt = ntb_peer_port_count(ntb); |
1397 | perf->peers = devm_kcalloc(dev: &ntb->dev, n: perf->pcnt, size: sizeof(*perf->peers), |
1398 | GFP_KERNEL); |
1399 | if (!perf->peers) |
1400 | return ERR_PTR(error: -ENOMEM); |
1401 | |
1402 | perf->ntb = ntb; |
1403 | |
1404 | return perf; |
1405 | } |
1406 | |
1407 | static int perf_setup_peer_mw(struct perf_peer *peer) |
1408 | { |
1409 | struct perf_ctx *perf = peer->perf; |
1410 | phys_addr_t phys_addr; |
1411 | int ret; |
1412 | |
	/* Get the outbound MW parameters and map the window */
1414 | ret = ntb_peer_mw_get_addr(ntb: perf->ntb, widx: perf->gidx, base: &phys_addr, |
1415 | size: &peer->outbuf_size); |
1416 | if (ret) |
1417 | return ret; |
1418 | |
1419 | peer->outbuf = devm_ioremap_wc(dev: &perf->ntb->dev, offset: phys_addr, |
1420 | size: peer->outbuf_size); |
1421 | if (!peer->outbuf) |
1422 | return -ENOMEM; |
1423 | |
1424 | peer->out_phys_addr = phys_addr; |
1425 | |
1426 | if (max_mw_size && peer->outbuf_size > max_mw_size) { |
1427 | peer->outbuf_size = max_mw_size; |
1428 | dev_warn(&peer->perf->ntb->dev, |
1429 | "Peer %d outbuf reduced to %pa\n", peer->pidx, |
1430 | &peer->outbuf_size); |
1431 | } |
1432 | |
1433 | return 0; |
1434 | } |
1435 | |
1436 | static int perf_init_peers(struct perf_ctx *perf) |
1437 | { |
1438 | struct perf_peer *peer; |
1439 | int pidx, lport, ret; |
1440 | |
1441 | lport = ntb_port_number(ntb: perf->ntb); |
1442 | perf->gidx = -1; |
1443 | for (pidx = 0; pidx < perf->pcnt; pidx++) { |
1444 | peer = &perf->peers[pidx]; |
1445 | |
1446 | peer->perf = perf; |
1447 | peer->pidx = pidx; |
1448 | if (lport < ntb_peer_port_number(ntb: perf->ntb, pidx)) { |
1449 | if (perf->gidx == -1) |
1450 | perf->gidx = pidx; |
1451 | peer->gidx = pidx + 1; |
1452 | } else { |
1453 | peer->gidx = pidx; |
1454 | } |
1455 | INIT_WORK(&peer->service, perf_service_work); |
1456 | init_completion(x: &peer->init_comp); |
1457 | } |
1458 | if (perf->gidx == -1) |
1459 | perf->gidx = pidx; |
1460 | |
1461 | /* |
1462 | * Hardware with only two ports may not have unique port |
1463 | * numbers. In this case, the gidxs should all be zero. |
1464 | */ |
1465 | if (perf->pcnt == 1 && ntb_port_number(ntb: perf->ntb) == 0 && |
1466 | ntb_peer_port_number(ntb: perf->ntb, pidx: 0) == 0) { |
1467 | perf->gidx = 0; |
1468 | perf->peers[0].gidx = 0; |
1469 | } |
1470 | |
1471 | for (pidx = 0; pidx < perf->pcnt; pidx++) { |
1472 | ret = perf_setup_peer_mw(peer: &perf->peers[pidx]); |
1473 | if (ret) |
1474 | return ret; |
1475 | } |
1476 | |
1477 | dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx); |
1478 | |
1479 | return 0; |
1480 | } |
1481 | |
1482 | static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) |
1483 | { |
1484 | struct perf_ctx *perf; |
1485 | int ret; |
1486 | |
1487 | perf = perf_create_data(ntb); |
1488 | if (IS_ERR(ptr: perf)) |
1489 | return PTR_ERR(ptr: perf); |
1490 | |
1491 | ret = perf_init_peers(perf); |
1492 | if (ret) |
1493 | return ret; |
1494 | |
1495 | perf_init_threads(perf); |
1496 | |
1497 | ret = perf_init_service(perf); |
1498 | if (ret) |
1499 | return ret; |
1500 | |
1501 | ret = perf_enable_service(perf); |
1502 | if (ret) |
1503 | return ret; |
1504 | |
1505 | perf_setup_dbgfs(perf); |
1506 | |
1507 | return 0; |
1508 | } |
1509 | |
1510 | static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) |
1511 | { |
1512 | struct perf_ctx *perf = ntb->ctx; |
1513 | |
1514 | perf_clear_dbgfs(perf); |
1515 | |
1516 | perf_disable_service(perf); |
1517 | |
1518 | perf_clear_threads(perf); |
1519 | } |
1520 | |
1521 | static struct ntb_client perf_client = { |
1522 | .ops = { |
1523 | .probe = perf_probe, |
1524 | .remove = perf_remove |
1525 | } |
1526 | }; |
1527 | |
1528 | static int __init perf_init(void) |
1529 | { |
1530 | int ret; |
1531 | |
1532 | if (chunk_order > MAX_CHUNK_ORDER) { |
1533 | chunk_order = MAX_CHUNK_ORDER; |
1534 | pr_info("Chunk order reduced to %hhu\n", chunk_order); |
1535 | } |
1536 | |
1537 | if (total_order < chunk_order) { |
1538 | total_order = chunk_order; |
1539 | pr_info("Total data order reduced to %hhu\n", total_order); |
1540 | } |
1541 | |
1542 | perf_wq = alloc_workqueue(fmt: "perf_wq", flags: WQ_UNBOUND | WQ_SYSFS, max_active: 0); |
1543 | if (!perf_wq) |
1544 | return -ENOMEM; |
1545 | |
1546 | if (debugfs_initialized()) |
1547 | perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); |
1548 | |
1549 | ret = ntb_register_client(&perf_client); |
1550 | if (ret) { |
1551 | debugfs_remove_recursive(dentry: perf_dbgfs_topdir); |
1552 | destroy_workqueue(wq: perf_wq); |
1553 | } |
1554 | |
1555 | return ret; |
1556 | } |
1557 | module_init(perf_init); |
1558 | |
1559 | static void __exit perf_exit(void) |
1560 | { |
1561 | ntb_unregister_client(client: &perf_client); |
1562 | debugfs_remove_recursive(dentry: perf_dbgfs_topdir); |
1563 | destroy_workqueue(wq: perf_wq); |
1564 | } |
1565 | module_exit(perf_exit); |
1566 |