1 | /* |
2 | * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | */ |
32 | |
33 | #include <linux/kref.h> |
34 | #include <rdma/ib_umem.h> |
35 | #include <rdma/ib_user_verbs.h> |
36 | #include <rdma/ib_cache.h> |
37 | #include "mlx5_ib.h" |
38 | #include "srq.h" |
39 | #include "qp.h" |
40 | |
41 | #define UVERBS_MODULE_NAME mlx5_ib |
42 | #include <rdma/uverbs_named_ioctl.h> |
43 | |
44 | static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) |
45 | { |
46 | struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; |
47 | |
48 | ibcq->comp_handler(ibcq, ibcq->cq_context); |
49 | } |
50 | |
51 | static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) |
52 | { |
53 | struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq); |
54 | struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); |
55 | struct ib_cq *ibcq = &cq->ibcq; |
56 | struct ib_event event; |
57 | |
58 | if (type != MLX5_EVENT_TYPE_CQ_ERROR) { |
59 | mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n", |
60 | type, mcq->cqn); |
61 | return; |
62 | } |
63 | |
64 | if (ibcq->event_handler) { |
65 | event.device = &dev->ib_dev; |
66 | event.event = IB_EVENT_CQ_ERR; |
67 | event.element.cq = ibcq; |
68 | ibcq->event_handler(&event, ibcq->cq_context); |
69 | } |
70 | } |
71 | |
72 | static void *get_cqe(struct mlx5_ib_cq *cq, int n) |
73 | { |
74 | return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n); |
75 | } |
76 | |
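/*
 * CQE ownership helpers: hardware flips the owner bit in op_own on every
 * pass through the CQ buffer. get_sw_cqe() compares that bit against the
 * pass implied by the requested index and returns the CQE only when it is
 * in software ownership (and its opcode is not MLX5_CQE_INVALID);
 * otherwise it returns NULL and the queue is treated as empty.
 */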
77 | static u8 sw_ownership_bit(int n, int nent) |
78 | { |
79 | return (n & nent) ? 1 : 0; |
80 | } |
81 | |
82 | static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) |
83 | { |
84 | void *cqe = get_cqe(cq, n & cq->ibcq.cqe); |
85 | struct mlx5_cqe64 *cqe64; |
86 | |
87 | cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; |
88 | |
89 | if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && |
90 | !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { |
91 | return cqe; |
92 | } else { |
93 | return NULL; |
94 | } |
95 | } |
96 | |
97 | static void *next_cqe_sw(struct mlx5_ib_cq *cq) |
98 | { |
99 | return get_sw_cqe(cq, cq->mcq.cons_index); |
100 | } |
101 | |
102 | static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) |
103 | { |
104 | switch (wq->wr_data[idx]) { |
105 | case MLX5_IB_WR_UMR: |
106 | return 0; |
107 | |
108 | case IB_WR_LOCAL_INV: |
109 | return IB_WC_LOCAL_INV; |
110 | |
111 | case IB_WR_REG_MR: |
112 | return IB_WC_REG_MR; |
113 | |
114 | default: |
115 | pr_warn("unknown completion status\n"); |
116 | return 0; |
117 | } |
118 | } |
119 | |
120 | static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, |
121 | struct mlx5_ib_wq *wq, int idx) |
122 | { |
123 | wc->wc_flags = 0; |
124 | switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { |
125 | case MLX5_OPCODE_RDMA_WRITE_IMM: |
126 | wc->wc_flags |= IB_WC_WITH_IMM; |
127 | fallthrough; |
128 | case MLX5_OPCODE_RDMA_WRITE: |
129 | wc->opcode = IB_WC_RDMA_WRITE; |
130 | break; |
131 | case MLX5_OPCODE_SEND_IMM: |
132 | wc->wc_flags |= IB_WC_WITH_IMM; |
133 | fallthrough; |
134 | case MLX5_OPCODE_SEND: |
135 | case MLX5_OPCODE_SEND_INVAL: |
136 | wc->opcode = IB_WC_SEND; |
137 | break; |
138 | case MLX5_OPCODE_RDMA_READ: |
139 | wc->opcode = IB_WC_RDMA_READ; |
140 | wc->byte_len = be32_to_cpu(cqe->byte_cnt); |
141 | break; |
142 | case MLX5_OPCODE_ATOMIC_CS: |
143 | wc->opcode = IB_WC_COMP_SWAP; |
144 | wc->byte_len = 8; |
145 | break; |
146 | case MLX5_OPCODE_ATOMIC_FA: |
147 | wc->opcode = IB_WC_FETCH_ADD; |
148 | wc->byte_len = 8; |
149 | break; |
150 | case MLX5_OPCODE_ATOMIC_MASKED_CS: |
151 | wc->opcode = IB_WC_MASKED_COMP_SWAP; |
152 | wc->byte_len = 8; |
153 | break; |
154 | case MLX5_OPCODE_ATOMIC_MASKED_FA: |
155 | wc->opcode = IB_WC_MASKED_FETCH_ADD; |
156 | wc->byte_len = 8; |
157 | break; |
158 | case MLX5_OPCODE_UMR: |
159 | wc->opcode = get_umr_comp(wq, idx); |
160 | break; |
161 | } |
162 | } |
163 | |
164 | enum { |
165 | MLX5_GRH_IN_BUFFER = 1, |
166 | MLX5_GRH_IN_CQE = 2, |
167 | }; |
168 | |
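/*
 * Fill an ib_wc for a responder (receive) completion: recover the wr_id
 * from the SRQ/XRC SRQ or the regular RQ, map the CQE opcode to the IB
 * opcode and flags, and extract pkey index, VLAN, SL and the RoCE network
 * header type according to the port's link layer.
 */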
169 | static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, |
170 | struct mlx5_ib_qp *qp) |
171 | { |
172 | enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); |
173 | struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); |
174 | struct mlx5_ib_srq *srq = NULL; |
175 | struct mlx5_ib_wq *wq; |
176 | u16 wqe_ctr; |
177 | u8 roce_packet_type; |
178 | bool vlan_present; |
179 | u8 g; |
180 | |
181 | if (qp->ibqp.srq || qp->ibqp.xrcd) { |
182 | struct mlx5_core_srq *msrq = NULL; |
183 | |
184 | if (qp->ibqp.xrcd) { |
185 | msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); |
186 | if (msrq) |
187 | srq = to_mibsrq(msrq); |
188 | } else { |
189 | srq = to_msrq(qp->ibqp.srq); |
190 | } |
191 | if (srq) { |
192 | wqe_ctr = be16_to_cpu(cqe->wqe_counter); |
193 | wc->wr_id = srq->wrid[wqe_ctr]; |
194 | mlx5_ib_free_srq_wqe(srq, wqe_ctr); |
195 | if (msrq) |
196 | mlx5_core_res_put(&msrq->common); |
197 | } |
198 | } else { |
199 | wq = &qp->rq; |
200 | wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; |
201 | ++wq->tail; |
202 | } |
203 | wc->byte_len = be32_to_cpu(cqe->byte_cnt); |
204 | |
205 | switch (get_cqe_opcode(cqe)) { |
206 | case MLX5_CQE_RESP_WR_IMM: |
207 | wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; |
208 | wc->wc_flags = IB_WC_WITH_IMM; |
209 | wc->ex.imm_data = cqe->immediate; |
210 | break; |
211 | case MLX5_CQE_RESP_SEND: |
212 | wc->opcode = IB_WC_RECV; |
213 | wc->wc_flags = IB_WC_IP_CSUM_OK; |
214 | if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && |
215 | (cqe->hds_ip_ext & CQE_L4_OK)))) |
216 | wc->wc_flags = 0; |
217 | break; |
218 | case MLX5_CQE_RESP_SEND_IMM: |
219 | wc->opcode = IB_WC_RECV; |
220 | wc->wc_flags = IB_WC_WITH_IMM; |
221 | wc->ex.imm_data = cqe->immediate; |
222 | break; |
223 | case MLX5_CQE_RESP_SEND_INV: |
224 | wc->opcode = IB_WC_RECV; |
225 | wc->wc_flags = IB_WC_WITH_INVALIDATE; |
226 | wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey); |
227 | break; |
228 | } |
229 | wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; |
230 | wc->dlid_path_bits = cqe->ml_path; |
231 | g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; |
232 | wc->wc_flags |= g ? IB_WC_GRH : 0; |
233 | if (is_qp1(qp->type)) { |
234 | u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff; |
235 | |
236 | ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey, |
237 | &wc->pkey_index); |
238 | } else { |
239 | wc->pkey_index = 0; |
240 | } |
241 | |
242 | if (ll != IB_LINK_LAYER_ETHERNET) { |
243 | wc->slid = be16_to_cpu(cqe->slid); |
244 | wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; |
245 | return; |
246 | } |
247 | |
248 | wc->slid = 0; |
249 | vlan_present = cqe->l4_l3_hdr_type & 0x1; |
250 | roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3; |
251 | if (vlan_present) { |
252 | wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff; |
253 | wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7; |
254 | wc->wc_flags |= IB_WC_WITH_VLAN; |
255 | } else { |
256 | wc->sl = 0; |
257 | } |
258 | |
259 | switch (roce_packet_type) { |
260 | case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: |
261 | wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; |
262 | break; |
263 | case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: |
264 | wc->network_hdr_type = RDMA_NETWORK_IPV6; |
265 | break; |
266 | case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4: |
267 | wc->network_hdr_type = RDMA_NETWORK_IPV4; |
268 | break; |
269 | } |
270 | wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; |
271 | } |
272 | |
273 | static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, |
274 | struct ib_wc *wc, const char *level) |
275 | { |
276 | mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status, |
277 | ib_wc_status_msg(wc->status)); |
278 | print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1, |
279 | cqe, sizeof(*cqe), false); |
280 | } |
281 | |
282 | static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, |
283 | struct mlx5_err_cqe *cqe, |
284 | struct ib_wc *wc) |
285 | { |
286 | const char *dump = KERN_WARNING; |
287 | |
288 | switch (cqe->syndrome) { |
289 | case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: |
290 | wc->status = IB_WC_LOC_LEN_ERR; |
291 | break; |
292 | case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: |
293 | wc->status = IB_WC_LOC_QP_OP_ERR; |
294 | break; |
295 | case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: |
296 | dump = KERN_DEBUG; |
297 | wc->status = IB_WC_LOC_PROT_ERR; |
298 | break; |
299 | case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: |
300 | dump = NULL; |
301 | wc->status = IB_WC_WR_FLUSH_ERR; |
302 | break; |
303 | case MLX5_CQE_SYNDROME_MW_BIND_ERR: |
304 | wc->status = IB_WC_MW_BIND_ERR; |
305 | break; |
306 | case MLX5_CQE_SYNDROME_BAD_RESP_ERR: |
307 | wc->status = IB_WC_BAD_RESP_ERR; |
308 | break; |
309 | case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: |
310 | wc->status = IB_WC_LOC_ACCESS_ERR; |
311 | break; |
312 | case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: |
313 | wc->status = IB_WC_REM_INV_REQ_ERR; |
314 | break; |
315 | case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: |
316 | dump = KERN_DEBUG; |
317 | wc->status = IB_WC_REM_ACCESS_ERR; |
318 | break; |
319 | case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: |
320 | dump = KERN_DEBUG; |
321 | wc->status = IB_WC_REM_OP_ERR; |
322 | break; |
323 | case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: |
324 | dump = NULL; |
325 | wc->status = IB_WC_RETRY_EXC_ERR; |
326 | break; |
327 | case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: |
328 | dump = NULL; |
329 | wc->status = IB_WC_RNR_RETRY_EXC_ERR; |
330 | break; |
331 | case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: |
332 | wc->status = IB_WC_REM_ABORT_ERR; |
333 | break; |
334 | default: |
335 | wc->status = IB_WC_GENERAL_ERR; |
336 | break; |
337 | } |
338 | |
339 | wc->vendor_err = cqe->vendor_err_synd; |
340 | if (dump) |
341 | dump_cqe(dev, cqe, wc, dump); |
342 | } |
343 | |
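/*
 * Advance sq.last_poll: follow the send-queue w_list chain from @tail until
 * the just-completed WQE index (@head) is reached, then leave last_poll
 * pointing at the entry that follows it.
 */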
344 | static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, |
345 | u16 tail, u16 head) |
346 | { |
347 | u16 idx; |
348 | |
349 | do { |
350 | idx = tail & (qp->sq.wqe_cnt - 1); |
351 | if (idx == head) |
352 | break; |
353 | |
354 | tail = qp->sq.w_list[idx].next; |
355 | } while (1); |
356 | tail = qp->sq.w_list[idx].next; |
357 | qp->sq.last_poll = tail; |
358 | } |
359 | |
360 | static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) |
361 | { |
362 | mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); |
363 | } |
364 | |
365 | static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, |
366 | struct ib_sig_err *item) |
367 | { |
368 | u16 syndrome = be16_to_cpu(cqe->syndrome); |
369 | |
370 | #define GUARD_ERR (1 << 13) |
371 | #define APPTAG_ERR (1 << 12) |
372 | #define REFTAG_ERR (1 << 11) |
373 | |
374 | if (syndrome & GUARD_ERR) { |
375 | item->err_type = IB_SIG_BAD_GUARD; |
376 | item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; |
377 | item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; |
378 | } else |
379 | if (syndrome & REFTAG_ERR) { |
380 | item->err_type = IB_SIG_BAD_REFTAG; |
381 | item->expected = be32_to_cpu(cqe->expected_reftag); |
382 | item->actual = be32_to_cpu(cqe->actual_reftag); |
383 | } else |
384 | if (syndrome & APPTAG_ERR) { |
385 | item->err_type = IB_SIG_BAD_APPTAG; |
386 | item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; |
387 | item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; |
388 | } else { |
389 | pr_err("Got signature completion error with bad syndrome %04x\n", |
390 | syndrome); |
391 | } |
392 | |
393 | item->sig_err_offset = be64_to_cpu(cqe->err_offset); |
394 | item->key = be32_to_cpu(cqe->mkey); |
395 | } |
396 | |
397 | static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, |
398 | int *npolled, bool is_send) |
399 | { |
400 | struct mlx5_ib_wq *wq; |
401 | unsigned int cur; |
402 | int np; |
403 | int i; |
404 | |
405 | wq = (is_send) ? &qp->sq : &qp->rq; |
406 | cur = wq->head - wq->tail; |
407 | np = *npolled; |
408 | |
409 | if (cur == 0) |
410 | return; |
411 | |
412 | for (i = 0; i < cur && np < num_entries; i++) { |
413 | unsigned int idx; |
414 | |
415 | idx = (is_send) ? wq->last_poll : wq->tail; |
416 | idx &= (wq->wqe_cnt - 1); |
417 | wc->wr_id = wq->wrid[idx]; |
418 | wc->status = IB_WC_WR_FLUSH_ERR; |
419 | wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; |
420 | wq->tail++; |
421 | if (is_send) |
422 | wq->last_poll = wq->w_list[idx].next; |
423 | np++; |
424 | wc->qp = &qp->ibqp; |
425 | wc++; |
426 | } |
427 | *npolled = np; |
428 | } |
429 | |
430 | static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, |
431 | struct ib_wc *wc, int *npolled) |
432 | { |
433 | struct mlx5_ib_qp *qp; |
434 | |
435 | *npolled = 0; |
436 | /* Find uncompleted WQEs belonging to that cq and generate flush-error completions for them */ |
437 | list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { |
438 | sw_comp(qp, num_entries, wc + *npolled, npolled, true); |
439 | if (*npolled >= num_entries) |
440 | return; |
441 | } |
442 | |
443 | list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { |
444 | sw_comp(qp, num_entries, wc + *npolled, npolled, false); |
445 | if (*npolled >= num_entries) |
446 | return; |
447 | } |
448 | } |
449 | |
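/*
 * Consume a single hardware CQE. Returns 0 when a completion was written to
 * @wc and -EAGAIN when no software-owned CQE is available. Resize and
 * signature-error CQEs are handled internally and polling restarts at
 * "repoll" without reporting anything to the caller.
 */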
450 | static int mlx5_poll_one(struct mlx5_ib_cq *cq, |
451 | struct mlx5_ib_qp **cur_qp, |
452 | struct ib_wc *wc) |
453 | { |
454 | struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); |
455 | struct mlx5_err_cqe *err_cqe; |
456 | struct mlx5_cqe64 *cqe64; |
457 | struct mlx5_core_qp *mqp; |
458 | struct mlx5_ib_wq *wq; |
459 | uint8_t opcode; |
460 | uint32_t qpn; |
461 | u16 wqe_ctr; |
462 | void *cqe; |
463 | int idx; |
464 | |
465 | repoll: |
466 | cqe = next_cqe_sw(cq); |
467 | if (!cqe) |
468 | return -EAGAIN; |
469 | |
470 | cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; |
471 | |
472 | ++cq->mcq.cons_index; |
473 | |
474 | /* Make sure we read CQ entry contents after we've checked the |
475 | * ownership bit. |
476 | */ |
477 | rmb(); |
478 | |
479 | opcode = get_cqe_opcode(cqe64); |
480 | if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { |
481 | if (likely(cq->resize_buf)) { |
482 | free_cq_buf(dev, &cq->buf); |
483 | cq->buf = *cq->resize_buf; |
484 | kfree(cq->resize_buf); |
485 | cq->resize_buf = NULL; |
486 | goto repoll; |
487 | } else { |
488 | mlx5_ib_warn(dev, "unexpected resize cqe\n"); |
489 | } |
490 | } |
491 | |
492 | qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; |
493 | if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { |
494 | /* We do not have to take the QP table lock here, |
495 | * because CQs will be locked while QPs are removed |
496 | * from the table. |
497 | */ |
498 | mqp = radix_tree_lookup(&dev->qp_table.tree, qpn); |
499 | *cur_qp = to_mibqp(mqp); |
500 | } |
501 | |
502 | wc->qp = &(*cur_qp)->ibqp; |
503 | switch (opcode) { |
504 | case MLX5_CQE_REQ: |
505 | wq = &(*cur_qp)->sq; |
506 | wqe_ctr = be16_to_cpu(cqe64->wqe_counter); |
507 | idx = wqe_ctr & (wq->wqe_cnt - 1); |
508 | handle_good_req(wc, cqe64, wq, idx); |
509 | handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); |
510 | wc->wr_id = wq->wrid[idx]; |
511 | wq->tail = wq->wqe_head[idx] + 1; |
512 | wc->status = IB_WC_SUCCESS; |
513 | break; |
514 | case MLX5_CQE_RESP_WR_IMM: |
515 | case MLX5_CQE_RESP_SEND: |
516 | case MLX5_CQE_RESP_SEND_IMM: |
517 | case MLX5_CQE_RESP_SEND_INV: |
518 | handle_responder(wc, cqe64, *cur_qp); |
519 | wc->status = IB_WC_SUCCESS; |
520 | break; |
521 | case MLX5_CQE_RESIZE_CQ: |
522 | break; |
523 | case MLX5_CQE_REQ_ERR: |
524 | case MLX5_CQE_RESP_ERR: |
525 | err_cqe = (struct mlx5_err_cqe *)cqe64; |
526 | mlx5_handle_error_cqe(dev, err_cqe, wc); |
527 | mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n", |
528 | opcode == MLX5_CQE_REQ_ERR ? |
529 | "Requestor": "Responder", cq->mcq.cqn); |
530 | mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", |
531 | err_cqe->syndrome, err_cqe->vendor_err_synd); |
532 | if (wc->status != IB_WC_WR_FLUSH_ERR && |
533 | (*cur_qp)->type == MLX5_IB_QPT_REG_UMR) |
534 | dev->umrc.state = MLX5_UMR_STATE_RECOVER; |
535 | |
536 | if (opcode == MLX5_CQE_REQ_ERR) { |
537 | wq = &(*cur_qp)->sq; |
538 | wqe_ctr = be16_to_cpu(cqe64->wqe_counter); |
539 | idx = wqe_ctr & (wq->wqe_cnt - 1); |
540 | wc->wr_id = wq->wrid[idx]; |
541 | wq->tail = wq->wqe_head[idx] + 1; |
542 | } else { |
543 | struct mlx5_ib_srq *srq; |
544 | |
545 | if ((*cur_qp)->ibqp.srq) { |
546 | srq = to_msrq((*cur_qp)->ibqp.srq); |
547 | wqe_ctr = be16_to_cpu(cqe64->wqe_counter); |
548 | wc->wr_id = srq->wrid[wqe_ctr]; |
549 | mlx5_ib_free_srq_wqe(srq, wqe_ctr); |
550 | } else { |
551 | wq = &(*cur_qp)->rq; |
552 | wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; |
553 | ++wq->tail; |
554 | } |
555 | } |
556 | break; |
557 | case MLX5_CQE_SIG_ERR: { |
558 | struct mlx5_sig_err_cqe *sig_err_cqe = |
559 | (struct mlx5_sig_err_cqe *)cqe64; |
560 | struct mlx5_core_sig_ctx *sig; |
561 | |
562 | xa_lock(&dev->sig_mrs); |
563 | sig = xa_load(&dev->sig_mrs, |
564 | mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); |
565 | get_sig_err_item(sig_err_cqe, &sig->err_item); |
566 | sig->sig_err_exists = true; |
567 | sig->sigerr_count++; |
568 | |
569 | mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", |
570 | cq->mcq.cqn, sig->err_item.key, |
571 | sig->err_item.err_type, |
572 | sig->err_item.sig_err_offset, |
573 | sig->err_item.expected, |
574 | sig->err_item.actual); |
575 | |
576 | xa_unlock(&dev->sig_mrs); |
577 | goto repoll; |
578 | } |
579 | } |
580 | |
581 | return 0; |
582 | } |
583 | |
584 | static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, |
585 | struct ib_wc *wc, bool is_fatal_err) |
586 | { |
587 | struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); |
588 | struct mlx5_ib_wc *soft_wc, *next; |
589 | int npolled = 0; |
590 | |
591 | list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { |
592 | if (npolled >= num_entries) |
593 | break; |
594 | |
595 | mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", |
596 | cq->mcq.cqn); |
597 | |
598 | if (unlikely(is_fatal_err)) { |
599 | soft_wc->wc.status = IB_WC_WR_FLUSH_ERR; |
600 | soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; |
601 | } |
602 | wc[npolled++] = soft_wc->wc; |
603 | list_del(&soft_wc->list); |
604 | kfree(soft_wc); |
605 | } |
606 | |
607 | return npolled; |
608 | } |
609 | |
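/*
 * Poll up to @num_entries completions. Software-generated completions
 * queued on cq->wc_list are drained first, then hardware CQEs are consumed
 * via mlx5_poll_one(). If the device is in internal error state, flush
 * errors are synthesized for the outstanding WQEs instead of touching the
 * hardware queue.
 *
 * Hypothetical consumer sketch (not part of this driver), using the generic
 * ib_poll_cq() wrapper that ends up here; consume_wc() is a made-up handler:
 *
 *	struct ib_wc wc[16];
 *	int i, n;
 *
 *	while ((n = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc)) > 0)
 *		for (i = 0; i < n; i++)
 *			consume_wc(&wc[i]);
 */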
610 | int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) |
611 | { |
612 | struct mlx5_ib_cq *cq = to_mcq(ibcq); |
613 | struct mlx5_ib_qp *cur_qp = NULL; |
614 | struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); |
615 | struct mlx5_core_dev *mdev = dev->mdev; |
616 | unsigned long flags; |
617 | int soft_polled = 0; |
618 | int npolled; |
619 | |
620 | spin_lock_irqsave(&cq->lock, flags); |
621 | if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { |
622 | /* make sure no soft WQEs are waiting */ |
623 | if (unlikely(!list_empty(&cq->wc_list))) |
624 | soft_polled = poll_soft_wc(cq, num_entries, wc, true); |
625 | |
626 | mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled, |
627 | wc + soft_polled, &npolled); |
628 | goto out; |
629 | } |
630 | |
631 | if (unlikely(!list_empty(&cq->wc_list))) |
632 | soft_polled = poll_soft_wc(cq, num_entries, wc, false); |
633 | |
634 | for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { |
635 | if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) |
636 | break; |
637 | } |
638 | |
639 | if (npolled) |
640 | mlx5_cq_set_ci(&cq->mcq); |
641 | out: |
642 | spin_unlock_irqrestore(&cq->lock, flags); |
643 | |
644 | return soft_polled + npolled; |
645 | } |
646 | |
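/*
 * Arm the CQ for the next completion (the ib_req_notify_cq() path). The
 * doorbell requests notification for solicited-only or any completion based
 * on IB_CQ_SOLICITED_MASK. Returns 1 when IB_CQ_REPORT_MISSED_EVENTS is set
 * and software completions are already pending, telling the caller to poll
 * again before sleeping.
 */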
647 | int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) |
648 | { |
649 | struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; |
650 | struct mlx5_ib_cq *cq = to_mcq(ibcq); |
651 | void __iomem *uar_page = mdev->priv.uar->map; |
652 | unsigned long irq_flags; |
653 | int ret = 0; |
654 | |
655 | spin_lock_irqsave(&cq->lock, irq_flags); |
656 | if (cq->notify_flags != IB_CQ_NEXT_COMP) |
657 | cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; |
658 | |
659 | if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) |
660 | ret = 1; |
661 | spin_unlock_irqrestore(&cq->lock, irq_flags); |
662 | |
663 | mlx5_cq_arm(&cq->mcq, |
664 | (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? |
665 | MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, |
666 | uar_page, to_mcq(ibcq)->mcq.cons_index); |
667 | |
668 | return ret; |
669 | } |
670 | |
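/*
 * Allocate a fragmented buffer holding @nent CQEs of @cqe_size bytes on the
 * device's NUMA node and set up the frag-buffer control (fbc) that
 * get_cqe() uses to address individual CQEs.
 */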
671 | static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, |
672 | struct mlx5_ib_cq_buf *buf, |
673 | int nent, |
674 | int cqe_size) |
675 | { |
676 | struct mlx5_frag_buf *frag_buf = &buf->frag_buf; |
677 | u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); |
678 | u8 log_wq_sz = ilog2(cqe_size); |
679 | int err; |
680 | |
681 | err = mlx5_frag_buf_alloc_node(dev->mdev, |
682 | nent * cqe_size, |
683 | frag_buf, |
684 | dev->mdev->priv.numa_node); |
685 | if (err) |
686 | return err; |
687 | |
688 | mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); |
689 | |
690 | buf->cqe_size = cqe_size; |
691 | buf->nent = nent; |
692 | |
693 | return 0; |
694 | } |
695 | |
696 | enum { |
697 | MLX5_CQE_RES_FORMAT_HASH = 0, |
698 | MLX5_CQE_RES_FORMAT_CSUM = 1, |
699 | MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3, |
700 | }; |
701 | |
702 | static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) |
703 | { |
704 | switch (format) { |
705 | case MLX5_IB_CQE_RES_FORMAT_HASH: |
706 | return MLX5_CQE_RES_FORMAT_HASH; |
707 | case MLX5_IB_CQE_RES_FORMAT_CSUM: |
708 | return MLX5_CQE_RES_FORMAT_CSUM; |
709 | case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX: |
710 | if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) |
711 | return MLX5_CQE_RES_FORMAT_CSUM_STRIDX; |
712 | return -EOPNOTSUPP; |
713 | default: |
714 | return -EINVAL; |
715 | } |
716 | } |
717 | |
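/*
 * User CQ creation: validate the driver-specific ucmd, pin the user CQ
 * buffer and doorbell record, pick the best page size for the device, and
 * build the create_cq_in mailbox (PAS list, UAR index and the optional CQE
 * compression, 128B padding and real-time timestamp flags).
 */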
718 | static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, |
719 | struct mlx5_ib_cq *cq, int entries, u32 **cqb, |
720 | int *cqe_size, int *index, int *inlen, |
721 | struct uverbs_attr_bundle *attrs) |
722 | { |
723 | struct mlx5_ib_create_cq ucmd = {}; |
724 | unsigned long page_size; |
725 | unsigned int page_offset_quantized; |
726 | size_t ucmdlen; |
727 | __be64 *pas; |
728 | int ncont; |
729 | void *cqc; |
730 | int err; |
731 | struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( |
732 | udata, struct mlx5_ib_ucontext, ibucontext); |
733 | |
734 | ucmdlen = min(udata->inlen, sizeof(ucmd)); |
735 | if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags)) |
736 | return -EINVAL; |
737 | |
738 | if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) |
739 | return -EFAULT; |
740 | |
741 | if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD | |
742 | MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX | |
743 | MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS))) |
744 | return -EINVAL; |
745 | |
746 | if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) || |
747 | ucmd.reserved0 || ucmd.reserved1) |
748 | return -EINVAL; |
749 | |
750 | *cqe_size = ucmd.cqe_size; |
751 | |
752 | cq->buf.umem = |
753 | ib_umem_get(&dev->ib_dev, ucmd.buf_addr, |
754 | entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE); |
755 | if (IS_ERR(cq->buf.umem)) { |
756 | err = PTR_ERR(cq->buf.umem); |
757 | return err; |
758 | } |
759 | |
760 | page_size = mlx5_umem_find_best_cq_quantized_pgoff( |
761 | cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, |
762 | page_offset, 64, &page_offset_quantized); |
763 | if (!page_size) { |
764 | err = -EINVAL; |
765 | goto err_umem; |
766 | } |
767 | |
768 | err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db); |
769 | if (err) |
770 | goto err_umem; |
771 | |
772 | ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); |
773 | mlx5_ib_dbg( |
774 | dev, |
775 | "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", |
776 | ucmd.buf_addr, entries * ucmd.cqe_size, |
777 | ib_umem_num_pages(cq->buf.umem), page_size, ncont); |
778 | |
779 | *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + |
780 | MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; |
781 | *cqb = kvzalloc(*inlen, GFP_KERNEL); |
782 | if (!*cqb) { |
783 | err = -ENOMEM; |
784 | goto err_db; |
785 | } |
786 | |
787 | pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); |
788 | mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); |
789 | |
790 | cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); |
791 | MLX5_SET(cqc, cqc, log_page_size, |
792 | order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); |
793 | MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); |
794 | |
795 | if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) { |
796 | err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX); |
797 | if (err) |
798 | goto err_cqb; |
799 | } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { |
800 | *index = ucmd.uar_page_index; |
801 | } else if (context->bfregi.lib_uar_dyn) { |
802 | err = -EINVAL; |
803 | goto err_cqb; |
804 | } else { |
805 | *index = context->bfregi.sys_pages[0]; |
806 | } |
807 | |
808 | if (ucmd.cqe_comp_en == 1) { |
809 | int mini_cqe_format; |
810 | |
811 | if (!((*cqe_size == 128 && |
812 | MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || |
813 | (*cqe_size == 64 && |
814 | MLX5_CAP_GEN(dev->mdev, cqe_compression)))) { |
815 | err = -EOPNOTSUPP; |
816 | mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", |
817 | *cqe_size); |
818 | goto err_cqb; |
819 | } |
820 | |
821 | mini_cqe_format = |
822 | mini_cqe_res_format_to_hw(dev, |
823 | ucmd.cqe_comp_res_format); |
824 | if (mini_cqe_format < 0) { |
825 | err = mini_cqe_format; |
826 | mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n", |
827 | ucmd.cqe_comp_res_format, err); |
828 | goto err_cqb; |
829 | } |
830 | |
831 | MLX5_SET(cqc, cqc, cqe_comp_en, 1); |
832 | MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format); |
833 | } |
834 | |
835 | if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { |
836 | if (*cqe_size != 128 || |
837 | !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) { |
838 | err = -EOPNOTSUPP; |
839 | mlx5_ib_warn(dev, |
840 | "CQE padding is not supported for CQE size of %dB!\n", |
841 | *cqe_size); |
842 | goto err_cqb; |
843 | } |
844 | |
845 | cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; |
846 | } |
847 | |
848 | if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS) |
849 | cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS; |
850 | |
851 | MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); |
852 | return 0; |
853 | |
854 | err_cqb: |
855 | kvfree(*cqb); |
856 | |
857 | err_db: |
858 | mlx5_ib_db_unmap_user(context, &cq->db); |
859 | |
860 | err_umem: |
861 | ib_umem_release(cq->buf.umem); |
862 | return err; |
863 | } |
864 | |
865 | static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) |
866 | { |
867 | struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( |
868 | udata, struct mlx5_ib_ucontext, ibucontext); |
869 | |
870 | mlx5_ib_db_unmap_user(context, &cq->db); |
871 | ib_umem_release(cq->buf.umem); |
872 | } |
873 | |
874 | static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf) |
875 | { |
876 | int i; |
877 | void *cqe; |
878 | struct mlx5_cqe64 *cqe64; |
879 | |
880 | for (i = 0; i < buf->nent; i++) { |
881 | cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i); |
882 | cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; |
883 | cqe64->op_own = MLX5_CQE_INVALID << 4; |
884 | } |
885 | } |
886 | |
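/*
 * Kernel CQ creation: allocate the doorbell record and a fragmented CQE
 * buffer, initialize every CQE opcode to MLX5_CQE_INVALID (hardware
 * ownership) and build the create_cq_in mailbox from the buffer page list.
 */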
887 | static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, |
888 | int entries, int cqe_size, |
889 | u32 **cqb, int *index, int *inlen) |
890 | { |
891 | __be64 *pas; |
892 | void *cqc; |
893 | int err; |
894 | |
895 | err = mlx5_db_alloc(dev->mdev, &cq->db); |
896 | if (err) |
897 | return err; |
898 | |
899 | cq->mcq.set_ci_db = cq->db.db; |
900 | cq->mcq.arm_db = cq->db.db + 1; |
901 | cq->mcq.cqe_sz = cqe_size; |
902 | |
903 | err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size); |
904 | if (err) |
905 | goto err_db; |
906 | |
907 | init_cq_frag_buf(&cq->buf); |
908 | |
909 | *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + |
910 | MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * |
911 | cq->buf.frag_buf.npages; |
912 | *cqb = kvzalloc(*inlen, GFP_KERNEL); |
913 | if (!*cqb) { |
914 | err = -ENOMEM; |
915 | goto err_buf; |
916 | } |
917 | |
918 | pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); |
919 | mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); |
920 | |
921 | cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); |
922 | MLX5_SET(cqc, cqc, log_page_size, |
923 | cq->buf.frag_buf.page_shift - |
924 | MLX5_ADAPTER_PAGE_SHIFT); |
925 | |
926 | *index = dev->mdev->priv.uar->index; |
927 | |
928 | return 0; |
929 | |
930 | err_buf: |
931 | free_cq_buf(dev, &cq->buf); |
932 | |
933 | err_db: |
934 | mlx5_db_free(dev->mdev, &cq->db); |
935 | return err; |
936 | } |
937 | |
938 | static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) |
939 | { |
940 | free_cq_buf(dev, &cq->buf); |
941 | mlx5_db_free(dev->mdev, &cq->db); |
942 | } |
943 | |
944 | static void notify_soft_wc_handler(struct work_struct *work) |
945 | { |
946 | struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, |
947 | notify_work); |
948 | |
949 | cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); |
950 | } |
951 | |
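/*
 * Create-CQ entry point. The requested depth is rounded up to a power of
 * two (entries + 1), the buffer and doorbell are prepared by the user or
 * kernel helper above, and the CQ is created in firmware with
 * mlx5_core_create_cq(); on the user path the resulting CQN is copied back
 * to the application.
 */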
952 | int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, |
953 | struct uverbs_attr_bundle *attrs) |
954 | { |
955 | struct ib_udata *udata = &attrs->driver_udata; |
956 | struct ib_device *ibdev = ibcq->device; |
957 | int entries = attr->cqe; |
958 | int vector = attr->comp_vector; |
959 | struct mlx5_ib_dev *dev = to_mdev(ibdev); |
960 | struct mlx5_ib_cq *cq = to_mcq(ibcq); |
961 | u32 out[MLX5_ST_SZ_DW(create_cq_out)]; |
962 | int index; |
963 | int inlen; |
964 | u32 *cqb = NULL; |
965 | void *cqc; |
966 | int cqe_size; |
967 | int eqn; |
968 | int err; |
969 | |
970 | if (entries < 0 || |
971 | (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) |
972 | return -EINVAL; |
973 | |
974 | if (check_cq_create_flags(attr->flags)) |
975 | return -EOPNOTSUPP; |
976 | |
977 | entries = roundup_pow_of_two(entries + 1); |
978 | if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) |
979 | return -EINVAL; |
980 | |
981 | cq->ibcq.cqe = entries - 1; |
982 | mutex_init(&cq->resize_mutex); |
983 | spin_lock_init(&cq->lock); |
984 | cq->resize_buf = NULL; |
985 | cq->resize_umem = NULL; |
986 | cq->create_flags = attr->flags; |
987 | INIT_LIST_HEAD(&cq->list_send_qp); |
988 | INIT_LIST_HEAD(&cq->list_recv_qp); |
989 | |
990 | if (udata) { |
991 | err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, |
992 | &index, &inlen, attrs); |
993 | if (err) |
994 | return err; |
995 | } else { |
996 | cqe_size = cache_line_size() == 128 ? 128 : 64; |
997 | err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, |
998 | &index, &inlen); |
999 | if (err) |
1000 | return err; |
1001 | |
1002 | INIT_WORK(&cq->notify_work, notify_soft_wc_handler); |
1003 | } |
1004 | |
1005 | err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn); |
1006 | if (err) |
1007 | goto err_cqb; |
1008 | |
1009 | cq->cqe_size = cqe_size; |
1010 | |
1011 | cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); |
1012 | MLX5_SET(cqc, cqc, cqe_sz, |
1013 | cqe_sz_to_mlx_sz(cqe_size, |
1014 | cq->private_flags & |
1015 | MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); |
1016 | MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); |
1017 | MLX5_SET(cqc, cqc, uar_page, index); |
1018 | MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); |
1019 | MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); |
1020 | if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) |
1021 | MLX5_SET(cqc, cqc, oi, 1); |
1022 | |
1023 | err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); |
1024 | if (err) |
1025 | goto err_cqb; |
1026 | |
1027 | mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); |
1028 | if (udata) |
1029 | cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; |
1030 | else |
1031 | cq->mcq.comp = mlx5_ib_cq_comp; |
1032 | cq->mcq.event = mlx5_ib_cq_event; |
1033 | |
1034 | INIT_LIST_HEAD(&cq->wc_list); |
1035 | |
1036 | if (udata) |
1037 | if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { |
1038 | err = -EFAULT; |
1039 | goto err_cmd; |
1040 | } |
1041 | |
1042 | |
1043 | kvfree(cqb); |
1044 | return 0; |
1045 | |
1046 | err_cmd: |
1047 | mlx5_core_destroy_cq(dev->mdev, &cq->mcq); |
1048 | |
1049 | err_cqb: |
1050 | kvfree(cqb); |
1051 | if (udata) |
1052 | destroy_cq_user(cq, udata); |
1053 | else |
1054 | destroy_cq_kernel(dev, cq); |
1055 | return err; |
1056 | } |
1057 | |
1058 | int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) |
1059 | { |
1060 | struct mlx5_ib_dev *dev = to_mdev(cq->device); |
1061 | struct mlx5_ib_cq *mcq = to_mcq(cq); |
1062 | int ret; |
1063 | |
1064 | ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); |
1065 | if (ret) |
1066 | return ret; |
1067 | |
1068 | if (udata) |
1069 | destroy_cq_user(mcq, udata); |
1070 | else |
1071 | destroy_cq_kernel(dev, mcq); |
1072 | return 0; |
1073 | } |
1074 | |
1075 | static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) |
1076 | { |
1077 | return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff); |
1078 | } |
1079 | |
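/*
 * Remove every CQE belonging to the given QP/SRQ number (@rsn) from the CQ,
 * compacting the remaining entries while preserving their owner bits, and
 * advance the consumer index past the freed entries. The caller must hold
 * the CQ lock; mlx5_ib_cq_clean() below is the locking wrapper.
 */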
1080 | void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) |
1081 | { |
1082 | struct mlx5_cqe64 *cqe64, *dest64; |
1083 | void *cqe, *dest; |
1084 | u32 prod_index; |
1085 | int nfreed = 0; |
1086 | u8 owner_bit; |
1087 | |
1088 | if (!cq) |
1089 | return; |
1090 | |
1091 | /* First we need to find the current producer index, so we |
1092 | * know where to start cleaning from. It doesn't matter if HW |
1093 | * adds new entries after this loop -- the QP we're worried |
1094 | * about is already in RESET, so the new entries won't come |
1095 | * from our QP and therefore don't need to be checked. |
1096 | */ |
1097 | for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++) |
1098 | if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) |
1099 | break; |
1100 | |
1101 | /* Now sweep backwards through the CQ, removing CQ entries |
1102 | * that match our QP by copying older entries on top of them. |
1103 | */ |
1104 | while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { |
1105 | cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); |
1106 | cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; |
1107 | if (is_equal_rsn(cqe64, rsn)) { |
1108 | if (srq && (ntohl(cqe64->srqn) & 0xffffff)) |
1109 | mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); |
1110 | ++nfreed; |
1111 | } else if (nfreed) { |
1112 | dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); |
1113 | dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64; |
1114 | owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; |
1115 | memcpy(dest, cqe, cq->mcq.cqe_sz); |
1116 | dest64->op_own = owner_bit | |
1117 | (dest64->op_own & ~MLX5_CQE_OWNER_MASK); |
1118 | } |
1119 | } |
1120 | |
1121 | if (nfreed) { |
1122 | cq->mcq.cons_index += nfreed; |
1123 | /* Make sure update of buffer contents is done before |
1124 | * updating consumer index. |
1125 | */ |
1126 | wmb(); |
1127 | mlx5_cq_set_ci(&cq->mcq); |
1128 | } |
1129 | } |
1130 | |
1131 | void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) |
1132 | { |
1133 | if (!cq) |
1134 | return; |
1135 | |
1136 | spin_lock_irq(&cq->lock); |
1137 | __mlx5_ib_cq_clean(cq, qpn, srq); |
1138 | spin_unlock_irq(&cq->lock); |
1139 | } |
1140 | |
1141 | int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) |
1142 | { |
1143 | struct mlx5_ib_dev *dev = to_mdev(cq->device); |
1144 | struct mlx5_ib_cq *mcq = to_mcq(cq); |
1145 | int err; |
1146 | |
1147 | if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) |
1148 | return -EOPNOTSUPP; |
1149 | |
1150 | if (cq_period > MLX5_MAX_CQ_PERIOD) |
1151 | return -EINVAL; |
1152 | |
1153 | err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, |
1154 | cq_period, cq_count); |
1155 | if (err) |
1156 | mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); |
1157 | |
1158 | return err; |
1159 | } |
1160 | |
1161 | static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, |
1162 | int entries, struct ib_udata *udata, |
1163 | int *cqe_size) |
1164 | { |
1165 | struct mlx5_ib_resize_cq ucmd; |
1166 | struct ib_umem *umem; |
1167 | int err; |
1168 | |
1169 | err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); |
1170 | if (err) |
1171 | return err; |
1172 | |
1173 | if (ucmd.reserved0 || ucmd.reserved1) |
1174 | return -EINVAL; |
1175 | |
1176 | /* check multiplication overflow */ |
1177 | if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) |
1178 | return -EINVAL; |
1179 | |
1180 | umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, |
1181 | (size_t)ucmd.cqe_size * entries, |
1182 | IB_ACCESS_LOCAL_WRITE); |
1183 | if (IS_ERR(umem)) { |
1184 | err = PTR_ERR(umem); |
1185 | return err; |
1186 | } |
1187 | |
1188 | cq->resize_umem = umem; |
1189 | *cqe_size = ucmd.cqe_size; |
1190 | |
1191 | return 0; |
1192 | } |
1193 | |
1194 | static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, |
1195 | int entries, int cqe_size) |
1196 | { |
1197 | int err; |
1198 | |
1199 | cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); |
1200 | if (!cq->resize_buf) |
1201 | return -ENOMEM; |
1202 | |
1203 | err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size); |
1204 | if (err) |
1205 | goto ex; |
1206 | |
1207 | init_cq_frag_buf(cq->resize_buf); |
1208 | |
1209 | return 0; |
1210 | |
1211 | ex: |
1212 | kfree(cq->resize_buf); |
1213 | return err; |
1214 | } |
1215 | |
1216 | static int copy_resize_cqes(struct mlx5_ib_cq *cq) |
1217 | { |
1218 | struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); |
1219 | struct mlx5_cqe64 *scqe64; |
1220 | struct mlx5_cqe64 *dcqe64; |
1221 | void *start_cqe; |
1222 | void *scqe; |
1223 | void *dcqe; |
1224 | int ssize; |
1225 | int dsize; |
1226 | int i; |
1227 | u8 sw_own; |
1228 | |
1229 | ssize = cq->buf.cqe_size; |
1230 | dsize = cq->resize_buf->cqe_size; |
1231 | if (ssize != dsize) { |
1232 | mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); |
1233 | return -EINVAL; |
1234 | } |
1235 | |
1236 | i = cq->mcq.cons_index; |
1237 | scqe = get_sw_cqe(cq, i); |
1238 | scqe64 = ssize == 64 ? scqe : scqe + 64; |
1239 | start_cqe = scqe; |
1240 | if (!scqe) { |
1241 | mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); |
1242 | return -EINVAL; |
1243 | } |
1244 | |
1245 | while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { |
1246 | dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, |
1247 | (i + 1) & cq->resize_buf->nent); |
1248 | dcqe64 = dsize == 64 ? dcqe : dcqe + 64; |
1249 | sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); |
1250 | memcpy(dcqe, scqe, dsize); |
1251 | dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; |
1252 | |
1253 | ++i; |
1254 | scqe = get_sw_cqe(cq, i); |
1255 | scqe64 = ssize == 64 ? scqe : scqe + 64; |
1256 | if (!scqe) { |
1257 | mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); |
1258 | return -EINVAL; |
1259 | } |
1260 | |
1261 | if (scqe == start_cqe) { |
1262 | pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", |
1263 | cq->mcq.cqn); |
1264 | return -ENOMEM; |
1265 | } |
1266 | } |
1267 | ++cq->mcq.cons_index; |
1268 | return 0; |
1269 | } |
1270 | |
1271 | int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) |
1272 | { |
1273 | struct mlx5_ib_dev *dev = to_mdev(ibcq->device); |
1274 | struct mlx5_ib_cq *cq = to_mcq(ibcq); |
1275 | void *cqc; |
1276 | u32 *in; |
1277 | int err; |
1278 | int npas; |
1279 | __be64 *pas; |
1280 | unsigned int page_offset_quantized = 0; |
1281 | unsigned int page_shift; |
1282 | int inlen; |
1283 | int cqe_size; |
1284 | unsigned long flags; |
1285 | |
1286 | if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { |
1287 | pr_info("Firmware does not support resize CQ\n"); |
1288 | return -ENOSYS; |
1289 | } |
1290 | |
1291 | if (entries < 1 || |
1292 | entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { |
1293 | mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", |
1294 | entries, |
1295 | 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); |
1296 | return -EINVAL; |
1297 | } |
1298 | |
1299 | entries = roundup_pow_of_two(entries + 1); |
1300 | if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) |
1301 | return -EINVAL; |
1302 | |
1303 | if (entries == ibcq->cqe + 1) |
1304 | return 0; |
1305 | |
1306 | mutex_lock(&cq->resize_mutex); |
1307 | if (udata) { |
1308 | unsigned long page_size; |
1309 | |
1310 | err = resize_user(dev, cq, entries, udata, &cqe_size); |
1311 | if (err) |
1312 | goto ex; |
1313 | |
1314 | page_size = mlx5_umem_find_best_cq_quantized_pgoff( |
1315 | cq->resize_umem, cqc, log_page_size, |
1316 | MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, |
1317 | &page_offset_quantized); |
1318 | if (!page_size) { |
1319 | err = -EINVAL; |
1320 | goto ex_resize; |
1321 | } |
1322 | npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); |
1323 | page_shift = order_base_2(page_size); |
1324 | } else { |
1325 | struct mlx5_frag_buf *frag_buf; |
1326 | |
1327 | cqe_size = 64; |
1328 | err = resize_kernel(dev, cq, entries, cqe_size); |
1329 | if (err) |
1330 | goto ex; |
1331 | frag_buf = &cq->resize_buf->frag_buf; |
1332 | npas = frag_buf->npages; |
1333 | page_shift = frag_buf->page_shift; |
1334 | } |
1335 | |
1336 | inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + |
1337 | MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; |
1338 | |
1339 | in = kvzalloc(inlen, GFP_KERNEL); |
1340 | if (!in) { |
1341 | err = -ENOMEM; |
1342 | goto ex_resize; |
1343 | } |
1344 | |
1345 | pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); |
1346 | if (udata) |
1347 | mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, |
1348 | 0); |
1349 | else |
1350 | mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); |
1351 | |
1352 | MLX5_SET(modify_cq_in, in, |
1353 | modify_field_select_resize_field_select.resize_field_select.resize_field_select, |
1354 | MLX5_MODIFY_CQ_MASK_LOG_SIZE | |
1355 | MLX5_MODIFY_CQ_MASK_PG_OFFSET | |
1356 | MLX5_MODIFY_CQ_MASK_PG_SIZE); |
1357 | |
1358 | cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); |
1359 | |
1360 | MLX5_SET(cqc, cqc, log_page_size, |
1361 | page_shift - MLX5_ADAPTER_PAGE_SHIFT); |
1362 | MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); |
1363 | MLX5_SET(cqc, cqc, cqe_sz, |
1364 | cqe_sz_to_mlx_sz(cqe_size, |
1365 | cq->private_flags & |
1366 | MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); |
1367 | MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); |
1368 | |
1369 | MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); |
1370 | MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); |
1371 | |
1372 | err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); |
1373 | if (err) |
1374 | goto ex_alloc; |
1375 | |
1376 | if (udata) { |
1377 | cq->ibcq.cqe = entries - 1; |
1378 | ib_umem_release(cq->buf.umem); |
1379 | cq->buf.umem = cq->resize_umem; |
1380 | cq->resize_umem = NULL; |
1381 | } else { |
1382 | struct mlx5_ib_cq_buf tbuf; |
1383 | int resized = 0; |
1384 | |
1385 | spin_lock_irqsave(&cq->lock, flags); |
1386 | if (cq->resize_buf) { |
1387 | err = copy_resize_cqes(cq); |
1388 | if (!err) { |
1389 | tbuf = cq->buf; |
1390 | cq->buf = *cq->resize_buf; |
1391 | kfree(cq->resize_buf); |
1392 | cq->resize_buf = NULL; |
1393 | resized = 1; |
1394 | } |
1395 | } |
1396 | cq->ibcq.cqe = entries - 1; |
1397 | spin_unlock_irqrestore(&cq->lock, flags); |
1398 | if (resized) |
1399 | free_cq_buf(dev, &tbuf); |
1400 | } |
1401 | mutex_unlock(&cq->resize_mutex); |
1402 | |
1403 | kvfree(in); |
1404 | return 0; |
1405 | |
1406 | ex_alloc: |
1407 | kvfree(in); |
1408 | |
1409 | ex_resize: |
1410 | ib_umem_release(cq->resize_umem); |
1411 | if (!udata) { |
1412 | free_cq_buf(dev, cq->resize_buf); |
1413 | cq->resize_buf = NULL; |
1414 | } |
1415 | ex: |
1416 | mutex_unlock(&cq->resize_mutex); |
1417 | return err; |
1418 | } |
1419 | |
1420 | int mlx5_ib_get_cqe_size(struct ib_cq *ibcq) |
1421 | { |
1422 | struct mlx5_ib_cq *cq; |
1423 | |
1424 | if (!ibcq) |
1425 | return 128; |
1426 | |
1427 | cq = to_mcq(ibcq); |
1428 | return cq->cqe_size; |
1429 | } |
1430 | |
1431 | /* Called from atomic context */ |
1432 | int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) |
1433 | { |
1434 | struct mlx5_ib_wc *soft_wc; |
1435 | struct mlx5_ib_cq *cq = to_mcq(ibcq); |
1436 | unsigned long flags; |
1437 | |
1438 | soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); |
1439 | if (!soft_wc) |
1440 | return -ENOMEM; |
1441 | |
1442 | soft_wc->wc = *wc; |
1443 | spin_lock_irqsave(&cq->lock, flags); |
1444 | list_add_tail(&soft_wc->list, &cq->wc_list); |
1445 | if (cq->notify_flags == IB_CQ_NEXT_COMP || |
1446 | wc->status != IB_WC_SUCCESS) { |
1447 | cq->notify_flags = 0; |
1448 | schedule_work(&cq->notify_work); |
1449 | } |
1450 | spin_unlock_irqrestore(&cq->lock, flags); |
1451 | |
1452 | return 0; |
1453 | } |
1454 | |
1455 | ADD_UVERBS_ATTRIBUTES_SIMPLE( |
1456 | mlx5_ib_cq_create, |
1457 | UVERBS_OBJECT_CQ, |
1458 | UVERBS_METHOD_CQ_CREATE, |
1459 | UVERBS_ATTR_PTR_IN( |
1460 | MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX, |
1461 | UVERBS_ATTR_TYPE(u32), |
1462 | UA_OPTIONAL)); |
1463 | |
1464 | const struct uapi_definition mlx5_ib_create_cq_defs[] = { |
1465 | UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create), |
1466 | {}, |
1467 | }; |
1468 |