// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

/*
 * This file contains HFI1 support for IPOIB SDMA functionality
 */

#include <linux/log2.h>
#include <linux/circ_buf.h>

#include "sdma.h"
#include "verbs.h"
#include "trace_ibhdrs.h"
#include "ipoib.h"
#include "trace_tx.h"

/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
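/*
 * Note: these helpers assume size is a power of two, which the ring sizes
 * allocated in hfi1_ipoib_txreq_init() always are. For example, with
 * size = 8, CIRC_NEXT(7, 8) == 0 and CIRC_PREV(0, 8) == 7.
 */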

struct ipoib_txparms {
        struct hfi1_devdata *dd;
        struct rdma_ah_attr *ah_attr;
        struct hfi1_ibport *ibp;
        struct hfi1_ipoib_txq *txq;
        union hfi1_ipoib_flow flow;
        u32 dqpn;
        u8 hdr_dwords;
        u8 entropy;
};

static struct ipoib_txreq *
hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
{
        return (struct ipoib_txreq *)(r->items + (idx << r->shift));
}

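/*
 * Outstanding txreqs are tracked as the difference of two free-running
 * counters; unsigned subtraction keeps the count correct even across
 * counter wrap.
 */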
static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
{
        return sent - completed;
}

static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
        return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
                                 txq->tx_ring.complete_txreqs);
}

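/*
 * The netif subqueue stays stopped while tx_ring.stops is nonzero. Both
 * the ring-full and the no-descriptor paths can stop the queue
 * independently, so a counter (rather than a flag) ensures the queue is
 * only woken once every stop reason has cleared.
 */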
static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
{
        trace_hfi1_txq_stop(txq);
        if (atomic_inc_return(&txq->tx_ring.stops) == 1)
                netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}

static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
{
        trace_hfi1_txq_wake(txq);
        if (atomic_dec_and_test(&txq->tx_ring.stops))
                netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
}

static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
{
        return min_t(uint, txq->priv->netdev->tx_queue_len,
                     txq->tx_ring.max_items - 1);
}

static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
{
        return min_t(uint, txq->priv->netdev->tx_queue_len,
                     txq->tx_ring.max_items) >> 1;
}

static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
{
        ++txq->tx_ring.sent_txreqs;
        if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
            !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
                trace_hfi1_txq_full(txq);
                hfi1_ipoib_stop_txq(txq);
        }
}

static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
{
        struct net_device *dev = txq->priv->netdev;

        /* If shutting down, just return; the queue state is irrelevant */
        if (unlikely(dev->reg_state != NETREG_REGISTERED))
                return;

        /*
         * The queue is restarted once it has drained to less than half
         * full. The size of the txreq ring is fixed at initialization,
         * but the tx queue len can be adjusted upward while the interface
         * is running, and may grow large enough to overflow the txreq
         * ring. Use the minimum of the current tx_queue_len and the
         * ring's max txreqs to protect against ring overflow.
         */
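        /*
         * Example, assuming a tx_queue_len of 1000 and the resulting
         * 1024-entry ring: hwat = min(1000, 1023) = 1000 and
         * lwat = min(1000, 1024) / 2 = 500, so a queue stopped at 1000
         * outstanding txreqs restarts once fewer than 500 remain.
         */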
        if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
            atomic_xchg(&txq->tx_ring.ring_full, 0)) {
                trace_hfi1_txq_xmit_unstopped(txq);
                hfi1_ipoib_wake_txq(txq);
        }
}

static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
{
        struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;

        if (likely(!tx->sdma_status)) {
                dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
        } else {
                ++priv->netdev->stats.tx_errors;
                dd_dev_warn(priv->dd,
                            "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
                            __func__, tx->sdma_status,
                            le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
                            tx->txq->sde->this_idx);
        }

        napi_consume_skb(tx->skb, budget);
        tx->skb = NULL;
        sdma_txclean(priv->dd, &tx->txreq);
}

static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
{
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        int i;
        struct ipoib_txreq *tx;

        for (i = 0; i < tx_ring->max_items; i++) {
                tx = hfi1_txreq_from_idx(tx_ring, i);
                tx->complete = 0;
                dev_kfree_skb_any(tx->skb);
                tx->skb = NULL;
                sdma_txclean(txq->priv->dd, &tx->txreq);
        }
        tx_ring->head = 0;
        tx_ring->tail = 0;
        tx_ring->complete_txreqs = 0;
        tx_ring->sent_txreqs = 0;
        tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
}

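/*
 * NAPI poll side of the tx completion ring: reap txreqs whose complete
 * flag was set (with a release store) by hfi1_ipoib_sdma_complete(),
 * advancing head past each freed slot. The final head value is published
 * with a release store, paired with the acquire load in
 * hfi1_ipoib_send_dma_common() when it refreshes tx_ring->avail.
 */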
static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
{
        struct hfi1_ipoib_txq *txq =
                container_of(napi, struct hfi1_ipoib_txq, napi);
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        u32 head = tx_ring->head;
        u32 max_tx = tx_ring->max_items;
        int work_done;
        struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);

        trace_hfi1_txq_poll(txq);
        for (work_done = 0; work_done < budget; work_done++) {
                /* See hfi1_ipoib_sdma_complete() */
                if (!smp_load_acquire(&tx->complete))
                        break;
                tx->complete = 0;
                trace_hfi1_tx_produce(tx, head);
                hfi1_ipoib_free_tx(tx, budget);
                head = CIRC_NEXT(head, max_tx);
                tx = hfi1_txreq_from_idx(tx_ring, head);
        }
        tx_ring->complete_txreqs += work_done;

        /* Finished freeing tx items so store the head value. */
        smp_store_release(&tx_ring->head, head);

        hfi1_ipoib_check_queue_stopped(txq);

        if (work_done < budget)
                napi_complete_done(napi, work_done);

        return work_done;
}

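/*
 * Completion callback invoked by the SDMA engine for each txreq, from the
 * engine's completion (interrupt) path as the use of napi_schedule_irqoff()
 * assumes: record the status, mark the txreq complete with a release store
 * (paired with the acquire load in hfi1_ipoib_poll_tx_ring()), and
 * schedule NAPI to reap it.
 */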
static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
{
        struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);

        trace_hfi1_txq_complete(tx->txq);
        tx->sdma_status = status;
        /* see hfi1_ipoib_poll_tx_ring */
        smp_store_release(&tx->complete, 1);
        napi_schedule_irqoff(&tx->txq->napi);
}

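/*
 * Map the skb payload into SDMA descriptors: one kvaddr descriptor for
 * the linear header area (if any), then one page descriptor per frag.
 */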
static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
                                        struct ipoib_txparms *txp)
{
        struct hfi1_devdata *dd = txp->dd;
        struct sdma_txreq *txreq = &tx->txreq;
        struct sk_buff *skb = tx->skb;
        int ret = 0;
        int i;

        if (skb_headlen(skb)) {
                ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
                if (unlikely(ret))
                        return ret;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                ret = sdma_txadd_page(dd,
                                      txreq,
                                      skb_frag_page(frag),
                                      skb_frag_off(frag),
                                      skb_frag_size(frag),
                                      NULL, NULL, NULL);
                if (unlikely(ret))
                        break;
        }

        return ret;
}

static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
                                    struct ipoib_txparms *txp)
{
        struct hfi1_devdata *dd = txp->dd;
        struct sdma_txreq *txreq = &tx->txreq;
        struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        u16 pkt_bytes =
                sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
        int ret;

        ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
        if (unlikely(ret))
                return ret;

        /* add pbc + headers */
        ret = sdma_txadd_kvaddr(dd,
                                txreq,
                                sdma_hdr,
                                sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
        if (unlikely(ret))
                return ret;

        /* add the ulp payload */
        return hfi1_ipoib_build_ulp_payload(tx, txp);
}

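/*
 * Build the 9B IB headers (LRH, BTH, DETH, and optionally a GRH) plus the
 * PBC for a UD send-only packet, using the address handle and flow
 * information in txp.
 */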
static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
                                           struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
        struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        struct sk_buff *skb = tx->skb;
        struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
        struct rdma_ah_attr *ah_attr = txp->ah_attr;
        struct ib_other_headers *ohdr;
        struct ib_grh *grh;
        u16 dwords;
        u16 slid;
        u16 dlid;
        u16 lrh0;
        u32 bth0;
        u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
                         priv->netdev->dev_addr[2] << 8 |
                         priv->netdev->dev_addr[3]);
        u16 payload_dwords;
        u8 pad_cnt;

        pad_cnt = -skb->len & 3;

        /* Includes ICRC */
        payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;

        /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
        txp->hdr_dwords = 7;

        if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
                grh = &sdma_hdr->hdr.ibh.u.l.grh;
                txp->hdr_dwords +=
                        hfi1_make_grh(txp->ibp,
                                      grh,
                                      rdma_ah_read_grh(ah_attr),
                                      txp->hdr_dwords - LRH_9B_DWORDS,
                                      payload_dwords);
                lrh0 = HFI1_LRH_GRH;
                ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
        } else {
                lrh0 = HFI1_LRH_BTH;
                ohdr = &sdma_hdr->hdr.ibh.u.oth;
        }

        lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
        lrh0 |= (txp->flow.sc5 & 0xf) << 12;

        dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
        if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
                slid = be16_to_cpu(IB_LID_PERMISSIVE);
        } else {
                u16 lid = (u16)ppd->lid;

                if (lid) {
                        lid |= rdma_ah_get_path_bits(ah_attr) &
                                ((1 << ppd->lmc) - 1);
                        slid = lid;
                } else {
                        slid = be16_to_cpu(IB_LID_PERMISSIVE);
                }
        }

        /* Includes ICRC */
        dwords = txp->hdr_dwords + payload_dwords;

        /* Build the lrh */
        sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
        hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);

        /* Build the bth */
        bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;

        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(txp->dqpn);
        ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));

        /* Build the deth */
        ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
                                          HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);

        /* Construct the pbc. */
        sdma_hdr->pbc =
                cpu_to_le64(create_pbc(ppd,
                                       ib_is_sc5(txp->flow.sc5) <<
                                                        PBC_DC_INFO_SHIFT,
                                       0,
                                       sc_to_vlt(priv->dd, txp->flow.sc5),
                                       dwords - SIZE_OF_CRC +
                                                (sizeof(sdma_hdr->pbc) >> 2)));
}

static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
                                                      struct sk_buff *skb,
                                                      struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct ipoib_txreq *tx;
        struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
        u32 tail = tx_ring->tail;
        int ret;

        if (unlikely(!tx_ring->avail)) {
                u32 head;

                if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
                        /* This shouldn't happen with a stopped queue */
                        return ERR_PTR(-ENOMEM);
                /* See hfi1_ipoib_poll_tx_ring() */
                head = smp_load_acquire(&tx_ring->head);
                tx_ring->avail =
                        min_t(u32, hfi1_ipoib_ring_hwat(txq),
                              CIRC_CNT(head, tail, tx_ring->max_items));
        } else {
                tx_ring->avail--;
        }
        tx = hfi1_txreq_from_idx(tx_ring, tail);
        trace_hfi1_txq_alloc_tx(txq);

        /* so that we can test if the sdma descriptors are there */
        tx->txreq.num_desc = 0;
        tx->txq = txq;
        tx->skb = skb;
        INIT_LIST_HEAD(&tx->txreq.list);

        hfi1_ipoib_build_ib_tx_headers(tx, txp);

        ret = hfi1_ipoib_build_tx_desc(tx, txp);
        if (likely(!ret)) {
                if (txq->flow.as_int != txp->flow.as_int) {
                        txq->flow.tx_queue = txp->flow.tx_queue;
                        txq->flow.sc5 = txp->flow.sc5;
                        txq->sde =
                                sdma_select_engine_sc(priv->dd,
                                                      txp->flow.tx_queue,
                                                      txp->flow.sc5);
                        trace_hfi1_flow_switch(txq);
                }

                return tx;
        }

        sdma_txclean(priv->dd, &tx->txreq);

        return ERR_PTR(ret);
}

static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
                                     struct hfi1_ipoib_txq *txq)
{
        int ret;
        u16 count_out;

        ret = sdma_send_txlist(txq->sde,
                               iowait_get_ib_work(&txq->wait),
                               &txq->tx_list,
                               &count_out);
        if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
                return ret;

        dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);

        return ret;
}

static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
                                    struct hfi1_ipoib_txq *txq)
{
        int ret = 0;

        if (!list_empty(&txq->tx_list)) {
                /* Flush the current list */
                ret = hfi1_ipoib_submit_tx_list(dev, txq);

                if (unlikely(ret))
                        if (ret != -EBUSY)
                                ++dev->stats.tx_carrier_errors;
        }

        return ret;
}


static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
                                struct ipoib_txreq *tx)
{
        int ret;

        ret = sdma_send_txreq(txq->sde,
                              iowait_get_ib_work(&txq->wait),
                              &tx->txreq,
                              txq->pkts_sent);
        if (likely(!ret)) {
                txq->pkts_sent = true;
                iowait_starve_clear(txq->pkts_sent, &txq->wait);
        }

        return ret;
}

static int hfi1_ipoib_send_dma_single(struct net_device *dev,
                                      struct sk_buff *skb,
                                      struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct hfi1_ipoib_circ_buf *tx_ring;
        struct ipoib_txreq *tx;
        int ret;

        tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
        if (IS_ERR(tx)) {
                int ret = PTR_ERR(tx);

                dev_kfree_skb_any(skb);

                if (ret == -ENOMEM)
                        ++dev->stats.tx_errors;
                else
                        ++dev->stats.tx_carrier_errors;

                return NETDEV_TX_OK;
        }

        tx_ring = &txq->tx_ring;
        trace_hfi1_tx_consume(tx, tx_ring->tail);
        /* consume tx */
        smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
        ret = hfi1_ipoib_submit_tx(txq, tx);
        if (likely(!ret)) {
tx_ok:
                trace_sdma_output_ibhdr(txq->priv->dd,
                                        &tx->sdma_hdr->hdr,
                                        ib_is_sc5(txp->flow.sc5));
                hfi1_ipoib_check_queue_depth(txq);
                return NETDEV_TX_OK;
        }

        txq->pkts_sent = false;

        if (ret == -EBUSY || ret == -ECOMM)
                goto tx_ok;

        /* mark complete and kick napi tx */
        smp_store_release(&tx->complete, 1);
        napi_schedule(&tx->txq->napi);

        ++dev->stats.tx_carrier_errors;

        return NETDEV_TX_OK;
}


static int hfi1_ipoib_send_dma_list(struct net_device *dev,
                                    struct sk_buff *skb,
                                    struct ipoib_txparms *txp)
{
        struct hfi1_ipoib_txq *txq = txp->txq;
        struct hfi1_ipoib_circ_buf *tx_ring;
        struct ipoib_txreq *tx;

        /* Has the flow changed? */
        if (txq->flow.as_int != txp->flow.as_int) {
                int ret;

                trace_hfi1_flow_flush(txq);
                ret = hfi1_ipoib_flush_tx_list(dev, txq);
                if (unlikely(ret)) {
                        if (ret == -EBUSY)
                                ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;
                }
        }
        tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
        if (IS_ERR(tx)) {
                int ret = PTR_ERR(tx);

                dev_kfree_skb_any(skb);

                if (ret == -ENOMEM)
                        ++dev->stats.tx_errors;
                else
                        ++dev->stats.tx_carrier_errors;

                return NETDEV_TX_OK;
        }

        tx_ring = &txq->tx_ring;
        trace_hfi1_tx_consume(tx, tx_ring->tail);
        /* consume tx */
        smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
        list_add_tail(&tx->txreq.list, &txq->tx_list);

        hfi1_ipoib_check_queue_depth(txq);

        trace_sdma_output_ibhdr(txq->priv->dd,
                                &tx->sdma_hdr->hdr,
                                ib_is_sc5(txp->flow.sc5));

        if (!netdev_xmit_more())
                (void)hfi1_ipoib_flush_tx_list(dev, txq);

        return NETDEV_TX_OK;
}

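/*
 * Derive a per-flow entropy byte from the first four bytes of the
 * transport header; for TCP and UDP that is the source/destination port
 * pair, so all packets of one flow carry the same entropy value in the
 * DETH (see hfi1_ipoib_build_ib_tx_headers()). Fall back to the skb
 * queue mapping when no transport header has been set.
 */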
static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
{
        if (skb_transport_header_was_set(skb)) {
                u8 *hdr = (u8 *)skb_transport_header(skb);

                return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
        }

        return (u8)skb_get_queue_mapping(skb);
}

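/*
 * hfi1_ipoib_send - ipoib send entry point
 *
 * Drops oversized packets, fills in the per-packet transmit parameters,
 * and hands the skb to the list path (when more packets are coming or a
 * list is already pending) or to the single-packet path.
 */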
int hfi1_ipoib_send(struct net_device *dev,
                    struct sk_buff *skb,
                    struct ib_ah *address,
                    u32 dqpn)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct ipoib_txparms txp;
        struct rdma_netdev *rn = netdev_priv(dev);

        if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
                dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
                            skb->len,
                            rn->mtu + HFI1_IPOIB_ENCAP_LEN);
                ++dev->stats.tx_dropped;
                ++dev->stats.tx_errors;
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

        txp.dd = priv->dd;
        txp.ah_attr = &ibah_to_rvtah(address)->attr;
        txp.ibp = to_iport(priv->device, priv->port_num);
        txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
        txp.dqpn = dqpn;
        txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
        txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
        txp.entropy = hfi1_ipoib_calc_entropy(skb);

        if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
                return hfi1_ipoib_send_dma_list(dev, skb, &txp);

        return hfi1_ipoib_send_dma_single(dev, skb, &txp);
}

/*
 * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
 *
 * This function gets called from sdma_send_txreq() when there are not enough
 * sdma descriptors available to send the packet. It adds the Tx queue's wait
 * structure to the sdma engine's dmawait list to be woken up when descriptors
 * become available.
 */
static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
                                 struct iowait_work *wait,
                                 struct sdma_txreq *txreq,
                                 uint seq,
                                 bool pkts_sent)
{
        struct hfi1_ipoib_txq *txq =
                container_of(wait->iow, struct hfi1_ipoib_txq, wait);

        write_seqlock(&sde->waitlock);

        if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
                if (sdma_progress(sde, seq, txreq)) {
                        write_sequnlock(&sde->waitlock);
                        return -EAGAIN;
                }

                if (list_empty(&txreq->list))
                        /* came from non-list submit */
                        list_add_tail(&txreq->list, &txq->tx_list);
                if (list_empty(&txq->wait.list)) {
                        struct hfi1_ibport *ibp = &sde->ppd->ibport_data;

                        if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
                                trace_hfi1_txq_queued(txq);
                                hfi1_ipoib_stop_txq(txq);
                        }
                        ibp->rvp.n_dmawait++;
                        iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
                }

                write_sequnlock(&sde->waitlock);
                return -EBUSY;
        }

        write_sequnlock(&sde->waitlock);
        return -EINVAL;
}


/*
 * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
 *
 * This function gets called when SDMA descriptors become available and the
 * Tx queue's wait structure was previously added to the sdma engine's
 * dmawait list.
 */
static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
{
        struct hfi1_ipoib_txq *txq =
                container_of(wait, struct hfi1_ipoib_txq, wait);

        trace_hfi1_txq_wakeup(txq);
        if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
                iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
}

static void hfi1_ipoib_flush_txq(struct work_struct *work)
{
        struct iowait_work *ioww =
                container_of(work, struct iowait_work, iowork);
        struct iowait *wait = iowait_ioww_to_iow(ioww);
        struct hfi1_ipoib_txq *txq =
                container_of(wait, struct hfi1_ipoib_txq, wait);
        struct net_device *dev = txq->priv->netdev;

        if (likely(dev->reg_state == NETREG_REGISTERED) &&
            likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
                if (atomic_xchg(&txq->tx_ring.no_desc, 0))
                        hfi1_ipoib_wake_txq(txq);
}

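/*
 * Allocate and initialize one txq per netdev tx queue: the iowait used to
 * park on an SDMA engine, the txreq ring (sized from tx_queue_len), and
 * the per-txreq sdma headers, then register the tx NAPI poll routine.
 */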
int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
        struct net_device *dev = priv->netdev;
        u32 tx_ring_size, tx_item_size;
        struct hfi1_ipoib_circ_buf *tx_ring;
        int i, j;

        /*
         * Ring holds 1 less than tx_ring_size
         * Round up to next power of 2 in order to hold at least tx_queue_len
         */
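        /* e.g. the common default tx_queue_len of 1000 yields a 1024-entry ring */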
        tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
        tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));

        priv->txqs = kcalloc_node(dev->num_tx_queues,
                                  sizeof(struct hfi1_ipoib_txq),
                                  GFP_KERNEL,
                                  priv->dd->node);
        if (!priv->txqs)
                return -ENOMEM;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
                struct ipoib_txreq *tx;

                tx_ring = &txq->tx_ring;
                iowait_init(&txq->wait,
                            0,
                            hfi1_ipoib_flush_txq,
                            NULL,
                            hfi1_ipoib_sdma_sleep,
                            hfi1_ipoib_sdma_wakeup,
                            NULL,
                            NULL);
                txq->priv = priv;
                txq->sde = NULL;
                INIT_LIST_HEAD(&txq->tx_list);
                atomic_set(&txq->tx_ring.stops, 0);
                atomic_set(&txq->tx_ring.ring_full, 0);
                atomic_set(&txq->tx_ring.no_desc, 0);
                txq->q_idx = i;
                txq->flow.tx_queue = 0xff;
                txq->flow.sc5 = 0xff;
                txq->pkts_sent = false;

                netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
                                             priv->dd->node);

                txq->tx_ring.items =
                        kvzalloc_node(array_size(tx_ring_size, tx_item_size),
                                      GFP_KERNEL, priv->dd->node);
                if (!txq->tx_ring.items)
                        goto free_txqs;

                txq->tx_ring.max_items = tx_ring_size;
                txq->tx_ring.shift = ilog2(tx_item_size);
                txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
                tx_ring = &txq->tx_ring;
                for (j = 0; j < tx_ring_size; j++) {
                        hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
                                kzalloc_node(sizeof(*tx->sdma_hdr),
                                             GFP_KERNEL, priv->dd->node);
                        if (!hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr)
                                goto free_txqs;
                }

                netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
        }

        return 0;

free_txqs:
        for (i--; i >= 0; i--) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                netif_napi_del(&txq->napi);
                tx_ring = &txq->tx_ring;
                for (j = 0; j < tx_ring_size; j++)
                        kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
                kvfree(tx_ring->items);
        }

        kfree(priv->txqs);
        priv->txqs = NULL;
        return -ENOMEM;
}

static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
{
        struct sdma_txreq *txreq;
        struct sdma_txreq *txreq_tmp;

        list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
                struct ipoib_txreq *tx =
                        container_of(txreq, struct ipoib_txreq, txreq);

                list_del(&txreq->list);
                sdma_txclean(txq->priv->dd, &tx->txreq);
                dev_kfree_skb_any(tx->skb);
                tx->skb = NULL;
                txq->tx_ring.complete_txreqs++;
        }

        if (hfi1_ipoib_used(txq))
                dd_dev_warn(txq->priv->dd,
                            "txq %d not empty found %u requests\n",
                            txq->q_idx,
                            hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
                                              txq->tx_ring.complete_txreqs));
}

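/*
 * Tear down all txqs: cancel and drain any pending iowait work, flush the
 * tx_list and completion ring, and free the sdma headers and ring memory.
 */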
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
        int i, j;

        for (i = 0; i < priv->netdev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
                struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;

                iowait_cancel_work(&txq->wait);
                iowait_sdma_drain(&txq->wait);
                hfi1_ipoib_drain_tx_list(txq);
                netif_napi_del(&txq->napi);
                hfi1_ipoib_drain_tx_ring(txq);
                for (j = 0; j < tx_ring->max_items; j++)
                        kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
                kvfree(tx_ring->items);
        }

        kfree(priv->txqs);
        priv->txqs = NULL;
}

void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                napi_enable(&txq->napi);
        }
}

void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        int i;

        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];

                napi_disable(&txq->napi);
                hfi1_ipoib_drain_tx_ring(txq);
        }
}

void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
{
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
        struct hfi1_ipoib_txq *txq = &priv->txqs[q];

        dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
                    txq, q,
                    __netif_subqueue_stopped(dev, txq->q_idx),
                    atomic_read(&txq->tx_ring.stops),
                    atomic_read(&txq->tx_ring.no_desc),
                    atomic_read(&txq->tx_ring.ring_full));
        dd_dev_info(priv->dd, "sde %p engine %u\n",
                    txq->sde,
                    txq->sde ? txq->sde->this_idx : 0);
        dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
        dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
                    txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
                    hfi1_ipoib_used(txq));
        dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
                    dev->tx_queue_len, txq->tx_ring.max_items);
        dd_dev_info(priv->dd, "head %u tail %u\n",
                    txq->tx_ring.head, txq->tx_ring.tail);
        dd_dev_info(priv->dd, "wait queued %u\n",
                    !list_empty(&txq->wait.list));
        dd_dev_info(priv->dd, "tx_list empty %u\n",
                    list_empty(&txq->tx_list));
}
| 869 | |