| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * Linux driver for VMware's vmxnet3 ethernet NIC. |
| 4 | * Copyright (C) 2008-2023, VMware, Inc. All Rights Reserved. |
| 5 | * Maintained by: pv-drivers@vmware.com |
| 6 | * |
| 7 | */ |
| 8 | |
| 9 | #include "vmxnet3_int.h" |
| 10 | #include "vmxnet3_xdp.h" |
| 11 | |
| 12 | static void |
| 13 | vmxnet3_xdp_exchange_program(struct vmxnet3_adapter *adapter, |
| 14 | struct bpf_prog *prog) |
| 15 | { |
| 16 | rcu_assign_pointer(adapter->xdp_bpf_prog, prog); |
| 17 | } |
| 18 | |
| 19 | static inline struct vmxnet3_tx_queue * |
| 20 | vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) |
| 21 | { |
| 22 | struct vmxnet3_tx_queue *tq; |
| 23 | int tq_number; |
| 24 | int cpu; |
| 25 | |
| 26 | tq_number = adapter->num_tx_queues; |
| 27 | cpu = smp_processor_id(); |
| 28 | if (likely(cpu < tq_number)) |
| 29 | tq = &adapter->tx_queue[cpu]; |
| 30 | else |
| 31 | tq = &adapter->tx_queue[cpu % tq_number]; |
| 32 | |
| 33 | return tq; |
| 34 | } |
| 35 | |
| 36 | static int |
| 37 | vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf, |
| 38 | struct netlink_ext_ack *extack) |
| 39 | { |
| 40 | struct vmxnet3_adapter *adapter = netdev_priv(dev: netdev); |
| 41 | struct bpf_prog *new_bpf_prog = bpf->prog; |
| 42 | struct bpf_prog *old_bpf_prog; |
| 43 | bool need_update; |
| 44 | bool running; |
| 45 | int err; |
| 46 | |
| 47 | if (new_bpf_prog && netdev->mtu > VMXNET3_XDP_MAX_MTU) { |
| 48 | NL_SET_ERR_MSG_FMT_MOD(extack, "MTU %u too large for XDP" , |
| 49 | netdev->mtu); |
| 50 | return -EOPNOTSUPP; |
| 51 | } |
| 52 | |
| 53 | if (adapter->netdev->features & NETIF_F_LRO) { |
| 54 | NL_SET_ERR_MSG_MOD(extack, "LRO is not supported with XDP" ); |
| 55 | adapter->netdev->features &= ~NETIF_F_LRO; |
| 56 | } |
| 57 | |
| 58 | old_bpf_prog = rcu_dereference(adapter->xdp_bpf_prog); |
| 59 | if (!new_bpf_prog && !old_bpf_prog) |
| 60 | return 0; |
| 61 | |
| 62 | running = netif_running(dev: netdev); |
| 63 | need_update = !!old_bpf_prog != !!new_bpf_prog; |
| 64 | |
| 65 | if (running && need_update) |
| 66 | vmxnet3_quiesce_dev(adapter); |
| 67 | |
| 68 | vmxnet3_xdp_exchange_program(adapter, prog: new_bpf_prog); |
| 69 | if (old_bpf_prog) |
| 70 | bpf_prog_put(prog: old_bpf_prog); |
| 71 | |
| 72 | if (!running || !need_update) |
| 73 | return 0; |
| 74 | |
| 75 | if (new_bpf_prog) |
| 76 | xdp_features_set_redirect_target(dev: netdev, support_sg: false); |
| 77 | else |
| 78 | xdp_features_clear_redirect_target(dev: netdev); |
| 79 | |
| 80 | vmxnet3_reset_dev(adapter); |
| 81 | vmxnet3_rq_destroy_all(adapter); |
| 82 | vmxnet3_adjust_rx_ring_size(adapter); |
| 83 | err = vmxnet3_rq_create_all(adapter); |
| 84 | if (err) { |
| 85 | NL_SET_ERR_MSG_MOD(extack, |
| 86 | "failed to re-create rx queues for XDP." ); |
| 87 | return -EOPNOTSUPP; |
| 88 | } |
| 89 | err = vmxnet3_activate_dev(adapter); |
| 90 | if (err) { |
| 91 | NL_SET_ERR_MSG_MOD(extack, |
| 92 | "failed to activate device for XDP." ); |
| 93 | return -EOPNOTSUPP; |
| 94 | } |
| 95 | clear_bit(VMXNET3_STATE_BIT_RESETTING, addr: &adapter->state); |
| 96 | |
| 97 | return 0; |
| 98 | } |
| 99 | |
| 100 | /* This is the main xdp call used by kernel to set/unset eBPF program. */ |
| 101 | int |
| 102 | vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf) |
| 103 | { |
| 104 | switch (bpf->command) { |
| 105 | case XDP_SETUP_PROG: |
| 106 | return vmxnet3_xdp_set(netdev, bpf, extack: bpf->extack); |
| 107 | default: |
| 108 | return -EINVAL; |
| 109 | } |
| 110 | |
| 111 | return 0; |
| 112 | } |
| 113 | |
| 114 | static int |
| 115 | vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, |
| 116 | struct xdp_frame *xdpf, |
| 117 | struct vmxnet3_tx_queue *tq, bool dma_map) |
| 118 | { |
| 119 | struct vmxnet3_tx_buf_info *tbi = NULL; |
| 120 | union Vmxnet3_GenericDesc *gdesc; |
| 121 | struct vmxnet3_tx_ctx ctx; |
| 122 | int tx_num_deferred; |
| 123 | struct page *page; |
| 124 | u32 buf_size; |
| 125 | u32 dw2; |
| 126 | |
| 127 | spin_lock_irq(lock: &tq->tx_lock); |
| 128 | dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; |
| 129 | dw2 |= xdpf->len; |
| 130 | ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; |
| 131 | gdesc = ctx.sop_txd; |
| 132 | |
| 133 | buf_size = xdpf->len; |
| 134 | tbi = tq->buf_info + tq->tx_ring.next2fill; |
| 135 | |
| 136 | if (vmxnet3_cmd_ring_desc_avail(ring: &tq->tx_ring) == 0) { |
| 137 | tq->stats.tx_ring_full++; |
| 138 | spin_unlock_irq(lock: &tq->tx_lock); |
| 139 | return -ENOSPC; |
| 140 | } |
| 141 | |
| 142 | tbi->map_type = VMXNET3_MAP_XDP; |
| 143 | if (dma_map) { /* ndo_xdp_xmit */ |
| 144 | tbi->dma_addr = dma_map_single(&adapter->pdev->dev, |
| 145 | xdpf->data, buf_size, |
| 146 | DMA_TO_DEVICE); |
| 147 | if (dma_mapping_error(dev: &adapter->pdev->dev, dma_addr: tbi->dma_addr)) { |
| 148 | spin_unlock_irq(lock: &tq->tx_lock); |
| 149 | return -EFAULT; |
| 150 | } |
| 151 | tbi->map_type |= VMXNET3_MAP_SINGLE; |
| 152 | } else { /* XDP buffer from page pool */ |
| 153 | page = virt_to_page(xdpf->data); |
| 154 | tbi->dma_addr = page_pool_get_dma_addr(page) + |
| 155 | (xdpf->data - (void *)xdpf); |
| 156 | dma_sync_single_for_device(dev: &adapter->pdev->dev, |
| 157 | addr: tbi->dma_addr, size: buf_size, |
| 158 | dir: DMA_TO_DEVICE); |
| 159 | } |
| 160 | tbi->xdpf = xdpf; |
| 161 | tbi->len = buf_size; |
| 162 | |
| 163 | gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; |
| 164 | WARN_ON_ONCE(gdesc->txd.gen == tq->tx_ring.gen); |
| 165 | |
| 166 | gdesc->txd.addr = cpu_to_le64(tbi->dma_addr); |
| 167 | gdesc->dword[2] = cpu_to_le32(dw2); |
| 168 | |
| 169 | /* Setup the EOP desc */ |
| 170 | gdesc->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); |
| 171 | |
| 172 | gdesc->txd.om = 0; |
| 173 | gdesc->txd.msscof = 0; |
| 174 | gdesc->txd.hlen = 0; |
| 175 | gdesc->txd.ti = 0; |
| 176 | |
| 177 | tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred); |
| 178 | le32_add_cpu(var: &tq->shared->txNumDeferred, val: 1); |
| 179 | tx_num_deferred++; |
| 180 | |
| 181 | vmxnet3_cmd_ring_adv_next2fill(ring: &tq->tx_ring); |
| 182 | |
| 183 | /* set the last buf_info for the pkt */ |
| 184 | tbi->sop_idx = ctx.sop_txd - tq->tx_ring.base; |
| 185 | |
| 186 | dma_wmb(); |
| 187 | gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ |
| 188 | VMXNET3_TXD_GEN); |
| 189 | spin_unlock_irq(lock: &tq->tx_lock); |
| 190 | |
| 191 | /* No need to handle the case when tx_num_deferred doesn't reach |
| 192 | * threshold. Backend driver at hypervisor side will poll and reset |
| 193 | * tq->shared->txNumDeferred to 0. |
| 194 | */ |
| 195 | if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { |
| 196 | tq->shared->txNumDeferred = 0; |
| 197 | VMXNET3_WRITE_BAR0_REG(adapter, |
| 198 | VMXNET3_REG_TXPROD + tq->qid * 8, |
| 199 | tq->tx_ring.next2fill); |
| 200 | } |
| 201 | |
| 202 | return 0; |
| 203 | } |
| 204 | |
| 205 | static int |
| 206 | vmxnet3_xdp_xmit_back(struct vmxnet3_adapter *adapter, |
| 207 | struct xdp_frame *xdpf) |
| 208 | { |
| 209 | struct vmxnet3_tx_queue *tq; |
| 210 | struct netdev_queue *nq; |
| 211 | int err; |
| 212 | |
| 213 | tq = vmxnet3_xdp_get_tq(adapter); |
| 214 | if (tq->stopped) |
| 215 | return -ENETDOWN; |
| 216 | |
| 217 | nq = netdev_get_tx_queue(dev: adapter->netdev, index: tq->qid); |
| 218 | |
| 219 | __netif_tx_lock(txq: nq, smp_processor_id()); |
| 220 | err = vmxnet3_xdp_xmit_frame(adapter, xdpf, tq, dma_map: false); |
| 221 | __netif_tx_unlock(txq: nq); |
| 222 | |
| 223 | return err; |
| 224 | } |
| 225 | |
| 226 | /* ndo_xdp_xmit */ |
| 227 | int |
| 228 | vmxnet3_xdp_xmit(struct net_device *dev, |
| 229 | int n, struct xdp_frame **frames, u32 flags) |
| 230 | { |
| 231 | struct vmxnet3_adapter *adapter = netdev_priv(dev); |
| 232 | struct vmxnet3_tx_queue *tq; |
| 233 | struct netdev_queue *nq; |
| 234 | int i; |
| 235 | |
| 236 | if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) |
| 237 | return -ENETDOWN; |
| 238 | if (unlikely(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))) |
| 239 | return -EINVAL; |
| 240 | |
| 241 | tq = vmxnet3_xdp_get_tq(adapter); |
| 242 | if (tq->stopped) |
| 243 | return -ENETDOWN; |
| 244 | |
| 245 | nq = netdev_get_tx_queue(dev: adapter->netdev, index: tq->qid); |
| 246 | |
| 247 | __netif_tx_lock(txq: nq, smp_processor_id()); |
| 248 | for (i = 0; i < n; i++) { |
| 249 | if (vmxnet3_xdp_xmit_frame(adapter, xdpf: frames[i], tq, dma_map: true)) { |
| 250 | tq->stats.xdp_xmit_err++; |
| 251 | break; |
| 252 | } |
| 253 | } |
| 254 | tq->stats.xdp_xmit += i; |
| 255 | __netif_tx_unlock(txq: nq); |
| 256 | |
| 257 | return i; |
| 258 | } |
| 259 | |
| 260 | static int |
| 261 | vmxnet3_run_xdp(struct vmxnet3_rx_queue *rq, struct xdp_buff *xdp, |
| 262 | struct bpf_prog *prog) |
| 263 | { |
| 264 | struct xdp_frame *xdpf; |
| 265 | struct page *page; |
| 266 | int err; |
| 267 | u32 act; |
| 268 | |
| 269 | rq->stats.xdp_packets++; |
| 270 | act = bpf_prog_run_xdp(prog, xdp); |
| 271 | page = virt_to_page(xdp->data_hard_start); |
| 272 | |
| 273 | switch (act) { |
| 274 | case XDP_PASS: |
| 275 | return act; |
| 276 | case XDP_REDIRECT: |
| 277 | err = xdp_do_redirect(dev: rq->adapter->netdev, xdp, prog); |
| 278 | if (!err) { |
| 279 | rq->stats.xdp_redirects++; |
| 280 | } else { |
| 281 | rq->stats.xdp_drops++; |
| 282 | page_pool_recycle_direct(pool: rq->page_pool, page); |
| 283 | } |
| 284 | return act; |
| 285 | case XDP_TX: |
| 286 | xdpf = xdp_convert_buff_to_frame(xdp); |
| 287 | if (unlikely(!xdpf || |
| 288 | vmxnet3_xdp_xmit_back(rq->adapter, xdpf))) { |
| 289 | rq->stats.xdp_drops++; |
| 290 | page_pool_recycle_direct(pool: rq->page_pool, page); |
| 291 | } else { |
| 292 | rq->stats.xdp_tx++; |
| 293 | } |
| 294 | return act; |
| 295 | default: |
| 296 | bpf_warn_invalid_xdp_action(dev: rq->adapter->netdev, prog, act); |
| 297 | fallthrough; |
| 298 | case XDP_ABORTED: |
| 299 | trace_xdp_exception(dev: rq->adapter->netdev, xdp: prog, act); |
| 300 | rq->stats.xdp_aborted++; |
| 301 | break; |
| 302 | case XDP_DROP: |
| 303 | rq->stats.xdp_drops++; |
| 304 | break; |
| 305 | } |
| 306 | |
| 307 | page_pool_recycle_direct(pool: rq->page_pool, page); |
| 308 | |
| 309 | return act; |
| 310 | } |
| 311 | |
| 312 | static struct sk_buff * |
| 313 | vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page, |
| 314 | const struct xdp_buff *xdp) |
| 315 | { |
| 316 | struct sk_buff *skb; |
| 317 | |
| 318 | skb = build_skb(page_address(page), PAGE_SIZE); |
| 319 | if (unlikely(!skb)) { |
| 320 | page_pool_recycle_direct(pool: rq->page_pool, page); |
| 321 | rq->stats.rx_buf_alloc_failure++; |
| 322 | return NULL; |
| 323 | } |
| 324 | |
| 325 | /* bpf prog might change len and data position. */ |
| 326 | skb_reserve(skb, len: xdp->data - xdp->data_hard_start); |
| 327 | skb_put(skb, len: xdp->data_end - xdp->data); |
| 328 | skb_mark_for_recycle(skb); |
| 329 | |
| 330 | return skb; |
| 331 | } |
| 332 | |
| 333 | /* Handle packets from DataRing. */ |
| 334 | int |
| 335 | vmxnet3_process_xdp_small(struct vmxnet3_adapter *adapter, |
| 336 | struct vmxnet3_rx_queue *rq, |
| 337 | void *data, int len, |
| 338 | struct sk_buff **skb_xdp_pass) |
| 339 | { |
| 340 | struct bpf_prog *xdp_prog; |
| 341 | struct xdp_buff xdp; |
| 342 | struct page *page; |
| 343 | int act; |
| 344 | |
| 345 | page = page_pool_alloc_pages(pool: rq->page_pool, GFP_ATOMIC); |
| 346 | if (unlikely(!page)) { |
| 347 | rq->stats.rx_buf_alloc_failure++; |
| 348 | return XDP_DROP; |
| 349 | } |
| 350 | |
| 351 | xdp_init_buff(xdp: &xdp, PAGE_SIZE, rxq: &rq->xdp_rxq); |
| 352 | xdp_prepare_buff(xdp: &xdp, page_address(page), headroom: rq->page_pool->p.offset, |
| 353 | data_len: len, meta_valid: false); |
| 354 | xdp_buff_clear_frags_flag(xdp: &xdp); |
| 355 | |
| 356 | /* Must copy the data because it's at dataring. */ |
| 357 | memcpy(xdp.data, data, len); |
| 358 | |
| 359 | xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog); |
| 360 | if (!xdp_prog) { |
| 361 | act = XDP_PASS; |
| 362 | goto out_skb; |
| 363 | } |
| 364 | act = vmxnet3_run_xdp(rq, xdp: &xdp, prog: xdp_prog); |
| 365 | if (act != XDP_PASS) |
| 366 | return act; |
| 367 | |
| 368 | out_skb: |
| 369 | *skb_xdp_pass = vmxnet3_build_skb(rq, page, xdp: &xdp); |
| 370 | if (!*skb_xdp_pass) |
| 371 | return XDP_DROP; |
| 372 | |
| 373 | /* No need to refill. */ |
| 374 | return likely(*skb_xdp_pass) ? act : XDP_DROP; |
| 375 | } |
| 376 | |
| 377 | int |
| 378 | vmxnet3_process_xdp(struct vmxnet3_adapter *adapter, |
| 379 | struct vmxnet3_rx_queue *rq, |
| 380 | struct Vmxnet3_RxCompDesc *rcd, |
| 381 | struct vmxnet3_rx_buf_info *rbi, |
| 382 | struct Vmxnet3_RxDesc *rxd, |
| 383 | struct sk_buff **skb_xdp_pass) |
| 384 | { |
| 385 | struct bpf_prog *xdp_prog; |
| 386 | dma_addr_t new_dma_addr; |
| 387 | struct xdp_buff xdp; |
| 388 | struct page *page; |
| 389 | void *new_data; |
| 390 | int act; |
| 391 | |
| 392 | page = rbi->page; |
| 393 | dma_sync_single_for_cpu(dev: &adapter->pdev->dev, |
| 394 | addr: page_pool_get_dma_addr(page) + |
| 395 | rq->page_pool->p.offset, size: rbi->len, |
| 396 | dir: page_pool_get_dma_dir(pool: rq->page_pool)); |
| 397 | |
| 398 | xdp_init_buff(xdp: &xdp, PAGE_SIZE, rxq: &rq->xdp_rxq); |
| 399 | xdp_prepare_buff(xdp: &xdp, page_address(page), headroom: rq->page_pool->p.offset, |
| 400 | data_len: rcd->len, meta_valid: false); |
| 401 | xdp_buff_clear_frags_flag(xdp: &xdp); |
| 402 | |
| 403 | xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog); |
| 404 | if (!xdp_prog) { |
| 405 | act = XDP_PASS; |
| 406 | goto out_skb; |
| 407 | } |
| 408 | act = vmxnet3_run_xdp(rq, xdp: &xdp, prog: xdp_prog); |
| 409 | |
| 410 | if (act == XDP_PASS) { |
| 411 | out_skb: |
| 412 | *skb_xdp_pass = vmxnet3_build_skb(rq, page, xdp: &xdp); |
| 413 | if (!*skb_xdp_pass) |
| 414 | act = XDP_DROP; |
| 415 | } |
| 416 | |
| 417 | new_data = vmxnet3_pp_get_buff(pp: rq->page_pool, dma_addr: &new_dma_addr, |
| 418 | GFP_ATOMIC); |
| 419 | if (!new_data) { |
| 420 | rq->stats.rx_buf_alloc_failure++; |
| 421 | return XDP_DROP; |
| 422 | } |
| 423 | rbi->page = virt_to_page(new_data); |
| 424 | rbi->dma_addr = new_dma_addr; |
| 425 | rxd->addr = cpu_to_le64(rbi->dma_addr); |
| 426 | rxd->len = rbi->len; |
| 427 | |
| 428 | return act; |
| 429 | } |
| 430 | |