// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux driver for VMware's vmxnet3 ethernet NIC.
 * Copyright (C) 2008-2023, VMware, Inc. All Rights Reserved.
 * Maintained by: pv-drivers@vmware.com
 *
 */

#include "vmxnet3_int.h"
#include "vmxnet3_xdp.h"

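/* Publish the new XDP program; RX-path readers pick it up through
 * rcu_dereference() of adapter->xdp_bpf_prog.
 */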
static void
vmxnet3_xdp_exchange_program(struct vmxnet3_adapter *adapter,
			     struct bpf_prog *prog)
{
	rcu_assign_pointer(adapter->xdp_bpf_prog, prog);
}

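/* Pick a TX queue for the current CPU, scaling the CPU id down when
 * there are fewer TX queues than CPUs.
 */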
static inline struct vmxnet3_tx_queue *
vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
{
	struct vmxnet3_tx_queue *tq;
	int tq_number;
	int cpu;

	tq_number = adapter->num_tx_queues;
	cpu = smp_processor_id();
	if (likely(cpu < tq_number))
		tq = &adapter->tx_queue[cpu];
	else
		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];

	return tq;
}

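/* Handle XDP_SETUP_PROG: swap in the new program and, if its presence
 * changed while the device is running, quiesce the device, re-create
 * the RX queues and re-activate it.
 */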
static int
vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
		struct netlink_ext_ack *extack)
{
	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
	struct bpf_prog *new_bpf_prog = bpf->prog;
	struct bpf_prog *old_bpf_prog;
	bool need_update;
	bool running;
	int err;

	if (new_bpf_prog && netdev->mtu > VMXNET3_XDP_MAX_MTU) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "MTU %u too large for XDP",
				       netdev->mtu);
		return -EOPNOTSUPP;
	}

	if (adapter->netdev->features & NETIF_F_LRO) {
		NL_SET_ERR_MSG_MOD(extack, "LRO is not supported with XDP");
		adapter->netdev->features &= ~NETIF_F_LRO;
	}

	old_bpf_prog = rcu_dereference(adapter->xdp_bpf_prog);
	if (!new_bpf_prog && !old_bpf_prog)
		return 0;

	running = netif_running(netdev);
	need_update = !!old_bpf_prog != !!new_bpf_prog;

	if (running && need_update)
		vmxnet3_quiesce_dev(adapter);

	vmxnet3_xdp_exchange_program(adapter, new_bpf_prog);
	if (old_bpf_prog)
		bpf_prog_put(old_bpf_prog);

	if (!running || !need_update)
		return 0;

	if (new_bpf_prog)
		xdp_features_set_redirect_target(netdev, false);
	else
		xdp_features_clear_redirect_target(netdev);

	vmxnet3_reset_dev(adapter);
	vmxnet3_rq_destroy_all(adapter);
	vmxnet3_adjust_rx_ring_size(adapter);
	err = vmxnet3_rq_create_all(adapter);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to re-create rx queues for XDP.");
		return -EOPNOTSUPP;
	}
	err = vmxnet3_activate_dev(adapter);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to activate device for XDP.");
		return -EOPNOTSUPP;
	}
	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);

	return 0;
}

/* This is the main xdp call used by the kernel to set/unset an eBPF program. */
int
vmxnet3_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
{
	switch (bpf->command) {
	case XDP_SETUP_PROG:
		return vmxnet3_xdp_set(netdev, bpf, bpf->extack);
	default:
		return -EINVAL;
	}

	return 0;
}

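/* Fill one TX descriptor for a single XDP frame. For ndo_xdp_xmit
 * (dma_map == true) the frame data is DMA-mapped here; for XDP_TX the
 * buffer already belongs to the page pool and is only synced for the
 * device.
 */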
static int
vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
		       struct xdp_frame *xdpf,
		       struct vmxnet3_tx_queue *tq, bool dma_map)
{
	struct vmxnet3_tx_buf_info *tbi = NULL;
	union Vmxnet3_GenericDesc *gdesc;
	struct vmxnet3_tx_ctx ctx;
	int tx_num_deferred;
	struct page *page;
	u32 buf_size;
	u32 dw2;

	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
	dw2 |= xdpf->len;
	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
	gdesc = ctx.sop_txd;

	buf_size = xdpf->len;
	tbi = tq->buf_info + tq->tx_ring.next2fill;

	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
		tq->stats.tx_ring_full++;
		return -ENOSPC;
	}

	tbi->map_type = VMXNET3_MAP_XDP;
	if (dma_map) { /* ndo_xdp_xmit */
		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
					       xdpf->data, buf_size,
					       DMA_TO_DEVICE);
		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
			return -EFAULT;
		tbi->map_type |= VMXNET3_MAP_SINGLE;
	} else { /* XDP buffer from page pool */
		page = virt_to_page(xdpf->data);
		tbi->dma_addr = page_pool_get_dma_addr(page) +
				VMXNET3_XDP_HEADROOM;
		dma_sync_single_for_device(&adapter->pdev->dev,
					   tbi->dma_addr, buf_size,
					   DMA_TO_DEVICE);
	}
	tbi->xdpf = xdpf;
	tbi->len = buf_size;

	gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
	WARN_ON_ONCE(gdesc->txd.gen == tq->tx_ring.gen);

	gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
	gdesc->dword[2] = cpu_to_le32(dw2);

	/* Setup the EOP desc */
	gdesc->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);

	gdesc->txd.om = 0;
	gdesc->txd.msscof = 0;
	gdesc->txd.hlen = 0;
	gdesc->txd.ti = 0;

	tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
	le32_add_cpu(&tq->shared->txNumDeferred, 1);
	tx_num_deferred++;

	vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);

	/* set the last buf_info for the pkt */
	tbi->sop_idx = ctx.sop_txd - tq->tx_ring.base;

	dma_wmb();
	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
				      VMXNET3_TXD_GEN);

	/* No need to handle the case when tx_num_deferred doesn't reach
	 * threshold. Backend driver at hypervisor side will poll and reset
	 * tq->shared->txNumDeferred to 0.
	 */
	if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
		tq->shared->txNumDeferred = 0;
		VMXNET3_WRITE_BAR0_REG(adapter,
				       VMXNET3_REG_TXPROD + tq->qid * 8,
				       tq->tx_ring.next2fill);
	}

	return 0;
}

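/* Send an XDP_TX frame out on this CPU's TX queue, holding the
 * corresponding netdev TX queue lock.
 */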
static int
vmxnet3_xdp_xmit_back(struct vmxnet3_adapter *adapter,
		      struct xdp_frame *xdpf)
{
	struct vmxnet3_tx_queue *tq;
	struct netdev_queue *nq;
	int err;

	tq = vmxnet3_xdp_get_tq(adapter);
	if (tq->stopped)
		return -ENETDOWN;

	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);

	__netif_tx_lock(nq, smp_processor_id());
	err = vmxnet3_xdp_xmit_frame(adapter, xdpf, tq, false);
	__netif_tx_unlock(nq);

	return err;
}

/* ndo_xdp_xmit */
int
vmxnet3_xdp_xmit(struct net_device *dev,
		 int n, struct xdp_frame **frames, u32 flags)
{
	struct vmxnet3_adapter *adapter = netdev_priv(dev);
	struct vmxnet3_tx_queue *tq;
	int i;

	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
		return -ENETDOWN;
	if (unlikely(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)))
		return -EINVAL;

	tq = vmxnet3_xdp_get_tq(adapter);
	if (tq->stopped)
		return -ENETDOWN;

	for (i = 0; i < n; i++) {
		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
			tq->stats.xdp_xmit_err++;
			break;
		}
	}
	tq->stats.xdp_xmit += i;

	return i;
}

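/* Run the attached XDP program on one RX buffer and act on its verdict.
 * Buffers that are neither passed up the stack nor successfully
 * redirected or transmitted are recycled back to the page pool.
 */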
static int
vmxnet3_run_xdp(struct vmxnet3_rx_queue *rq, struct xdp_buff *xdp,
		struct bpf_prog *prog)
{
	struct xdp_frame *xdpf;
	struct page *page;
	int err;
	u32 act;

	rq->stats.xdp_packets++;
	act = bpf_prog_run_xdp(prog, xdp);
	page = virt_to_page(xdp->data_hard_start);

	switch (act) {
	case XDP_PASS:
		return act;
	case XDP_REDIRECT:
		err = xdp_do_redirect(rq->adapter->netdev, xdp, prog);
		if (!err) {
			rq->stats.xdp_redirects++;
		} else {
			rq->stats.xdp_drops++;
			page_pool_recycle_direct(rq->page_pool, page);
		}
		return act;
	case XDP_TX:
		xdpf = xdp_convert_buff_to_frame(xdp);
		if (unlikely(!xdpf ||
			     vmxnet3_xdp_xmit_back(rq->adapter, xdpf))) {
			rq->stats.xdp_drops++;
			page_pool_recycle_direct(rq->page_pool, page);
		} else {
			rq->stats.xdp_tx++;
		}
		return act;
	default:
		bpf_warn_invalid_xdp_action(rq->adapter->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(rq->adapter->netdev, prog, act);
		rq->stats.xdp_aborted++;
		break;
	case XDP_DROP:
		rq->stats.xdp_drops++;
		break;
	}

	page_pool_recycle_direct(rq->page_pool, page);

	return act;
}

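/* Build an skb around the page-pool page of an XDP_PASS packet, keeping
 * any headroom and length adjustments made by the BPF program.
 */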
static struct sk_buff *
vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page,
		  const struct xdp_buff *xdp)
{
	struct sk_buff *skb;

	skb = build_skb(page_address(page), PAGE_SIZE);
	if (unlikely(!skb)) {
		page_pool_recycle_direct(rq->page_pool, page);
		rq->stats.rx_buf_alloc_failure++;
		return NULL;
	}

	/* bpf prog might change len and data position. */
	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	skb_put(skb, xdp->data_end - xdp->data);
	skb_mark_for_recycle(skb);

	return skb;
}

/* Handle packets from DataRing. */
int
vmxnet3_process_xdp_small(struct vmxnet3_adapter *adapter,
			  struct vmxnet3_rx_queue *rq,
			  void *data, int len,
			  struct sk_buff **skb_xdp_pass)
{
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp;
	struct page *page;
	int act;

	page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC);
	if (unlikely(!page)) {
		rq->stats.rx_buf_alloc_failure++;
		return XDP_DROP;
	}

	xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
			 len, false);
	xdp_buff_clear_frags_flag(&xdp);

	/* Must copy the data because it's at dataring. */
	memcpy(xdp.data, data, len);

	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
	if (!xdp_prog) {
		act = XDP_PASS;
		goto out_skb;
	}
	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);
	if (act != XDP_PASS)
		return act;

out_skb:
	*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
	if (!*skb_xdp_pass)
		return XDP_DROP;

	/* No need to refill. */
	return likely(*skb_xdp_pass) ? act : XDP_DROP;
}

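/* Handle packets from the regular RX ring: run XDP on the page-pool
 * buffer and then refill the RX descriptor with a fresh page-pool
 * buffer, whatever the verdict was.
 */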
int
vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
		    struct vmxnet3_rx_queue *rq,
		    struct Vmxnet3_RxCompDesc *rcd,
		    struct vmxnet3_rx_buf_info *rbi,
		    struct Vmxnet3_RxDesc *rxd,
		    struct sk_buff **skb_xdp_pass)
{
	struct bpf_prog *xdp_prog;
	dma_addr_t new_dma_addr;
	struct xdp_buff xdp;
	struct page *page;
	void *new_data;
	int act;

	page = rbi->page;
	dma_sync_single_for_cpu(&adapter->pdev->dev,
				page_pool_get_dma_addr(page) +
				rq->page_pool->p.offset, rcd->len,
				page_pool_get_dma_dir(rq->page_pool));

	xdp_init_buff(&xdp, rbi->len, &rq->xdp_rxq);
	xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset,
			 rcd->len, false);
	xdp_buff_clear_frags_flag(&xdp);

	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
	if (!xdp_prog) {
		act = XDP_PASS;
		goto out_skb;
	}
	act = vmxnet3_run_xdp(rq, &xdp, xdp_prog);

	if (act == XDP_PASS) {
out_skb:
		*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
		if (!*skb_xdp_pass)
			act = XDP_DROP;
	}

	new_data = vmxnet3_pp_get_buff(rq->page_pool, &new_dma_addr,
				       GFP_ATOMIC);
	if (!new_data) {
		rq->stats.rx_buf_alloc_failure++;
		return XDP_DROP;
	}
	rbi->page = virt_to_page(new_data);
	rbi->dma_addr = new_dma_addr;
	rxd->addr = cpu_to_le64(rbi->dma_addr);
	rxd->len = rbi->len;

	return act;
}