// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include <linux/cache.h>
#include "net_driver.h"
#include "efx.h"
#include "io.h"
#include "nic.h"
#include "tx.h"
#include "tx_common.h"
#include "workarounds.h"

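/* Return a pointer into the per-queue copy buffer page for the current
 * insert position, allocating the page on first use.  The buffer's DMA
 * address is pointed at the same offset.  Returns NULL if the page cannot
 * be allocated.
 */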
static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
					 struct efx_tx_buffer *buffer)
{
	unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
	struct efx_buffer *page_buf =
		&tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
	unsigned int offset =
		((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);

	if (unlikely(!page_buf->addr) &&
	    efx_siena_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
				   GFP_ATOMIC))
		return NULL;
	buffer->dma_addr = page_buf->dma_addr + offset;
	buffer->unmap_len = 0;
	return (u8 *)page_buf->addr + offset;
}

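/* Stop the core TX queue if the fill level of this queue's channel has
 * reached the stop threshold, re-checking against fresh read counts (and
 * restarting the queue) to avoid racing with the completion path.
 */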
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
	/* We need to consider all queues that the net core sees as one */
	struct efx_nic *efx = txq1->efx;
	struct efx_tx_queue *txq2;
	unsigned int fill_level;

	fill_level = efx_channel_tx_old_fill_level(txq1->channel);
	if (likely(fill_level < efx->txq_stop_thresh))
		return;

	/* We used the stale old_read_count above, which gives us a
	 * pessimistic estimate of the fill level (which may even
	 * validly be >= efx->txq_entries).  Now try again using
	 * read_count (more likely to be a cache miss).
	 *
	 * If we read read_count and then conditionally stop the
	 * queue, it is possible for the completion path to race with
	 * us and complete all outstanding descriptors in the middle,
	 * after which there will be no more completions to wake it.
	 * Therefore we stop the queue first, then read read_count
	 * (with a memory barrier to ensure the ordering), then
	 * restart the queue if the fill level turns out to be low
	 * enough.
	 */
	netif_tx_stop_queue(txq1->core_txq);
	smp_mb();
	efx_for_each_channel_tx_queue(txq2, txq1->channel)
		txq2->old_read_count = READ_ONCE(txq2->read_count);

	fill_level = efx_channel_tx_old_fill_level(txq1->channel);
	EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
	if (likely(fill_level < efx->txq_stop_thresh)) {
		smp_mb();
		if (likely(!efx->loopback_selftest))
			netif_tx_start_queue(txq1->core_txq);
	}
}

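/* Copy a whole (short) skb into the queue's copy buffer and build a single
 * descriptor for it, so that small or fragmented packets can be sent from a
 * single, already-mapped buffer.
 */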
static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
				struct sk_buff *skb)
{
	unsigned int copy_len = skb->len;
	struct efx_tx_buffer *buffer;
	u8 *copy_buffer;
	int rc;

	EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE);

	buffer = efx_tx_queue_get_insert_buffer(tx_queue);

	copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
	if (unlikely(!copy_buffer))
		return -ENOMEM;

	rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
	EFX_WARN_ON_PARANOID(rc);
	buffer->len = copy_len;

	buffer->skb = skb;
	buffer->flags = EFX_TX_BUF_SKB;

	++tx_queue->insert_count;
	return rc;
}

/* Send any pending traffic for a channel.  xmit_more is shared across all
 * queues for a channel, so we must check all of them.
 */
static void efx_tx_send_pending(struct efx_channel *channel)
{
	struct efx_tx_queue *q;

	efx_for_each_channel_tx_queue(q, channel) {
		if (q->xmit_pending)
			efx_nic_push_buffers(q);
	}
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped, and
 * the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_siena_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK.
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t __efx_siena_enqueue_skb(struct efx_tx_queue *tx_queue,
				    struct sk_buff *skb)
{
	unsigned int old_insert_count = tx_queue->insert_count;
	bool xmit_more = netdev_xmit_more();
	bool data_mapped = false;
	unsigned int segments;
	unsigned int skb_len;
	int rc;

	skb_len = skb->len;
	segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
	if (segments == 1)
		segments = 0; /* Don't use TSO for a single segment. */

	/* Handle TSO first - it's *possible* (although unlikely) that we might
	 * be passed a packet to segment that's smaller than the copybreak/PIO
	 * size limit.
	 */
	if (segments) {
		rc = efx_siena_tx_tso_fallback(tx_queue, skb);
		tx_queue->tso_fallbacks++;
		if (rc == 0)
			return 0;
		goto err;
	} else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
		/* Pad short packets or coalesce short fragmented packets. */
		if (efx_enqueue_skb_copy(tx_queue, skb))
			goto err;
		tx_queue->cb_packets++;
		data_mapped = true;
	}

	/* Map for DMA and create descriptors if we haven't done so already. */
	if (!data_mapped && (efx_siena_tx_map_data(tx_queue, skb, segments)))
		goto err;

	efx_tx_maybe_stop_queue(tx_queue);

	tx_queue->xmit_pending = true;

	/* Pass off to hardware */
	if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more))
		efx_tx_send_pending(tx_queue->channel);

	tx_queue->tx_packets++;
	return NETDEV_TX_OK;

err:
	efx_siena_enqueue_unwind(tx_queue, old_insert_count);
	dev_kfree_skb_any(skb);

	/* If we're not expecting another transmit and we had something to push
	 * on this queue or a partner queue then we need to push here to get the
	 * previous packets out.
	 */
	if (!xmit_more)
		efx_tx_send_pending(tx_queue->channel);

	return NETDEV_TX_OK;
}

/* Transmit a packet from an XDP buffer
 *
 * Returns number of packets sent on success, error code otherwise.
 * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
 * (for XDP redirect).
 */
int efx_siena_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
			     bool flush)
{
	struct efx_tx_buffer *tx_buffer;
	struct efx_tx_queue *tx_queue;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	unsigned int len;
	int space;
	int cpu;
	int i = 0;

	if (unlikely(n && !xdpfs))
		return -EINVAL;
	if (unlikely(!n))
		return 0;

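	/* XDP TX queues are assigned per CPU; give up if this CPU has no
	 * usable queue.
	 */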
	cpu = raw_smp_processor_id();
	if (unlikely(cpu >= efx->xdp_tx_queue_count))
		return -EINVAL;

	tx_queue = efx->xdp_tx_queues[cpu];
	if (unlikely(!tx_queue))
		return -EINVAL;

	if (!tx_queue->initialised)
		return -EINVAL;

	if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
		HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu);

	/* If we're borrowing net stack queues we have to handle stop-restart
	 * or we might block the queue and it will be considered frozen.
	 */
	if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) {
		if (netif_tx_queue_stopped(tx_queue->core_txq))
			goto unlock;
		efx_tx_maybe_stop_queue(tx_queue);
	}

	/* Check for available space. We should never need multiple
	 * descriptors per frame.
	 */
	space = efx->txq_entries +
		tx_queue->read_count - tx_queue->insert_count;

	for (i = 0; i < n; i++) {
		xdpf = xdpfs[i];

		if (i >= space)
			break;

		/* We'll want a descriptor for this tx. */
		prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));

		len = xdpf->len;

		/* Map for DMA. */
		dma_addr = dma_map_single(&efx->pci_dev->dev,
					  xdpf->data, len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
			break;

		/* Create descriptor and set up for unmapping DMA. */
		tx_buffer = efx_siena_tx_map_chunk(tx_queue, dma_addr, len);
		tx_buffer->xdpf = xdpf;
		tx_buffer->flags = EFX_TX_BUF_XDP |
				   EFX_TX_BUF_MAP_SINGLE;
		tx_buffer->dma_offset = 0;
		tx_buffer->unmap_len = len;
		tx_queue->tx_packets++;
	}

	/* Pass mapped frames to hardware. */
	if (flush && i > 0)
		efx_nic_push_buffers(tx_queue);

unlock:
	if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
		HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq);

	return i == 0 ? -EIO : i;
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).
 *
 * Context: non-blocking.
 * Should always return NETDEV_TX_OK and consume the skb.
 */
netdev_tx_t efx_siena_hard_start_xmit(struct sk_buff *skb,
				      struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;
	unsigned index, type;

	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));

	index = skb_get_queue_mapping(skb);
	type = efx_tx_csum_type_skb(skb);
	if (index >= efx->n_tx_channels) {
		index -= efx->n_tx_channels;
		type |= EFX_TXQ_TYPE_HIGHPRI;
	}

	/* PTP "event" packet */
	if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
	    ((efx_siena_ptp_use_mac_tx_timestamps(efx) && efx->ptp_data) ||
	     unlikely(efx_siena_ptp_is_ptp_tx(efx, skb)))) {
		/* There may be existing transmits on the channel that are
		 * waiting for this packet to trigger the doorbell write.
		 * We need to send the packets at this point.
		 */
		efx_tx_send_pending(efx_get_tx_channel(efx, index));
		return efx_siena_ptp_tx(efx, skb);
	}

	tx_queue = efx_get_tx_queue(efx, index, type);
	if (WARN_ON_ONCE(!tx_queue)) {
		/* We don't have a TXQ of the right type.
		 * This should never happen, as we don't advertise offload
		 * features unless we can support them.
		 */
		dev_kfree_skb_any(skb);
		/* If we're not expecting another transmit and we had something
		 * to push on this queue or a partner queue then we need to
		 * push here to get the previous packets out.
		 */
		if (!netdev_xmit_more())
			efx_tx_send_pending(efx_get_tx_channel(efx, index));
		return NETDEV_TX_OK;
	}

	return __efx_siena_enqueue_skb(tx_queue, skb);
}

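/* Associate a hardware TX queue with the net core TX queue that maps to it. */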
void efx_siena_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;

	/* Must be inverse of queue lookup in efx_siena_hard_start_xmit() */
	tx_queue->core_txq =
		netdev_get_tx_queue(efx->net_dev,
				    tx_queue->channel->channel +
				    ((tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
				     efx->n_tx_channels : 0));
}

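/* Handle a TC_SETUP_QDISC_MQPRIO request: map each traffic class onto a
 * block of this NIC's TX channels (highpri queues, Siena only).
 */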
int efx_siena_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
		       void *type_data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct tc_mqprio_qopt *mqprio = type_data;
	unsigned tc, num_tc;

	if (type != TC_SETUP_QDISC_MQPRIO)
		return -EOPNOTSUPP;

	/* Only Siena supported highpri queues */
	if (efx_nic_rev(efx) > EFX_REV_SIENA_A0)
		return -EOPNOTSUPP;

	num_tc = mqprio->num_tc;

	if (num_tc > EFX_MAX_TX_TC)
		return -EINVAL;

	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;

	if (num_tc == net_dev->num_tc)
		return 0;

	for (tc = 0; tc < num_tc; tc++) {
		net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
		net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
	}

	net_dev->num_tc = num_tc;

	return netif_set_real_num_tx_queues(net_dev,
					    max_t(int, num_tc, 1) *
					    efx->n_tx_channels);
}