/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/**
 * struct netdev_config - queue-related configuration for a netdev
 * @hds_thresh: header-data split (HDS) threshold; packets larger than this
 *	are split into separate header and payload buffers
 * @hds_config: HDS setting requested by userspace, one of the
 *	ETHTOOL_TCP_DATA_SPLIT_* values
 */
struct netdev_config {
	u32	hds_thresh;
	u8	hds_config;
};
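
/* Example: how a driver might consult the HDS configuration when setting up
 * an Rx ring. This is a sketch: it assumes the active config is reachable
 * via dev->cfg and that @hds_config holds an ETHTOOL_TCP_DATA_SPLIT_* value;
 * the ring fields are hypothetical.
 *
 *	if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
 *		ring->rx_hds_thresh = dev->cfg->hds_thresh;
 */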

/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
	u64 bytes;
	u64 packets;
	u64 alloc_fail;

	u64 hw_drops;
	u64 hw_drop_overruns;

	u64 csum_complete;
	u64 csum_unnecessary;
	u64 csum_none;
	u64 csum_bad;

	u64 hw_gro_packets;
	u64 hw_gro_bytes;
	u64 hw_gro_wire_packets;
	u64 hw_gro_wire_bytes;

	u64 hw_drop_ratelimits;
};

struct netdev_queue_stats_tx {
	u64 bytes;
	u64 packets;

	u64 hw_drops;
	u64 hw_drop_errors;

	u64 csum_none;
	u64 needs_csum;

	u64 hw_gso_packets;
	u64 hw_gso_bytes;
	u64 hw_gso_wire_packets;
	u64 hw_gso_wire_bytes;

	u64 hw_drop_ratelimits;

	u64 stop;
	u64 wake;
};

/**
 * struct netdev_stat_ops - netdev ops for fine grained stats
 * @get_queue_stats_rx:	get stats for a given Rx queue
 * @get_queue_stats_tx:	get stats for a given Tx queue
 * @get_base_stats:	get base stats (not belonging to any live instance)
 *
 * Query stats for a given object. The values of the statistics are undefined
 * on entry (specifically they are *not* zero-initialized). Drivers should
 * assign values only to the statistics they collect. Statistics which are not
 * collected must be left undefined.
 *
 * Queue objects are not necessarily persistent, and only currently active
 * queues are queried by the per-queue callbacks. This means that per-queue
 * statistics will not generally add up to the total number of events for
 * the device. The @get_base_stats callback allows filling in the delta
 * between events for currently live queues and overall device history.
 * @get_base_stats can also be used to report any miscellaneous packets
 * transferred outside of the main set of queues used by the networking stack.
 * When the statistics for the entire device are queried, first @get_base_stats
 * is issued to collect the delta, and then a series of per-queue callbacks.
 * Only statistics which are set in @get_base_stats will be reported
 * at the device level, meaning that unlike in queue callbacks, setting
 * a statistic to zero in @get_base_stats is a legitimate thing to do.
 * This is because @get_base_stats has a second function of designating which
 * statistics are in fact correct for the entire device (e.g. when history
 * for some of the events is not maintained, and reliable "total" cannot
 * be provided).
 *
 * Ops are called under the instance lock if netdev_need_ops_lock()
 * returns true, otherwise under rtnl_lock.
 * Device drivers can assume that when collecting total device stats,
 * the @get_base_stats and subsequent per-queue calls are performed
 * "atomically" (without releasing the relevant lock).
 *
 * Device drivers are encouraged to reset the per-queue statistics when
 * the number of queues changes. This is because the primary use case for
 * per-queue statistics is currently to detect traffic imbalance.
 */
struct netdev_stat_ops {
	void (*get_queue_stats_rx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats);
	void (*get_queue_stats_tx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *stats);
	void (*get_base_stats)(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx);
};
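
/* Example: how a driver might wire up these ops (a sketch; the mydrv_*
 * names are hypothetical, not part of this API):
 *
 *	static void mydrv_get_queue_stats_rx(struct net_device *dev, int idx,
 *					     struct netdev_queue_stats_rx *rx)
 *	{
 *		struct mydrv_ring *ring = &mydrv_priv(dev)->rx_rings[idx];
 *
 *		// Assign only the statistics this driver collects;
 *		// everything else must be left undefined.
 *		rx->packets = ring->packets;
 *		rx->bytes = ring->bytes;
 *		rx->alloc_fail = ring->alloc_fail;
 *	}
 *
 *	static const struct netdev_stat_ops mydrv_stat_ops = {
 *		.get_queue_stats_rx	= mydrv_get_queue_stats_rx,
 *		.get_queue_stats_tx	= mydrv_get_queue_stats_tx,
 *		.get_base_stats		= mydrv_get_base_stats,
 *	};
 *
 * with dev->stat_ops = &mydrv_stat_ops assigned at probe time.
 */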

/* Sum the stats reported by the driver's per-queue callbacks over the given
 * ranges of Rx and Tx queue indexes, accumulating into @rx_sum and @tx_sum.
 */
void netdev_stat_queue_sum(struct net_device *netdev,
			   int rx_start, int rx_end,
			   struct netdev_queue_stats_rx *rx_sum,
			   int tx_start, int tx_end,
			   struct netdev_queue_stats_tx *tx_sum);
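
/* Example: a driver may call this helper from its @get_base_stats to fold
 * stats of queues beyond the currently active set into the base. A sketch;
 * max_rx/max_tx stand for the device's configured queue count maxima:
 *
 *	netdev_stat_queue_sum(dev, dev->real_num_rx_queues, max_rx, rx,
 *			      dev->real_num_tx_queues, max_tx, tx);
 */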

/**
 * struct netdev_queue_mgmt_ops - netdev ops for queue management
 *
 * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
 *
 * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
 *			 The new memory is written at the specified address.
 *
 * @ndo_queue_mem_free: Free memory from an RX queue.
 *
 * @ndo_queue_start: Start an RX queue with the specified memory and at the
 *		     specified index.
 *
 * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
 *		    queue's memory is written at the specified address.
 *
 * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
 * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
 * be called for an interface which is open.
 */
struct netdev_queue_mgmt_ops {
	size_t	ndo_queue_mem_size;
	int	(*ndo_queue_mem_alloc)(struct net_device *dev,
				       void *per_queue_mem,
				       int idx);
	void	(*ndo_queue_mem_free)(struct net_device *dev,
				      void *per_queue_mem);
	int	(*ndo_queue_start)(struct net_device *dev,
				   void *per_queue_mem,
				   int idx);
	int	(*ndo_queue_stop)(struct net_device *dev,
				  void *per_queue_mem,
				  int idx);
};
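
/* Example: the order in which these ops may be invoked to restart an RX
 * queue with fresh memory. A sketch with error handling elided; new_mem and
 * old_mem stand for buffers of ->ndo_queue_mem_size bytes:
 *
 *	qops->ndo_queue_mem_alloc(dev, new_mem, idx);
 *	qops->ndo_queue_stop(dev, old_mem, idx);	// only while open
 *	qops->ndo_queue_start(dev, new_mem, idx);
 *	qops->ndo_queue_mem_free(dev, old_mem);
 */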

/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake up should be triggered from NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but not run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake-ups when
 * the ring is full! Drivers should check for a full ring at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */

#define netif_txq_try_stop(txq, get_desc, start_thrs)			\
	({								\
		int _res;						\
									\
		netif_tx_stop_queue(txq);				\
		/* Producer index and stop bit must be visible		\
		 * to consumer before we recheck.			\
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */							\
		smp_mb__after_atomic();					\
									\
		/* We need to check again in case another		\
		 * CPU has just made room available.			\
		 */							\
		_res = 0;						\
		if (unlikely(get_desc >= start_thrs)) {			\
			netif_tx_start_queue(txq);			\
			_res = -1;					\
		}							\
		_res;							\
	})

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @stop_thrs:	minimal number of available descriptors for queue to be left
 *		enabled
 * @start_thrs:	minimal number of descriptors to re-enable the queue, can be
 *		equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times, so beware of side effects.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit, see the comment on top of the file.
 *
 * Returns:
 *	 0 if the queue was stopped
 *	 1 if the queue was left enabled
 *	-1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)	\
	({								\
		int _res;						\
									\
		_res = 1;						\
		if (unlikely(get_desc < stop_thrs))			\
			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
		_res;							\
	})
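
/* Example: typical use of netif_txq_maybe_stop() at the end of an
 * ndo_start_xmit handler, after the descriptors have been queued and the
 * producer index updated. A sketch; the mydrv_* names and the thresholds
 * are hypothetical:
 *
 *	// Stop the queue once a worst-case packet may no longer fit.
 *	netif_txq_maybe_stop(txq, mydrv_tx_avail(ring),
 *			     MYDRV_MAX_DESC_PER_SKB,
 *			     2 * MYDRV_MAX_DESC_PER_SKB);
 */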

/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @pkts:	number of packets completed
 * @bytes:	number of bytes completed
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @start_thrs:	minimal number of descriptors to re-enable the queue
 * @down_cond:	down condition, predicate indicating that the queue should
 *		not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_completed_wake(txq, pkts, bytes,			\
				   get_desc, start_thrs, down_cond)	\
	({								\
		int _res;						\
									\
		/* Report to BQL and piggy back on its barrier.		\
		 * Barrier makes sure that anybody stopping the queue	\
		 * after this point sees the new consumer index.	\
		 * Pairs with barrier in netif_txq_try_stop().		\
		 */							\
		netdev_txq_completed_mb(txq, pkts, bytes);		\
									\
		_res = -1;						\
		if (pkts && likely(get_desc >= start_thrs)) {		\
			_res = 1;					\
			if (unlikely(netif_tx_queue_stopped(txq)) &&	\
			    !(down_cond)) {				\
				netif_tx_wake_queue(txq);		\
				_res = 0;				\
			}						\
		}							\
		_res;							\
	})

#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
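
/* Example: typical use from a Tx completion path (NAPI poll context), after
 * descriptors have been reclaimed and the consumer index updated. A sketch;
 * the mydrv_* names are hypothetical:
 *
 *	netif_txq_completed_wake(txq, pkts, bytes, mydrv_tx_avail(ring),
 *				 2 * MYDRV_MAX_DESC_PER_SKB);
 */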

/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)	\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_try_stop(_txq, get_desc, start_thrs);		\
	})

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_maybe_stop(_txq, get_desc, stop_thrs, start_thrs); \
	})

#define netif_subqueue_completed_wake(dev, idx, pkts, bytes,		\
				      get_desc, start_thrs)		\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_completed_wake(_txq, pkts, bytes,		\
					 get_desc, start_thrs);		\
	})
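
/* Example: the subqueue forms take the netdev and ring index instead of the
 * struct netdev_queue. A sketch; the mydrv_* names are hypothetical:
 *
 *	u16 idx = skb_get_queue_mapping(skb);
 *
 *	netif_subqueue_maybe_stop(dev, idx, mydrv_tx_avail(ring),
 *				  MYDRV_MAX_DESC_PER_SKB,
 *				  2 * MYDRV_MAX_DESC_PER_SKB);
 */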

#endif /* _LINUX_NET_QUEUES_H */