1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* drivers/net/ifb.c: |
3 | |
4 | The purpose of this driver is to provide a device that allows |
5 | for sharing of resources: |
6 | |
	1) qdiscs/policies that are per device as opposed to system wide.
	ifb provides a device that traffic can be redirected to, thus
	giving the impression of sharing.

	2) Allows incoming traffic to be queued for shaping instead of
	being dropped.
13 | |
14 | The original concept is based on what is known as the IMQ |
15 | driver initially written by Martin Devera, later rewritten |
16 | by Patrick McHardy and then maintained by Andre Correa. |
17 | |
	You need the tc mirred action (mirror or redirect) to feed this
	device packets; see the example below.
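
	A minimal setup sketch (eth0, ifb0 and the sfq qdisc are
	illustrative choices, not requirements):

	ip link set ifb0 up
	tc qdisc add dev eth0 handle ffff: ingress
	tc filter add dev eth0 parent ffff: matchall \
		action mirred egress redirect dev ifb0
	tc qdisc add dev ifb0 root sfq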
20 | |
21 | |
22 | Authors: Jamal Hadi Salim (2005) |
23 | |
24 | */ |
25 | |
26 | |
27 | #include <linux/module.h> |
28 | #include <linux/kernel.h> |
29 | #include <linux/netdevice.h> |
30 | #include <linux/ethtool.h> |
31 | #include <linux/etherdevice.h> |
32 | #include <linux/init.h> |
33 | #include <linux/interrupt.h> |
34 | #include <linux/moduleparam.h> |
35 | #include <linux/netfilter_netdev.h> |
36 | #include <net/pkt_sched.h> |
37 | #include <net/net_namespace.h> |
38 | |
39 | #define TX_Q_LIMIT 32 |
40 | |
41 | struct ifb_q_stats { |
42 | u64 packets; |
43 | u64 bytes; |
44 | struct u64_stats_sync sync; |
45 | }; |
46 | |
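/* Per tx-queue state. ifb_xmit() enqueues redirected packets on @rq;
 * the queue's tasklet splices them over to @tq and reinjects them from
 * there. One instance per tx queue, cacheline aligned so queues do not
 * false-share.
 */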
47 | struct ifb_q_private { |
48 | struct net_device *dev; |
49 | struct tasklet_struct ifb_tasklet; |
50 | int tasklet_pending; |
51 | int txqnum; |
52 | struct sk_buff_head rq; |
53 | struct sk_buff_head tq; |
54 | struct ifb_q_stats rx_stats; |
55 | struct ifb_q_stats tx_stats; |
56 | } ____cacheline_aligned_in_smp; |
57 | |
58 | struct ifb_dev_private { |
59 | struct ifb_q_private *tx_private; |
60 | }; |
61 | |
/* For ethtool stats. */
63 | struct ifb_q_stats_desc { |
64 | char desc[ETH_GSTRING_LEN]; |
65 | size_t offset; |
66 | }; |
67 | |
68 | #define IFB_Q_STAT(m) offsetof(struct ifb_q_stats, m) |
69 | |
70 | static const struct ifb_q_stats_desc ifb_q_stats_desc[] = { |
71 | { "packets" , IFB_Q_STAT(packets) }, |
72 | { "bytes" , IFB_Q_STAT(bytes) }, |
73 | }; |
74 | |
75 | #define IFB_Q_STATS_LEN ARRAY_SIZE(ifb_q_stats_desc) |
76 | |
77 | static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev); |
78 | static int ifb_open(struct net_device *dev); |
79 | static int ifb_close(struct net_device *dev); |
80 | |
81 | static void ifb_update_q_stats(struct ifb_q_stats *stats, int len) |
82 | { |
	u64_stats_update_begin(&stats->sync);
	stats->packets++;
	stats->bytes += len;
	u64_stats_update_end(&stats->sync);
87 | } |
88 | |
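/* Per-queue worker. Moves packets queued by ifb_xmit() from @rq to @tq
 * under the tx lock, then hands each one back to the stack: packets
 * redirected on egress go out via dev_queue_xmit() on the device
 * recorded in skb->skb_iif, packets taken from ingress are reinjected
 * with netif_receive_skb(). skb->redirected is cleared and tc/netfilter
 * classification is skipped so the packet cannot loop back into ifb.
 */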
89 | static void ifb_ri_tasklet(struct tasklet_struct *t) |
90 | { |
91 | struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet); |
92 | struct netdev_queue *txq; |
93 | struct sk_buff *skb; |
94 | |
	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
105 | /* Skip tc and netfilter to prevent redirection loop. */ |
106 | skb->redirected = 0; |
107 | #ifdef CONFIG_NET_CLS_ACT |
108 | skb->tc_skip_classify = 1; |
109 | #endif |
		nf_skip_egress(skb, true);

		ifb_update_q_stats(&txp->tx_stats, skb->len);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
121 | goto resched; |
122 | break; |
123 | } |
124 | rcu_read_unlock(); |
125 | skb->skb_iif = txp->dev->ifindex; |
126 | |
127 | if (!skb->from_ingress) { |
128 | dev_queue_xmit(skb); |
129 | } else { |
			skb_pull_rcsum(skb, skb->mac_len);
131 | netif_receive_skb(skb); |
132 | } |
133 | } |
134 | |
135 | if (__netif_tx_trylock(txq)) { |
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}
}
153 | |
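/* Fold the per-queue counters into the device-wide totals. The
 * u64_stats fetch/retry loop gives consistent 64-bit reads even on
 * 32-bit hosts.
 */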
154 | static void ifb_stats64(struct net_device *dev, |
155 | struct rtnl_link_stats64 *stats) |
156 | { |
157 | struct ifb_dev_private *dp = netdev_priv(dev); |
158 | struct ifb_q_private *txp = dp->tx_private; |
159 | unsigned int start; |
160 | u64 packets, bytes; |
161 | int i; |
162 | |
	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		do {
			start = u64_stats_fetch_begin(&txp->rx_stats.sync);
			packets = txp->rx_stats.packets;
			bytes = txp->rx_stats.bytes;
		} while (u64_stats_fetch_retry(&txp->rx_stats.sync, start));
		stats->rx_packets += packets;
		stats->rx_bytes += bytes;

		do {
			start = u64_stats_fetch_begin(&txp->tx_stats.sync);
			packets = txp->tx_stats.packets;
			bytes = txp->tx_stats.bytes;
		} while (u64_stats_fetch_retry(&txp->tx_stats.sync, start));
177 | stats->tx_packets += packets; |
178 | stats->tx_bytes += bytes; |
179 | } |
180 | stats->rx_dropped = dev->stats.rx_dropped; |
181 | stats->tx_dropped = dev->stats.tx_dropped; |
182 | } |
183 | |
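/* ndo_init: allocate one ifb_q_private per tx queue and initialize its
 * skb queues, stats and tasklet.
 */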
184 | static int ifb_dev_init(struct net_device *dev) |
185 | { |
186 | struct ifb_dev_private *dp = netdev_priv(dev); |
187 | struct ifb_q_private *txp; |
188 | int i; |
189 | |
	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
191 | if (!txp) |
192 | return -ENOMEM; |
193 | dp->tx_private = txp; |
	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		txp->txqnum = i;
		txp->dev = dev;
		__skb_queue_head_init(&txp->rq);
		__skb_queue_head_init(&txp->tq);
		u64_stats_init(&txp->rx_stats.sync);
		u64_stats_init(&txp->tx_stats.sync);
		tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
	}
203 | } |
204 | return 0; |
205 | } |
206 | |
207 | static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf) |
208 | { |
209 | u8 *p = buf; |
210 | int i, j; |
211 | |
212 | switch (stringset) { |
213 | case ETH_SS_STATS: |
214 | for (i = 0; i < dev->real_num_rx_queues; i++) |
215 | for (j = 0; j < IFB_Q_STATS_LEN; j++) |
				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);

		for (i = 0; i < dev->real_num_tx_queues; i++)
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);
223 | |
224 | break; |
225 | } |
226 | } |
227 | |
228 | static int ifb_get_sset_count(struct net_device *dev, int sset) |
229 | { |
230 | switch (sset) { |
231 | case ETH_SS_STATS: |
232 | return IFB_Q_STATS_LEN * (dev->real_num_rx_queues + |
233 | dev->real_num_tx_queues); |
234 | default: |
235 | return -EOPNOTSUPP; |
236 | } |
237 | } |
238 | |
239 | static void ifb_fill_stats_data(u64 **data, |
240 | struct ifb_q_stats *q_stats) |
241 | { |
242 | void *stats_base = (void *)q_stats; |
243 | unsigned int start; |
244 | size_t offset; |
245 | int j; |
246 | |
247 | do { |
		start = u64_stats_fetch_begin(&q_stats->sync);
		for (j = 0; j < IFB_Q_STATS_LEN; j++) {
			offset = ifb_q_stats_desc[j].offset;
			(*data)[j] = *(u64 *)(stats_base + offset);
		}
	} while (u64_stats_fetch_retry(&q_stats->sync, start));
254 | |
255 | *data += IFB_Q_STATS_LEN; |
256 | } |
257 | |
258 | static void ifb_get_ethtool_stats(struct net_device *dev, |
259 | struct ethtool_stats *stats, u64 *data) |
260 | { |
261 | struct ifb_dev_private *dp = netdev_priv(dev); |
262 | struct ifb_q_private *txp; |
263 | int i; |
264 | |
265 | for (i = 0; i < dev->real_num_rx_queues; i++) { |
266 | txp = dp->tx_private + i; |
		ifb_fill_stats_data(&data, &txp->rx_stats);
268 | } |
269 | |
270 | for (i = 0; i < dev->real_num_tx_queues; i++) { |
271 | txp = dp->tx_private + i; |
		ifb_fill_stats_data(&data, &txp->tx_stats);
273 | } |
274 | } |
275 | |
276 | static const struct net_device_ops ifb_netdev_ops = { |
277 | .ndo_open = ifb_open, |
278 | .ndo_stop = ifb_close, |
279 | .ndo_get_stats64 = ifb_stats64, |
280 | .ndo_start_xmit = ifb_xmit, |
281 | .ndo_validate_addr = eth_validate_addr, |
282 | .ndo_init = ifb_dev_init, |
283 | }; |
284 | |
285 | static const struct ethtool_ops ifb_ethtool_ops = { |
286 | .get_strings = ifb_get_strings, |
287 | .get_sset_count = ifb_get_sset_count, |
288 | .get_ethtool_stats = ifb_get_ethtool_stats, |
289 | }; |
290 | |
291 | #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ |
292 | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ |
293 | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \ |
294 | NETIF_F_HW_VLAN_STAG_TX) |
295 | |
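/* Undo ifb_dev_init(): kill the tasklets, purge any packets still
 * queued, and free the per-queue array.
 */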
296 | static void ifb_dev_free(struct net_device *dev) |
297 | { |
298 | struct ifb_dev_private *dp = netdev_priv(dev); |
299 | struct ifb_q_private *txp = dp->tx_private; |
300 | int i; |
301 | |
	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		tasklet_kill(&txp->ifb_tasklet);
		__skb_queue_purge(&txp->rq);
		__skb_queue_purge(&txp->tq);
	}
	kfree(dp->tx_private);
308 | } |
309 | |
310 | static void ifb_setup(struct net_device *dev) |
311 | { |
312 | /* Initialize the device structure. */ |
313 | dev->netdev_ops = &ifb_netdev_ops; |
314 | dev->ethtool_ops = &ifb_ethtool_ops; |
315 | |
316 | /* Fill in device structure with ethernet-generic values. */ |
317 | ether_setup(dev); |
318 | dev->tx_queue_len = TX_Q_LIMIT; |
319 | |
320 | dev->features |= IFB_FEATURES; |
321 | dev->hw_features |= dev->features; |
322 | dev->hw_enc_features |= dev->features; |
323 | dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | |
324 | NETIF_F_HW_VLAN_STAG_TX); |
325 | |
326 | dev->flags |= IFF_NOARP; |
327 | dev->flags &= ~IFF_MULTICAST; |
328 | dev->priv_flags &= ~IFF_TX_SKB_SHARING; |
329 | netif_keep_dst(dev); |
330 | eth_hw_addr_random(dev); |
331 | dev->needs_free_netdev = true; |
332 | dev->priv_destructor = ifb_dev_free; |
333 | |
334 | dev->min_mtu = 0; |
335 | dev->max_mtu = 0; |
336 | } |
337 | |
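/* Receives packets that were redirected to ifb. Anything that was not
 * redirected here (or carries no originating ifindex) is dropped;
 * everything else is counted as RX, queued on the per-queue @rq and
 * left for the tasklet. The tx queue is stopped once @rq reaches
 * tx_queue_len.
 */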
338 | static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) |
339 | { |
340 | struct ifb_dev_private *dp = netdev_priv(dev); |
341 | struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb); |
342 | |
	ifb_update_q_stats(&txp->rx_stats, skb->len);
344 | |
345 | if (!skb->redirected || !skb->skb_iif) { |
346 | dev_kfree_skb(skb); |
347 | dev->stats.rx_dropped++; |
348 | return NETDEV_TX_OK; |
349 | } |
350 | |
	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));

	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}
358 | } |
359 | |
360 | return NETDEV_TX_OK; |
361 | } |
362 | |
363 | static int ifb_close(struct net_device *dev) |
364 | { |
365 | netif_tx_stop_all_queues(dev); |
366 | return 0; |
367 | } |
368 | |
369 | static int ifb_open(struct net_device *dev) |
370 | { |
371 | netif_tx_start_all_queues(dev); |
372 | return 0; |
373 | } |
374 | |
375 | static int ifb_validate(struct nlattr *tb[], struct nlattr *data[], |
376 | struct netlink_ext_ack *extack) |
377 | { |
378 | if (tb[IFLA_ADDRESS]) { |
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
382 | return -EADDRNOTAVAIL; |
383 | } |
384 | return 0; |
385 | } |
386 | |
387 | static struct rtnl_link_ops ifb_link_ops __read_mostly = { |
	.kind		= "ifb",
389 | .priv_size = sizeof(struct ifb_dev_private), |
390 | .setup = ifb_setup, |
391 | .validate = ifb_validate, |
392 | }; |
393 | |
394 | /* Number of ifb devices to be set up by this module. |
395 | * Note that these legacy devices have one queue. |
396 | * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb |
397 | */ |
398 | static int numifbs = 2; |
399 | module_param(numifbs, int, 0); |
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
401 | |
402 | static int __init ifb_init_one(int index) |
403 | { |
404 | struct net_device *dev_ifb; |
405 | int err; |
406 | |
	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
			       NET_NAME_UNKNOWN, ifb_setup);
409 | |
410 | if (!dev_ifb) |
411 | return -ENOMEM; |
412 | |
413 | dev_ifb->rtnl_link_ops = &ifb_link_ops; |
	err = register_netdevice(dev_ifb);
415 | if (err < 0) |
416 | goto err; |
417 | |
418 | return 0; |
419 | |
420 | err: |
	free_netdev(dev_ifb);
422 | return err; |
423 | } |
424 | |
425 | static int __init ifb_init_module(void) |
426 | { |
427 | int i, err; |
428 | |
	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
432 | if (err < 0) |
433 | goto out; |
434 | |
435 | for (i = 0; i < numifbs && !err; i++) { |
		err = ifb_init_one(i);
437 | cond_resched(); |
438 | } |
439 | if (err) |
		__rtnl_link_unregister(&ifb_link_ops);
441 | |
442 | out: |
443 | rtnl_unlock(); |
	up_write(&pernet_ops_rwsem);
445 | |
446 | return err; |
447 | } |
448 | |
449 | static void __exit ifb_cleanup_module(void) |
450 | { |
	rtnl_link_unregister(&ifb_link_ops);
452 | } |
453 | |
454 | module_init(ifb_init_module); |
455 | module_exit(ifb_cleanup_module); |
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Intermediate Functional Block (ifb) netdevice driver for sharing of resources and ingress packet queuing");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");
460 | |