1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (c) 2018, Intel Corporation. */ |
3 | |
/* This provides a net_failover interface for paravirtual drivers to
 * provide an alternate datapath by exporting APIs to create and
 * destroy an upper 'net_failover' netdev. The upper dev manages the
 * original paravirtual interface as a 'standby' netdev and uses the
 * generic failover infrastructure to register and manage a directly
 * attached VF as a 'primary' netdev. This enables live migration of
 * a VM with a directly attached VF by failing over to the paravirtual
 * datapath when the VF is unplugged.
 *
 * Some of the netdev management routines are based on bond/team driver as
 * this driver provides active-backup functionality similar to those drivers.
 */
16 | |
17 | #include <linux/netdevice.h> |
18 | #include <linux/etherdevice.h> |
19 | #include <linux/ethtool.h> |
20 | #include <linux/module.h> |
21 | #include <linux/slab.h> |
22 | #include <linux/netpoll.h> |
23 | #include <linux/rtnetlink.h> |
24 | #include <linux/if_vlan.h> |
25 | #include <linux/pci.h> |
26 | #include <net/sch_generic.h> |
27 | #include <uapi/linux/if_arp.h> |
28 | #include <net/net_failover.h> |
29 | |
30 | static bool net_failover_xmit_ready(struct net_device *dev) |
31 | { |
32 | return netif_running(dev) && netif_carrier_ok(dev); |
33 | } |
34 | |
35 | static int net_failover_open(struct net_device *dev) |
36 | { |
37 | struct net_failover_info *nfo_info = netdev_priv(dev); |
38 | struct net_device *primary_dev, *standby_dev; |
39 | int err; |
40 | |
41 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
42 | if (primary_dev) { |
43 | err = dev_open(dev: primary_dev, NULL); |
44 | if (err) |
45 | goto err_primary_open; |
46 | } |
47 | |
48 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
49 | if (standby_dev) { |
50 | err = dev_open(dev: standby_dev, NULL); |
51 | if (err) |
52 | goto err_standby_open; |
53 | } |
54 | |
55 | if ((primary_dev && net_failover_xmit_ready(dev: primary_dev)) || |
56 | (standby_dev && net_failover_xmit_ready(dev: standby_dev))) { |
57 | netif_carrier_on(dev); |
58 | netif_tx_wake_all_queues(dev); |
59 | } |
60 | |
61 | return 0; |
62 | |
63 | err_standby_open: |
64 | if (primary_dev) |
65 | dev_close(dev: primary_dev); |
66 | err_primary_open: |
67 | netif_tx_disable(dev); |
68 | return err; |
69 | } |
70 | |
71 | static int net_failover_close(struct net_device *dev) |
72 | { |
73 | struct net_failover_info *nfo_info = netdev_priv(dev); |
74 | struct net_device *slave_dev; |
75 | |
76 | netif_tx_disable(dev); |
77 | |
78 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
79 | if (slave_dev) |
80 | dev_close(dev: slave_dev); |
81 | |
82 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
83 | if (slave_dev) |
84 | dev_close(dev: slave_dev); |
85 | |
86 | return 0; |
87 | } |
88 | |
89 | static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb, |
90 | struct net_device *dev) |
91 | { |
92 | dev_core_stats_tx_dropped_inc(dev); |
93 | dev_kfree_skb_any(skb); |
94 | return NETDEV_TX_OK; |
95 | } |
96 | |
97 | static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb, |
98 | struct net_device *dev) |
99 | { |
100 | struct net_failover_info *nfo_info = netdev_priv(dev); |
101 | struct net_device *xmit_dev; |
102 | |
103 | /* Try xmit via primary netdev followed by standby netdev */ |
104 | xmit_dev = rcu_dereference_bh(nfo_info->primary_dev); |
105 | if (!xmit_dev || !net_failover_xmit_ready(dev: xmit_dev)) { |
106 | xmit_dev = rcu_dereference_bh(nfo_info->standby_dev); |
107 | if (!xmit_dev || !net_failover_xmit_ready(dev: xmit_dev)) |
108 | return net_failover_drop_xmit(skb, dev); |
109 | } |
110 | |
111 | skb->dev = xmit_dev; |
112 | skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; |
113 | |
114 | return dev_queue_xmit(skb); |
115 | } |
116 | |
117 | static u16 net_failover_select_queue(struct net_device *dev, |
118 | struct sk_buff *skb, |
119 | struct net_device *sb_dev) |
120 | { |
121 | struct net_failover_info *nfo_info = netdev_priv(dev); |
122 | struct net_device *primary_dev; |
123 | u16 txq; |
124 | |
125 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
126 | if (primary_dev) { |
127 | const struct net_device_ops *ops = primary_dev->netdev_ops; |
128 | |
129 | if (ops->ndo_select_queue) |
130 | txq = ops->ndo_select_queue(primary_dev, skb, sb_dev); |
131 | else |
132 | txq = netdev_pick_tx(dev: primary_dev, skb, NULL); |
133 | } else { |
134 | txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; |
135 | } |
136 | |
137 | /* Save the original txq to restore before passing to the driver */ |
138 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; |
139 | |
140 | if (unlikely(txq >= dev->real_num_tx_queues)) { |
141 | do { |
142 | txq -= dev->real_num_tx_queues; |
143 | } while (txq >= dev->real_num_tx_queues); |
144 | } |
145 | |
146 | return txq; |
147 | } |
148 | |
149 | /* fold stats, assuming all rtnl_link_stats64 fields are u64, but |
150 | * that some drivers can provide 32bit values only. |
151 | */ |
152 | static void net_failover_fold_stats(struct rtnl_link_stats64 *_res, |
153 | const struct rtnl_link_stats64 *_new, |
154 | const struct rtnl_link_stats64 *_old) |
155 | { |
156 | const u64 *new = (const u64 *)_new; |
157 | const u64 *old = (const u64 *)_old; |
158 | u64 *res = (u64 *)_res; |
159 | int i; |
160 | |
161 | for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) { |
162 | u64 nv = new[i]; |
163 | u64 ov = old[i]; |
164 | s64 delta = nv - ov; |
165 | |
166 | /* detects if this particular field is 32bit only */ |
167 | if (((nv | ov) >> 32) == 0) |
168 | delta = (s64)(s32)((u32)nv - (u32)ov); |
169 | |
170 | /* filter anomalies, some drivers reset their stats |
171 | * at down/up events. |
172 | */ |
173 | if (delta > 0) |
174 | res[i] += delta; |
175 | } |
176 | } |
177 | |
178 | static void net_failover_get_stats(struct net_device *dev, |
179 | struct rtnl_link_stats64 *stats) |
180 | { |
181 | struct net_failover_info *nfo_info = netdev_priv(dev); |
182 | const struct rtnl_link_stats64 *new; |
183 | struct rtnl_link_stats64 temp; |
184 | struct net_device *slave_dev; |
185 | |
186 | spin_lock(lock: &nfo_info->stats_lock); |
187 | memcpy(stats, &nfo_info->failover_stats, sizeof(*stats)); |
188 | |
189 | rcu_read_lock(); |
190 | |
191 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
192 | if (slave_dev) { |
193 | new = dev_get_stats(dev: slave_dev, storage: &temp); |
194 | net_failover_fold_stats(res: stats, new: new, old: &nfo_info->primary_stats); |
195 | memcpy(&nfo_info->primary_stats, new, sizeof(*new)); |
196 | } |
197 | |
198 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
199 | if (slave_dev) { |
200 | new = dev_get_stats(dev: slave_dev, storage: &temp); |
201 | net_failover_fold_stats(res: stats, new: new, old: &nfo_info->standby_stats); |
202 | memcpy(&nfo_info->standby_stats, new, sizeof(*new)); |
203 | } |
204 | |
205 | rcu_read_unlock(); |
206 | |
207 | memcpy(&nfo_info->failover_stats, stats, sizeof(*stats)); |
208 | spin_unlock(lock: &nfo_info->stats_lock); |
209 | } |
210 | |
211 | static int net_failover_change_mtu(struct net_device *dev, int new_mtu) |
212 | { |
213 | struct net_failover_info *nfo_info = netdev_priv(dev); |
214 | struct net_device *primary_dev, *standby_dev; |
215 | int ret = 0; |
216 | |
217 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
218 | if (primary_dev) { |
219 | ret = dev_set_mtu(primary_dev, new_mtu); |
220 | if (ret) |
221 | return ret; |
222 | } |
223 | |
224 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
225 | if (standby_dev) { |
226 | ret = dev_set_mtu(standby_dev, new_mtu); |
227 | if (ret) { |
228 | if (primary_dev) |
229 | dev_set_mtu(primary_dev, dev->mtu); |
230 | return ret; |
231 | } |
232 | } |
233 | |
234 | dev->mtu = new_mtu; |
235 | |
236 | return 0; |
237 | } |
238 | |
239 | static void net_failover_set_rx_mode(struct net_device *dev) |
240 | { |
241 | struct net_failover_info *nfo_info = netdev_priv(dev); |
242 | struct net_device *slave_dev; |
243 | |
244 | rcu_read_lock(); |
245 | |
246 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
247 | if (slave_dev) { |
248 | dev_uc_sync_multiple(to: slave_dev, from: dev); |
249 | dev_mc_sync_multiple(to: slave_dev, from: dev); |
250 | } |
251 | |
252 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
253 | if (slave_dev) { |
254 | dev_uc_sync_multiple(to: slave_dev, from: dev); |
255 | dev_mc_sync_multiple(to: slave_dev, from: dev); |
256 | } |
257 | |
258 | rcu_read_unlock(); |
259 | } |
260 | |
261 | static int net_failover_vlan_rx_add_vid(struct net_device *dev, __be16 proto, |
262 | u16 vid) |
263 | { |
264 | struct net_failover_info *nfo_info = netdev_priv(dev); |
265 | struct net_device *primary_dev, *standby_dev; |
266 | int ret = 0; |
267 | |
268 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
269 | if (primary_dev) { |
270 | ret = vlan_vid_add(dev: primary_dev, proto, vid); |
271 | if (ret) |
272 | return ret; |
273 | } |
274 | |
275 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
276 | if (standby_dev) { |
277 | ret = vlan_vid_add(dev: standby_dev, proto, vid); |
278 | if (ret) |
279 | if (primary_dev) |
280 | vlan_vid_del(dev: primary_dev, proto, vid); |
281 | } |
282 | |
283 | return ret; |
284 | } |
285 | |
286 | static int net_failover_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, |
287 | u16 vid) |
288 | { |
289 | struct net_failover_info *nfo_info = netdev_priv(dev); |
290 | struct net_device *slave_dev; |
291 | |
292 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
293 | if (slave_dev) |
294 | vlan_vid_del(dev: slave_dev, proto, vid); |
295 | |
296 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
297 | if (slave_dev) |
298 | vlan_vid_del(dev: slave_dev, proto, vid); |
299 | |
300 | return 0; |
301 | } |
302 | |
303 | static const struct net_device_ops failover_dev_ops = { |
304 | .ndo_open = net_failover_open, |
305 | .ndo_stop = net_failover_close, |
306 | .ndo_start_xmit = net_failover_start_xmit, |
307 | .ndo_select_queue = net_failover_select_queue, |
308 | .ndo_get_stats64 = net_failover_get_stats, |
309 | .ndo_change_mtu = net_failover_change_mtu, |
310 | .ndo_set_rx_mode = net_failover_set_rx_mode, |
311 | .ndo_vlan_rx_add_vid = net_failover_vlan_rx_add_vid, |
312 | .ndo_vlan_rx_kill_vid = net_failover_vlan_rx_kill_vid, |
313 | .ndo_validate_addr = eth_validate_addr, |
314 | .ndo_features_check = passthru_features_check, |
315 | }; |
316 | |
317 | #define FAILOVER_NAME "net_failover" |
318 | #define FAILOVER_VERSION "0.1" |
319 | |
320 | static void nfo_ethtool_get_drvinfo(struct net_device *dev, |
321 | struct ethtool_drvinfo *drvinfo) |
322 | { |
323 | strscpy(p: drvinfo->driver, FAILOVER_NAME, size: sizeof(drvinfo->driver)); |
324 | strscpy(p: drvinfo->version, FAILOVER_VERSION, size: sizeof(drvinfo->version)); |
325 | } |
326 | |
327 | static int nfo_ethtool_get_link_ksettings(struct net_device *dev, |
328 | struct ethtool_link_ksettings *cmd) |
329 | { |
330 | struct net_failover_info *nfo_info = netdev_priv(dev); |
331 | struct net_device *slave_dev; |
332 | |
333 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
334 | if (!slave_dev || !net_failover_xmit_ready(dev: slave_dev)) { |
335 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
336 | if (!slave_dev || !net_failover_xmit_ready(dev: slave_dev)) { |
337 | cmd->base.duplex = DUPLEX_UNKNOWN; |
338 | cmd->base.port = PORT_OTHER; |
339 | cmd->base.speed = SPEED_UNKNOWN; |
340 | |
341 | return 0; |
342 | } |
343 | } |
344 | |
345 | return __ethtool_get_link_ksettings(dev: slave_dev, link_ksettings: cmd); |
346 | } |
347 | |
348 | static const struct ethtool_ops failover_ethtool_ops = { |
349 | .get_drvinfo = nfo_ethtool_get_drvinfo, |
350 | .get_link = ethtool_op_get_link, |
351 | .get_link_ksettings = nfo_ethtool_get_link_ksettings, |
352 | }; |
353 | |
354 | /* Called when slave dev is injecting data into network stack. |
355 | * Change the associated network device from lower dev to failover dev. |
356 | * note: already called with rcu_read_lock |
357 | */ |
358 | static rx_handler_result_t net_failover_handle_frame(struct sk_buff **pskb) |
359 | { |
360 | struct sk_buff *skb = *pskb; |
361 | struct net_device *dev = rcu_dereference(skb->dev->rx_handler_data); |
362 | struct net_failover_info *nfo_info = netdev_priv(dev); |
363 | struct net_device *primary_dev, *standby_dev; |
364 | |
365 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
366 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
367 | |
368 | if (primary_dev && skb->dev == standby_dev) |
369 | return RX_HANDLER_EXACT; |
370 | |
371 | skb->dev = dev; |
372 | |
373 | return RX_HANDLER_ANOTHER; |
374 | } |
375 | |
376 | static void net_failover_compute_features(struct net_device *dev) |
377 | { |
378 | netdev_features_t vlan_features = FAILOVER_VLAN_FEATURES & |
379 | NETIF_F_ALL_FOR_ALL; |
380 | netdev_features_t enc_features = FAILOVER_ENC_FEATURES; |
381 | unsigned short = ETH_HLEN; |
382 | unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | |
383 | IFF_XMIT_DST_RELEASE_PERM; |
384 | struct net_failover_info *nfo_info = netdev_priv(dev); |
385 | struct net_device *primary_dev, *standby_dev; |
386 | |
387 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
388 | if (primary_dev) { |
389 | vlan_features = |
390 | netdev_increment_features(all: vlan_features, |
391 | one: primary_dev->vlan_features, |
392 | FAILOVER_VLAN_FEATURES); |
393 | enc_features = |
394 | netdev_increment_features(all: enc_features, |
395 | one: primary_dev->hw_enc_features, |
396 | FAILOVER_ENC_FEATURES); |
397 | |
398 | dst_release_flag &= primary_dev->priv_flags; |
399 | if (primary_dev->hard_header_len > max_hard_header_len) |
400 | max_hard_header_len = primary_dev->hard_header_len; |
401 | } |
402 | |
403 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
404 | if (standby_dev) { |
405 | vlan_features = |
406 | netdev_increment_features(all: vlan_features, |
407 | one: standby_dev->vlan_features, |
408 | FAILOVER_VLAN_FEATURES); |
409 | enc_features = |
410 | netdev_increment_features(all: enc_features, |
411 | one: standby_dev->hw_enc_features, |
412 | FAILOVER_ENC_FEATURES); |
413 | |
414 | dst_release_flag &= standby_dev->priv_flags; |
415 | if (standby_dev->hard_header_len > max_hard_header_len) |
416 | max_hard_header_len = standby_dev->hard_header_len; |
417 | } |
418 | |
419 | dev->vlan_features = vlan_features; |
420 | dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; |
421 | dev->hard_header_len = max_hard_header_len; |
422 | |
423 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
424 | if (dst_release_flag == (IFF_XMIT_DST_RELEASE | |
425 | IFF_XMIT_DST_RELEASE_PERM)) |
426 | dev->priv_flags |= IFF_XMIT_DST_RELEASE; |
427 | |
428 | netdev_change_features(dev); |
429 | } |
430 | |
431 | static void net_failover_lower_state_changed(struct net_device *slave_dev, |
432 | struct net_device *primary_dev, |
433 | struct net_device *standby_dev) |
434 | { |
435 | struct netdev_lag_lower_state_info info; |
436 | |
437 | if (netif_carrier_ok(dev: slave_dev)) |
438 | info.link_up = true; |
439 | else |
440 | info.link_up = false; |
441 | |
442 | if (slave_dev == primary_dev) { |
443 | if (netif_running(dev: primary_dev)) |
444 | info.tx_enabled = true; |
445 | else |
446 | info.tx_enabled = false; |
447 | } else { |
448 | if ((primary_dev && netif_running(dev: primary_dev)) || |
449 | (!netif_running(dev: standby_dev))) |
450 | info.tx_enabled = false; |
451 | else |
452 | info.tx_enabled = true; |
453 | } |
454 | |
455 | netdev_lower_state_changed(lower_dev: slave_dev, lower_state_info: &info); |
456 | } |
457 | |
458 | static int net_failover_slave_pre_register(struct net_device *slave_dev, |
459 | struct net_device *failover_dev) |
460 | { |
461 | struct net_device *standby_dev, *primary_dev; |
462 | struct net_failover_info *nfo_info; |
463 | bool slave_is_standby; |
464 | |
465 | nfo_info = netdev_priv(dev: failover_dev); |
466 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
467 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
468 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
469 | if (slave_is_standby ? standby_dev : primary_dev) { |
470 | netdev_err(dev: failover_dev, format: "%s attempting to register as slave dev when %s already present\n" , |
471 | slave_dev->name, |
472 | slave_is_standby ? "standby" : "primary" ); |
473 | return -EINVAL; |
474 | } |
475 | |
476 | /* We want to allow only a direct attached VF device as a primary |
477 | * netdev. As there is no easy way to check for a VF device, restrict |
478 | * this to a pci device. |
479 | */ |
480 | if (!slave_is_standby && (!slave_dev->dev.parent || |
481 | !dev_is_pci(slave_dev->dev.parent))) |
482 | return -EINVAL; |
483 | |
484 | if (failover_dev->features & NETIF_F_VLAN_CHALLENGED && |
485 | vlan_uses_dev(dev: failover_dev)) { |
486 | netdev_err(dev: failover_dev, format: "Device %s is VLAN challenged and failover device has VLAN set up\n" , |
487 | failover_dev->name); |
488 | return -EINVAL; |
489 | } |
490 | |
491 | return 0; |
492 | } |
493 | |
494 | static int net_failover_slave_register(struct net_device *slave_dev, |
495 | struct net_device *failover_dev) |
496 | { |
497 | struct net_device *standby_dev, *primary_dev; |
498 | struct net_failover_info *nfo_info; |
499 | bool slave_is_standby; |
500 | u32 orig_mtu; |
501 | int err; |
502 | |
503 | /* Align MTU of slave with failover dev */ |
504 | orig_mtu = slave_dev->mtu; |
505 | err = dev_set_mtu(slave_dev, failover_dev->mtu); |
506 | if (err) { |
507 | netdev_err(dev: failover_dev, format: "unable to change mtu of %s to %u register failed\n" , |
508 | slave_dev->name, failover_dev->mtu); |
509 | goto done; |
510 | } |
511 | |
512 | dev_hold(dev: slave_dev); |
513 | |
514 | if (netif_running(dev: failover_dev)) { |
515 | err = dev_open(dev: slave_dev, NULL); |
516 | if (err && (err != -EBUSY)) { |
517 | netdev_err(dev: failover_dev, format: "Opening slave %s failed err:%d\n" , |
518 | slave_dev->name, err); |
519 | goto err_dev_open; |
520 | } |
521 | } |
522 | |
523 | netif_addr_lock_bh(dev: failover_dev); |
524 | dev_uc_sync_multiple(to: slave_dev, from: failover_dev); |
525 | dev_mc_sync_multiple(to: slave_dev, from: failover_dev); |
526 | netif_addr_unlock_bh(dev: failover_dev); |
527 | |
528 | err = vlan_vids_add_by_dev(dev: slave_dev, by_dev: failover_dev); |
529 | if (err) { |
530 | netdev_err(dev: failover_dev, format: "Failed to add vlan ids to device %s err:%d\n" , |
531 | slave_dev->name, err); |
532 | goto err_vlan_add; |
533 | } |
534 | |
535 | nfo_info = netdev_priv(dev: failover_dev); |
536 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
537 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
538 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
539 | |
540 | if (slave_is_standby) { |
541 | rcu_assign_pointer(nfo_info->standby_dev, slave_dev); |
542 | standby_dev = slave_dev; |
543 | dev_get_stats(dev: standby_dev, storage: &nfo_info->standby_stats); |
544 | } else { |
545 | rcu_assign_pointer(nfo_info->primary_dev, slave_dev); |
546 | primary_dev = slave_dev; |
547 | dev_get_stats(dev: primary_dev, storage: &nfo_info->primary_stats); |
548 | failover_dev->min_mtu = slave_dev->min_mtu; |
549 | failover_dev->max_mtu = slave_dev->max_mtu; |
550 | } |
551 | |
552 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
553 | net_failover_compute_features(dev: failover_dev); |
554 | |
555 | call_netdevice_notifiers(val: NETDEV_JOIN, dev: slave_dev); |
556 | |
557 | netdev_info(dev: failover_dev, format: "failover %s slave:%s registered\n" , |
558 | slave_is_standby ? "standby" : "primary" , slave_dev->name); |
559 | |
560 | return 0; |
561 | |
562 | err_vlan_add: |
563 | dev_uc_unsync(to: slave_dev, from: failover_dev); |
564 | dev_mc_unsync(to: slave_dev, from: failover_dev); |
565 | dev_close(dev: slave_dev); |
566 | err_dev_open: |
567 | dev_put(dev: slave_dev); |
568 | dev_set_mtu(slave_dev, orig_mtu); |
569 | done: |
570 | return err; |
571 | } |
572 | |
573 | static int net_failover_slave_pre_unregister(struct net_device *slave_dev, |
574 | struct net_device *failover_dev) |
575 | { |
576 | struct net_device *standby_dev, *primary_dev; |
577 | struct net_failover_info *nfo_info; |
578 | |
579 | nfo_info = netdev_priv(dev: failover_dev); |
580 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
581 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
582 | |
583 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
584 | return -ENODEV; |
585 | |
586 | return 0; |
587 | } |
588 | |
589 | static int net_failover_slave_unregister(struct net_device *slave_dev, |
590 | struct net_device *failover_dev) |
591 | { |
592 | struct net_device *standby_dev, *primary_dev; |
593 | struct net_failover_info *nfo_info; |
594 | bool slave_is_standby; |
595 | |
596 | nfo_info = netdev_priv(dev: failover_dev); |
597 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
598 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
599 | |
600 | if (WARN_ON_ONCE(slave_dev != primary_dev && slave_dev != standby_dev)) |
601 | return -ENODEV; |
602 | |
603 | vlan_vids_del_by_dev(dev: slave_dev, by_dev: failover_dev); |
604 | dev_uc_unsync(to: slave_dev, from: failover_dev); |
605 | dev_mc_unsync(to: slave_dev, from: failover_dev); |
606 | dev_close(dev: slave_dev); |
607 | |
608 | nfo_info = netdev_priv(dev: failover_dev); |
609 | dev_get_stats(dev: failover_dev, storage: &nfo_info->failover_stats); |
610 | |
611 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
612 | if (slave_is_standby) { |
613 | RCU_INIT_POINTER(nfo_info->standby_dev, NULL); |
614 | } else { |
615 | RCU_INIT_POINTER(nfo_info->primary_dev, NULL); |
616 | if (standby_dev) { |
617 | failover_dev->min_mtu = standby_dev->min_mtu; |
618 | failover_dev->max_mtu = standby_dev->max_mtu; |
619 | } |
620 | } |
621 | |
622 | dev_put(dev: slave_dev); |
623 | |
624 | net_failover_compute_features(dev: failover_dev); |
625 | |
626 | netdev_info(dev: failover_dev, format: "failover %s slave:%s unregistered\n" , |
627 | slave_is_standby ? "standby" : "primary" , slave_dev->name); |
628 | |
629 | return 0; |
630 | } |
631 | |
632 | static int net_failover_slave_link_change(struct net_device *slave_dev, |
633 | struct net_device *failover_dev) |
634 | { |
635 | struct net_device *primary_dev, *standby_dev; |
636 | struct net_failover_info *nfo_info; |
637 | |
638 | nfo_info = netdev_priv(dev: failover_dev); |
639 | |
640 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
641 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
642 | |
643 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
644 | return -ENODEV; |
645 | |
646 | if ((primary_dev && net_failover_xmit_ready(dev: primary_dev)) || |
647 | (standby_dev && net_failover_xmit_ready(dev: standby_dev))) { |
648 | netif_carrier_on(dev: failover_dev); |
649 | netif_tx_wake_all_queues(dev: failover_dev); |
650 | } else { |
651 | dev_get_stats(dev: failover_dev, storage: &nfo_info->failover_stats); |
652 | netif_carrier_off(dev: failover_dev); |
653 | netif_tx_stop_all_queues(dev: failover_dev); |
654 | } |
655 | |
656 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
657 | |
658 | return 0; |
659 | } |
660 | |
661 | static int net_failover_slave_name_change(struct net_device *slave_dev, |
662 | struct net_device *failover_dev) |
663 | { |
664 | struct net_device *primary_dev, *standby_dev; |
665 | struct net_failover_info *nfo_info; |
666 | |
667 | nfo_info = netdev_priv(dev: failover_dev); |
668 | |
669 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
670 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
671 | |
672 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
673 | return -ENODEV; |
674 | |
675 | /* We need to bring up the slave after the rename by udev in case |
676 | * open failed with EBUSY when it was registered. |
677 | */ |
678 | dev_open(dev: slave_dev, NULL); |
679 | |
680 | return 0; |
681 | } |
682 | |
683 | static struct failover_ops net_failover_ops = { |
684 | .slave_pre_register = net_failover_slave_pre_register, |
685 | .slave_register = net_failover_slave_register, |
686 | .slave_pre_unregister = net_failover_slave_pre_unregister, |
687 | .slave_unregister = net_failover_slave_unregister, |
688 | .slave_link_change = net_failover_slave_link_change, |
689 | .slave_name_change = net_failover_slave_name_change, |
690 | .slave_handle_frame = net_failover_handle_frame, |
691 | }; |
692 | |
693 | /** |
694 | * net_failover_create - Create and register a failover instance |
695 | * |
696 | * @standby_dev: standby netdev |
697 | * |
698 | * Creates a failover netdev and registers a failover instance for a standby |
699 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
700 | * The failover netdev acts as a master device and controls 2 slave devices - |
701 | * the original standby netdev and a VF netdev with the same MAC gets |
702 | * registered as primary netdev. |
703 | * |
704 | * Return: pointer to failover instance |
705 | */ |
706 | struct failover *net_failover_create(struct net_device *standby_dev) |
707 | { |
708 | struct device *dev = standby_dev->dev.parent; |
709 | struct net_device *failover_dev; |
710 | struct failover *failover; |
711 | int err; |
712 | |
713 | /* Alloc at least 2 queues, for now we are going with 16 assuming |
714 | * that VF devices being enslaved won't have too many queues. |
715 | */ |
716 | failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16); |
717 | if (!failover_dev) { |
718 | dev_err(dev, "Unable to allocate failover_netdev!\n" ); |
719 | return ERR_PTR(error: -ENOMEM); |
720 | } |
721 | |
722 | dev_net_set(dev: failover_dev, net: dev_net(dev: standby_dev)); |
723 | SET_NETDEV_DEV(failover_dev, dev); |
724 | |
725 | failover_dev->netdev_ops = &failover_dev_ops; |
726 | failover_dev->ethtool_ops = &failover_ethtool_ops; |
727 | |
728 | /* Initialize the device options */ |
729 | failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; |
730 | failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | |
731 | IFF_TX_SKB_SHARING); |
732 | |
733 | /* don't acquire failover netdev's netif_tx_lock when transmitting */ |
734 | failover_dev->features |= NETIF_F_LLTX; |
735 | |
736 | /* Don't allow failover devices to change network namespaces. */ |
737 | failover_dev->features |= NETIF_F_NETNS_LOCAL; |
738 | |
739 | failover_dev->hw_features = FAILOVER_VLAN_FEATURES | |
740 | NETIF_F_HW_VLAN_CTAG_TX | |
741 | NETIF_F_HW_VLAN_CTAG_RX | |
742 | NETIF_F_HW_VLAN_CTAG_FILTER; |
743 | |
744 | failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; |
745 | failover_dev->features |= failover_dev->hw_features; |
746 | |
747 | dev_addr_set(dev: failover_dev, addr: standby_dev->dev_addr); |
748 | |
749 | failover_dev->min_mtu = standby_dev->min_mtu; |
750 | failover_dev->max_mtu = standby_dev->max_mtu; |
751 | |
752 | err = register_netdev(dev: failover_dev); |
753 | if (err) { |
754 | dev_err(dev, "Unable to register failover_dev!\n" ); |
755 | goto err_register_netdev; |
756 | } |
757 | |
758 | netif_carrier_off(dev: failover_dev); |
759 | |
760 | failover = failover_register(dev: failover_dev, ops: &net_failover_ops); |
761 | if (IS_ERR(ptr: failover)) { |
762 | err = PTR_ERR(ptr: failover); |
763 | goto err_failover_register; |
764 | } |
765 | |
766 | return failover; |
767 | |
768 | err_failover_register: |
769 | unregister_netdev(dev: failover_dev); |
770 | err_register_netdev: |
771 | free_netdev(dev: failover_dev); |
772 | |
773 | return ERR_PTR(error: err); |
774 | } |
775 | EXPORT_SYMBOL_GPL(net_failover_create); |
776 | |
777 | /** |
778 | * net_failover_destroy - Destroy a failover instance |
779 | * |
780 | * @failover: pointer to failover instance |
781 | * |
782 | * Unregisters any slave netdevs associated with the failover instance by |
783 | * calling failover_slave_unregister(). |
784 | * unregisters the failover instance itself and finally frees the failover |
785 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
786 | * |
787 | */ |
788 | void net_failover_destroy(struct failover *failover) |
789 | { |
790 | struct net_failover_info *nfo_info; |
791 | struct net_device *failover_dev; |
792 | struct net_device *slave_dev; |
793 | |
794 | if (!failover) |
795 | return; |
796 | |
797 | failover_dev = rcu_dereference(failover->failover_dev); |
798 | nfo_info = netdev_priv(dev: failover_dev); |
799 | |
800 | netif_device_detach(dev: failover_dev); |
801 | |
802 | rtnl_lock(); |
803 | |
804 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
805 | if (slave_dev) |
806 | failover_slave_unregister(slave_dev); |
807 | |
808 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
809 | if (slave_dev) |
810 | failover_slave_unregister(slave_dev); |
811 | |
812 | failover_unregister(failover); |
813 | |
814 | unregister_netdevice(dev: failover_dev); |
815 | |
816 | rtnl_unlock(); |
817 | |
818 | free_netdev(dev: failover_dev); |
819 | } |
820 | EXPORT_SYMBOL_GPL(net_failover_destroy); |
821 | |
822 | static __init int |
823 | net_failover_init(void) |
824 | { |
825 | return 0; |
826 | } |
827 | module_init(net_failover_init); |
828 | |
829 | static __exit |
830 | void net_failover_exit(void) |
831 | { |
832 | } |
833 | module_exit(net_failover_exit); |
834 | |
835 | MODULE_DESCRIPTION("Failover driver for Paravirtual drivers" ); |
836 | MODULE_LICENSE("GPL v2" ); |
837 | |