// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux network device link state notification
 *
 * Author:
 *     Stefan Rompf <sux@loplof.de>
 */

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/types.h>

#include "dev.h"

enum lw_bits {
        LW_URGENT = 0,
};

static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;

static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);

static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);

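/* Derive the RFC 2863 operational state from the device's carrier,
 * dormant and testing bits. With the carrier down, look at the lower
 * device (iflink) to decide between IF_OPER_DOWN and
 * IF_OPER_LOWERLAYERDOWN.
 */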
static unsigned int default_operstate(const struct net_device *dev)
{
        if (netif_testing(dev))
                return IF_OPER_TESTING;

        /* Some uppers (DSA) have additional sources for being down, so
         * first check whether lower is indeed the source of its down state.
         */
        if (!netif_carrier_ok(dev)) {
                struct net_device *peer;
                int iflink;

                /* If called from netdev_run_todo()/linkwatch_sync_dev(),
                 * dev_net(dev) can be already freed, and RTNL is not held.
                 */
                if (dev->reg_state <= NETREG_REGISTERED)
                        iflink = dev_get_iflink(dev);
                else
                        iflink = dev->ifindex;

                if (iflink == dev->ifindex)
                        return IF_OPER_DOWN;

                ASSERT_RTNL();
                peer = __dev_get_by_index(dev_net(dev), iflink);
                if (!peer)
                        return IF_OPER_DOWN;

                return netif_carrier_ok(peer) ? IF_OPER_DOWN :
                                                IF_OPER_LOWERLAYERDOWN;
        }

        if (netif_dormant(dev))
                return IF_OPER_DORMANT;

        return IF_OPER_UP;
}

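/* Apply the interface's link mode policy (RFC 2863) on top of the
 * default operstate and publish the result to dev->operstate.
 */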
static void rfc2863_policy(struct net_device *dev)
{
        unsigned int operstate = default_operstate(dev);

        if (operstate == READ_ONCE(dev->operstate))
                return;

        switch (dev->link_mode) {
        case IF_LINK_MODE_TESTING:
                if (operstate == IF_OPER_UP)
                        operstate = IF_OPER_TESTING;
                break;

        case IF_LINK_MODE_DORMANT:
                if (operstate == IF_OPER_UP)
                        operstate = IF_OPER_DORMANT;
                break;

        case IF_LINK_MODE_DEFAULT:
        default:
                break;
        }

        WRITE_ONCE(dev->operstate, operstate);
}

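/* Seed dev->operstate for devices whose carrier, dormant or testing
 * state was already changed before registration.
 */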
void linkwatch_init_dev(struct net_device *dev)
{
        /* Handle pre-registration link state changes */
        if (!netif_carrier_ok(dev) || netif_dormant(dev) ||
            netif_testing(dev))
                rfc2863_policy(dev);
}

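/* Decide whether an event must bypass the one-second rate limit:
 * stacked devices (iflink != ifindex), LAG ports and masters, and a
 * carrier-up with a pending qdisc change are handled urgently.
 */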
static bool linkwatch_urgent_event(struct net_device *dev)
{
        if (!netif_running(dev))
                return false;

        if (dev->ifindex != dev_get_iflink(dev))
                return true;

        if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
                return true;

        return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}

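/* Queue @dev on the global event list and take a tracked reference.
 * A device is queued at most once; it is removed again when the
 * event is processed.
 */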
static void linkwatch_add_event(struct net_device *dev)
{
        unsigned long flags;

        spin_lock_irqsave(&lweventlist_lock, flags);
        if (list_empty(&dev->link_watch_list)) {
                list_add_tail(&dev->link_watch_list, &lweventlist);
                netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
        }
        spin_unlock_irqrestore(&lweventlist_lock, flags);
}

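/* (Re)arm the delayed work that drains lweventlist. Urgent events
 * run immediately; others keep the existing rate-limit timer unless
 * it has wrapped around.
 */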
static void linkwatch_schedule_work(int urgent)
{
        unsigned long delay = linkwatch_nextevent - jiffies;

        if (test_bit(LW_URGENT, &linkwatch_flags))
                return;

        /* Minimise down-time: drop delay for up event. */
        if (urgent) {
                if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
                        return;
                delay = 0;
        }

        /* If we wrap around we'll delay it by at most HZ. */
        if (delay > HZ)
                delay = 0;

        /*
         * If urgent, schedule immediate execution; otherwise, don't
         * override the existing timer.
         */
        if (test_bit(LW_URGENT, &linkwatch_flags))
                mod_delayed_work(system_dfl_wq, &linkwatch_work, 0);
        else
                queue_delayed_work(system_dfl_wq, &linkwatch_work, delay);
}

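/* Process one pending event: clear the PENDING bit, recompute the
 * operstate and, for devices that are up, (de)activate the qdisc and
 * notify userspace of the state change.
 */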
static void linkwatch_do_dev(struct net_device *dev)
{
        /*
         * Make sure the above read is complete since it can be
         * rewritten as soon as we clear the bit below.
         */
        smp_mb__before_atomic();

        /* We are about to handle this device,
         * so new events can be accepted
         */
        clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);

        rfc2863_policy(dev);
        if (dev->flags & IFF_UP) {
                if (netif_carrier_ok(dev))
                        dev_activate(dev);
                else
                        dev_deactivate(dev);

                netif_state_change(dev);
        }
}

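/* Drain a bounded number of events from lweventlist. When called
 * with urgent_only set, non-urgent events are put back on the global
 * list to be handled once the rate-limit window expires.
 */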
static void __linkwatch_run_queue(int urgent_only)
{
#define MAX_DO_DEV_PER_LOOP     100

        int do_dev = MAX_DO_DEV_PER_LOOP;
        /* Use a local list here since we add non-urgent
         * events back to the global one when called with
         * urgent_only=1.
         */
        LIST_HEAD(wrk);

        /* Give urgent case more budget */
        if (urgent_only)
                do_dev += MAX_DO_DEV_PER_LOOP;

        /*
         * Limit the number of linkwatch events to one
         * per second so that a runaway driver does not
         * cause a storm of messages on the netlink
         * socket. This limit does not apply to up events
         * while the device qdisc is down.
         */
        if (!urgent_only)
                linkwatch_nextevent = jiffies + HZ;
        /* Limit wrap-around effect on delay. */
        else if (time_after(linkwatch_nextevent, jiffies + HZ))
                linkwatch_nextevent = jiffies;

        clear_bit(LW_URGENT, &linkwatch_flags);

        spin_lock_irq(&lweventlist_lock);
        list_splice_init(&lweventlist, &wrk);

        while (!list_empty(&wrk) && do_dev > 0) {
                struct net_device *dev;

                dev = list_first_entry(&wrk, struct net_device, link_watch_list);
                list_del_init(&dev->link_watch_list);

                if (!netif_device_present(dev) ||
                    (urgent_only && !linkwatch_urgent_event(dev))) {
                        list_add_tail(&dev->link_watch_list, &lweventlist);
                        continue;
                }
                /* We must free netdev tracker under
                 * the spinlock protection.
                 */
                netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
                spin_unlock_irq(&lweventlist_lock);
                netdev_lock_ops(dev);
                linkwatch_do_dev(dev);
                netdev_unlock_ops(dev);
                /* Use __dev_put() because netdev_tracker_free() was already
                 * called above. Must be after netdev_unlock_ops() to prevent
                 * netdev_run_todo() from freeing the device while still in use.
                 */
                __dev_put(dev);
                do_dev--;
                spin_lock_irq(&lweventlist_lock);
        }

        /* Add the remaining work back to lweventlist */
        list_splice_init(&wrk, &lweventlist);

        if (!list_empty(&lweventlist))
                linkwatch_schedule_work(0);
        spin_unlock_irq(&lweventlist_lock);
}

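/* Unlink @dev from the event list if it is queued, dropping the
 * tracker under the list lock. Returns true if the device was
 * queued; the caller must then release the remaining reference.
 */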
static bool linkwatch_clean_dev(struct net_device *dev)
{
        unsigned long flags;
        bool clean = false;

        spin_lock_irqsave(&lweventlist_lock, flags);
        if (!list_empty(&dev->link_watch_list)) {
                list_del_init(&dev->link_watch_list);
                clean = true;
                /* We must release netdev tracker under
                 * the spinlock protection.
                 */
                netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
        }
        spin_unlock_irqrestore(&lweventlist_lock, flags);

        return clean;
}

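/* Flush a pending linkwatch event for @dev immediately. The caller
 * must already hold the device's ops lock.
 */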
void __linkwatch_sync_dev(struct net_device *dev)
{
        netdev_ops_assert_locked(dev);

        if (linkwatch_clean_dev(dev)) {
                linkwatch_do_dev(dev);
                /* Use __dev_put() because netdev_tracker_free() was already
                 * called inside linkwatch_clean_dev().
                 */
                __dev_put(dev);
        }
}

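/* Same as __linkwatch_sync_dev(), but takes the device's ops lock
 * itself.
 */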
void linkwatch_sync_dev(struct net_device *dev)
{
        if (linkwatch_clean_dev(dev)) {
                netdev_lock_ops(dev);
                linkwatch_do_dev(dev);
                netdev_unlock_ops(dev);
                /* Use __dev_put() because netdev_tracker_free() was already
                 * called inside linkwatch_clean_dev().
                 */
                __dev_put(dev);
        }
}

/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
        __linkwatch_run_queue(0);
}

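/* Delayed-work handler: run the queue in urgent-only mode while the
 * rate-limit window is still open, otherwise process everything.
 */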
static void linkwatch_event(struct work_struct *dummy)
{
        rtnl_lock();
        __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
        rtnl_unlock();
}

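/* Entry point for link state changes, e.g. from netif_carrier_on()
 * and netif_carrier_off(): mark the device pending, queue it and
 * schedule the linkwatch work, immediately for urgent events and
 * rate-limited otherwise.
 */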
void linkwatch_fire_event(struct net_device *dev)
{
        bool urgent = linkwatch_urgent_event(dev);

        if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
                linkwatch_add_event(dev);
        } else if (!urgent)
                return;

        linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);